aoptx86.pas 171 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type

  { x86/x86-64 specific peephole optimizer.  Extends the generic
    TAsmOptimizer with register read/write analysis for this target and with
    the per-opcode optimisation routines invoked by the pass drivers. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { returns true if the instruction hp reads (part of) reg, including
      implicit reads (see implementation) }
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    { scans forward from Current for the next instruction involving reg;
      only scans multiple instructions at optimisation level 3 }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    { inserts s as an assembler comment before p (debug builds only) }
    procedure DebugMsg(const s : string; p : tai);inline;
    class function IsExitCode(p : tai) : boolean;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    function DoSubAddOpt(var p : tai) : Boolean;
    { pre-peephole optimisations, dispatched by opcode }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    function PrePeepholeOptIMUL(var p : tai) : boolean;
    { pass 1 optimisations, dispatched by opcode }
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(var p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVAP(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(var p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass1Sub(var p : tai) : boolean;
    function OptPass1SHLSAL(var p : tai) : boolean;
    function OptPass1SETcc(var p: tai): boolean;
    function OptPass1FSTP(var p: tai): boolean;
    function OptPass1FLD(var p: tai): boolean;
    { pass 2 optimisations, dispatched by opcode }
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    { post-peephole optimisations, dispatched by opcode }
    function PostPeepholeOptMov(var p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    function PostPeepholeOptMovzx(var p : tai) : Boolean;
    function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
    function PostPeepholeOptCmp(var p : tai) : Boolean;
    function PostPeepholeOptTestOr(var p : tai) : Boolean;
    function PostPeepholeOptCall(var p : tai) : Boolean;
    function PostPeepholeOptLea(var p : tai) : Boolean;
    procedure OptReferences;
  end;
  77. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  78. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  79. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  80. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  81. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  82. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  83. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  84. function RefsEqual(const r1, r2: treference): boolean;
  85. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  86. { returns true, if ref is a reference using only the registers passed as base and index
  87. and having an offset }
  88. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  89. {$ifdef DEBUG_AOPTCPU}
  90. const
  91. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  92. {$else DEBUG_AOPTCPU}
  93. { Empty strings help the optimizer to remove string concatenations that won't
  94. ever appear to the user on release builds. [Kit] }
  95. const
  96. SPeepholeOptimization = '';
  97. {$endif DEBUG_AOPTCPU}
  98. implementation
  99. uses
  100. cutils,verbose,
  101. globals,
  102. cpuinfo,
  103. procinfo,
  104. aasmbase,
  105. aoptutils,
  106. symconst,symsym,
  107. cgx86,
  108. itcpugas;
  109. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  110. begin
  111. result :=
  112. (instr.typ = ait_instruction) and
  113. (taicpu(instr).opcode = op) and
  114. ((opsize = []) or (taicpu(instr).opsize in opsize));
  115. end;
  116. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  117. begin
  118. result :=
  119. (instr.typ = ait_instruction) and
  120. ((taicpu(instr).opcode = op1) or
  121. (taicpu(instr).opcode = op2)
  122. ) and
  123. ((opsize = []) or (taicpu(instr).opsize in opsize));
  124. end;
  125. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  126. begin
  127. result :=
  128. (instr.typ = ait_instruction) and
  129. ((taicpu(instr).opcode = op1) or
  130. (taicpu(instr).opcode = op2) or
  131. (taicpu(instr).opcode = op3)
  132. ) and
  133. ((opsize = []) or (taicpu(instr).opsize in opsize));
  134. end;
  135. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  136. const opsize : topsizes) : boolean;
  137. var
  138. op : TAsmOp;
  139. begin
  140. result:=false;
  141. for op in ops do
  142. begin
  143. if (instr.typ = ait_instruction) and
  144. (taicpu(instr).opcode = op) and
  145. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  146. begin
  147. result:=true;
  148. exit;
  149. end;
  150. end;
  151. end;
  152. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  153. begin
  154. result := (oper.typ = top_reg) and (oper.reg = reg);
  155. end;
  156. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  157. begin
  158. result := (oper.typ = top_const) and (oper.val = a);
  159. end;
  160. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  161. begin
  162. result := oper1.typ = oper2.typ;
  163. if result then
  164. case oper1.typ of
  165. top_const:
  166. Result:=oper1.val = oper2.val;
  167. top_reg:
  168. Result:=oper1.reg = oper2.reg;
  169. top_ref:
  170. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  171. else
  172. internalerror(2013102801);
  173. end
  174. end;
  175. function RefsEqual(const r1, r2: treference): boolean;
  176. begin
  177. RefsEqual :=
  178. (r1.offset = r2.offset) and
  179. (r1.segment = r2.segment) and (r1.base = r2.base) and
  180. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  181. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  182. (r1.relsymbol = r2.relsymbol) and
  183. (r1.volatility=[]) and
  184. (r2.volatility=[]);
  185. end;
  186. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  187. begin
  188. Result:=(ref.offset=0) and
  189. (ref.scalefactor in [0,1]) and
  190. (ref.segment=NR_NO) and
  191. (ref.symbol=nil) and
  192. (ref.relsymbol=nil) and
  193. ((base=NR_INVALID) or
  194. (ref.base=base)) and
  195. ((index=NR_INVALID) or
  196. (ref.index=index)) and
  197. (ref.volatility=[]);
  198. end;
  199. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  200. begin
  201. Result:=(ref.scalefactor in [0,1]) and
  202. (ref.segment=NR_NO) and
  203. (ref.symbol=nil) and
  204. (ref.relsymbol=nil) and
  205. ((base=NR_INVALID) or
  206. (ref.base=base)) and
  207. ((index=NR_INVALID) or
  208. (ref.index=index)) and
  209. (ref.volatility=[]);
  210. end;
  211. function InstrReadsFlags(p: tai): boolean;
  212. begin
  213. InstrReadsFlags := true;
  214. case p.typ of
  215. ait_instruction:
  216. if InsProp[taicpu(p).opcode].Ch*
  217. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  218. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  219. Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
  220. exit;
  221. ait_label:
  222. exit;
  223. else
  224. ;
  225. end;
  226. InstrReadsFlags := false;
  227. end;
function TX86AsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  begin
    { Scans forward from Current and returns in Next the first instruction
      that mentions reg, stopping early at any non-instruction entry or at a
      call/jump.  The multi-instruction scan only happens when
      cs_opt_level3 is enabled; otherwise the very next entry is returned.
      Result is False when the end of the list is reached. }
    Next:=Current;
    repeat
      Result:=GetNextInstruction(Next,Next);
    until not (Result) or
          not(cs_opt_level3 in current_settings.optimizerswitches) or
          (Next.typ<>ait_instruction) or
          RegInInstruction(reg,Next) or
          is_calljmp(taicpu(Next).opcode);
  end;
function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  begin
    { Target-specific override of the generic query; delegates to the x86
      register-read analysis. }
    Result:=RegReadByInstruction(reg,hp);
  end;
function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  var
    p: taicpu;
    opcount: longint;
  begin
    { Returns True if instruction hp reads (part of) register reg.  Both
      explicit operands and implicit reads (accumulator forms of MUL/IMUL/
      DIV, registers listed in the instruction property table, individual
      flag bits tested by a condition) are taken into account. }
    RegReadByInstruction := false;
    if hp.typ <> ait_instruction then
      exit;
    p := taicpu(hp);
    case p.opcode of
      A_CALL:
        { conservative: assume a call reads any register }
        regreadbyinstruction := true;
      A_IMUL:
        case p.ops of
          1:
            { one-operand IMUL implicitly reads the accumulator; the
              byte-sized form does not read AH (only AL) }
            regReadByInstruction := RegInOp(reg,p.oper[0]^) or
               (
                ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
               );
          2,3:
            { two/three-operand IMUL reads only its explicit source operands }
            regReadByInstruction :=
              reginop(reg,p.oper[0]^) or
              reginop(reg,p.oper[1]^);
        end;
      A_MUL:
        begin
          { MUL implicitly multiplies by the accumulator; as with IMUL, the
            byte-sized form does not read AH }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
             (
              ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
              ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
             );
        end;
      A_IDIV,A_DIV:
        begin
          { division implicitly reads EAX, and also EDX except in the
            byte-sized form }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
             (
               (getregtype(reg)=R_INTREGISTER) and
               (
                 (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
               )
             );
        end;
      else
        begin
          { LEA only computes an address; a segment register is not read }
          if (p.opcode=A_LEA) and is_segment_reg(reg) then
            begin
              RegReadByInstruction := false;
              exit;
            end;
          { any register used inside a memory reference operand is read for
            the address calculation, regardless of operand direction }
          for opcount := 0 to p.ops-1 do
            if (p.oper[opCount]^.typ = top_ref) and
               RegInRef(reg,p.oper[opcount]^.ref^) then
              begin
                RegReadByInstruction := true;
                exit
              end;
          { special handling for SSE MOVSD }
          if (p.opcode=A_MOVSD) and (p.ops>0) then
            begin
              { p.ops>0 excludes the string instruction MOVSD, which has no
                explicit operands; the SSE form must have exactly two }
              if p.ops<>2 then
                internalerror(2017042702);
              { in the register-to-register form the destination (oper[1])
                also counts as read, because only part of it is overwritten }
              regReadByInstruction := reginop(reg,p.oper[0]^) or
                (
                 (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
                );
              exit;
            end;
          with insprop[p.opcode] do
            begin
              { implicit integer-register reads recorded in the instruction
                property table (read, read-write or modify channels) }
              if getregtype(reg)=R_INTREGISTER then
                begin
                  case getsupreg(reg) of
                    RS_EAX:
                      if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ECX:
                      if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDX:
                      if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBX:
                      if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESP:
                      if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBP:
                      if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESI:
                      if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDI:
                      if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                  end;
                end;
              { flag-register queries: resolve which individual flag bits
                the instruction reads }
              if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                begin
                  { for a conditional instruction, asking about a single flag
                    bit is answered from the condition code itself }
                  if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                    begin
                      case p.condition of
                        C_A,C_NBE,       { CF=0 and ZF=0 }
                        C_BE,C_NA:       { CF=1 or ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                        C_AE,C_NB,C_NC,  { CF=0 }
                        C_B,C_NAE,C_C:   { CF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
                        C_NE,C_NZ,       { ZF=0 }
                        C_E,C_Z:         { ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
                        C_G,C_NLE,       { ZF=0 and SF=OF }
                        C_LE,C_NG:       { ZF=1 or SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_GE,C_NL,       { SF=OF }
                        C_L,C_NGE:       { SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_NO,            { OF=0 }
                        C_O:             { OF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                        C_NP,C_PO,       { PF=0 }
                        C_P,C_PE:        { PF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
                        C_NS,            { SF=0 }
                        C_S:             { SF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
                        else
                          internalerror(2017042701);
                      end;
                      if RegReadByInstruction then
                        exit;
                    end;
                  { otherwise answer from the per-flag read channels; a
                    query about the whole flags register (word or wider
                    subregister) matches any flag-reading channel }
                  case getsubreg(reg) of
                    R_SUBW,R_SUBD,R_SUBQ:
                      RegReadByInstruction :=
                        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                         Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                    R_SUBFLAGCARRY:
                      RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGPARITY:
                      RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGAUXILIARY:
                      RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGZERO:
                      RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGSIGN:
                      RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGOVERFLOW:
                      RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGINTERRUPT:
                      RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGDIRECTION:
                      RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    else
                      internalerror(2017042601);
                  end;
                  exit;
                end;
              { e.g. instructions flagged Ch_NoReadIfEqualRegs do not read
                their operands when source and destination are the same
                register }
              if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
                 (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
                 (p.oper[0]^.reg=p.oper[1]^.reg) then
                exit;
              { finally, check the explicit operands whose channels mark
                them as read (or read-write / modify) }
              if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
            end;
        end;
    end;
  end;
  456. {$ifdef DEBUG_AOPTCPU}
{ Inserts the message S as an assembler comment immediately before the
  instruction P, so that optimizer decisions show up in the emitted
  assembler listing (DEBUG_AOPTCPU builds only). }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  begin
    asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  end;
  461. function debug_tostr(i: tcgint): string; inline;
  462. begin
  463. Result := tostr(i);
  464. end;
  465. function debug_regname(r: TRegister): string; inline;
  466. begin
  467. Result := '%' + std_regname(r);
  468. end;
  469. { Debug output function - creates a string representation of an operator }
  470. function debug_operstr(oper: TOper): string;
  471. begin
  472. case oper.typ of
  473. top_const:
  474. Result := '$' + debug_tostr(oper.val);
  475. top_reg:
  476. Result := debug_regname(oper.reg);
  477. top_ref:
  478. begin
  479. if oper.ref^.offset <> 0 then
  480. Result := debug_tostr(oper.ref^.offset) + '('
  481. else
  482. Result := '(';
  483. if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
  484. begin
  485. Result := Result + debug_regname(oper.ref^.base);
  486. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  487. Result := Result + ',' + debug_regname(oper.ref^.index);
  488. end
  489. else
  490. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  491. Result := Result + debug_regname(oper.ref^.index);
  492. if (oper.ref^.scalefactor > 1) then
  493. Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
  494. else
  495. Result := Result + ')';
  496. end;
  497. else
  498. Result := '[UNKNOWN]';
  499. end;
  500. end;
  501. function debug_op2str(opcode: tasmop): string; inline;
  502. begin
  503. Result := std_op2str[opcode];
  504. end;
  505. function debug_opsize2str(opsize: topsize): string; inline;
  506. begin
  507. Result := gas_opsize2str[opsize];
  508. end;
  509. {$else DEBUG_AOPTCPU}
{ No-op stub used when DEBUG_AOPTCPU is disabled; inlined away entirely. }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  begin
  end;
  513. function debug_tostr(i: tcgint): string; inline;
  514. begin
  515. Result := '';
  516. end;
  517. function debug_regname(r: TRegister): string; inline;
  518. begin
  519. Result := '';
  520. end;
  521. function debug_operstr(oper: TOper): string; inline;
  522. begin
  523. Result := '';
  524. end;
  525. function debug_op2str(opcode: tasmop): string; inline;
  526. begin
  527. Result := '';
  528. end;
  529. function debug_opsize2str(opsize: topsize): string; inline;
  530. begin
  531. Result := '';
  532. end;
  533. {$endif DEBUG_AOPTCPU}
  534. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  535. begin
  536. if not SuperRegistersEqual(reg1,reg2) then
  537. exit(false);
  538. if getregtype(reg1)<>R_INTREGISTER then
  539. exit(true); {because SuperRegisterEqual is true}
  540. case getsubreg(reg1) of
  541. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  542. higher, it preserves the high bits, so the new value depends on
  543. reg2's previous value. In other words, it is equivalent to doing:
  544. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  545. R_SUBL:
  546. exit(getsubreg(reg2)=R_SUBL);
  547. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  548. higher, it actually does a:
  549. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  550. R_SUBH:
  551. exit(getsubreg(reg2)=R_SUBH);
  552. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  553. bits of reg2:
  554. reg2 := (reg2 and $ffff0000) or word(reg1); }
  555. R_SUBW:
  556. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  557. { a write to R_SUBD always overwrites every other subregister,
  558. because it clears the high 32 bits of R_SUBQ on x86_64 }
  559. R_SUBD,
  560. R_SUBQ:
  561. exit(true);
  562. else
  563. internalerror(2017042801);
  564. end;
  565. end;
  566. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  567. begin
  568. if not SuperRegistersEqual(reg1,reg2) then
  569. exit(false);
  570. if getregtype(reg1)<>R_INTREGISTER then
  571. exit(true); {because SuperRegisterEqual is true}
  572. case getsubreg(reg1) of
  573. R_SUBL:
  574. exit(getsubreg(reg2)<>R_SUBH);
  575. R_SUBH:
  576. exit(getsubreg(reg2)<>R_SUBL);
  577. R_SUBW,
  578. R_SUBD,
  579. R_SUBQ:
  580. exit(true);
  581. else
  582. internalerror(2017042802);
  583. end;
  584. end;
{ Pre-pass peephole for shift pairs: a "shr/sar const1,x" immediately
  followed by "shl const2,x" on the same operand and size is rewritten to
  "shift-by-difference + and", or to a single "and" when the counts match.
  NOTE(review): Result is initialised to false and never set to true in any
  branch — confirm whether callers rely on the return value before use. }
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;
  begin
    result:=false;
    { changes the code sequence
      shr/sar const1, x
      shl const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    if GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_SHL,[]) and
      (taicpu(p).oper[0]^.typ = top_const) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
      OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 > const2 }
            { keep shifting right by the difference, then mask off the low
              const2 bits the shl would have zeroed }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 < const2 }
            { mask first (p becomes the AND), then shift left by the
              difference (hp1 keeps A_SHL with the reduced count) }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 = const2 }
            { the shifts cancel except for clearing the low const1 bits:
              a single AND suffices, the shl is removed }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            asml.remove(hp1);
            hp1.free;
          end;
      end;
  end;
{ Pre-pass peephole for "imul const,reg[,reg]":
  - imul $1 is dropped (2-op form) or turned into a plain mov (3-op form);
  - for other constants with exactly two set bits at most 3 positions
    apart, the multiply is rewritten as lea (reg,reg,scale)[,dest]
    optionally followed by shl — valid because the constant then factors
    as (3|5|9) shl n.  Skipped when optimizing for size or when the next
    instruction is a jo/jno that consumes IMUL's overflow flag. }
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;
    BaseValue : TCGInt;
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
        MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
      (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        if (taicpu(p).ops = 2) then
          { remove "imul $1, reg" }
          begin
            hp1 := tai(p.Next);
            asml.remove(p);
            DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
            p.free;
            p := hp1;
            result:=true;
          end
        else
          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
          begin
            hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
            p.free;
            p := hp1;
          end
      else if
        ((taicpu(p).ops <= 2) or
         (taicpu(p).oper[2]^.typ = Top_Reg)) and
        not(cs_opt_size in current_settings.optimizerswitches) and
        (not(GetNextInstruction(p, hp1)) or
         { do not rewrite when a following jo/jno needs IMUL's OF flag }
         not((tai(hp1).typ = ait_instruction) and
             ((taicpu(hp1).opcode=A_Jcc) and
              (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
              lea (reg1,reg1,Y), reg2
              shl ZZ,reg2
            imul XX, reg1 to
              lea (reg1,reg1,YY), reg1
              shl ZZ,reg2
            This optimization makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.
            This optimization can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              { factor const = BaseValue shl ShiftValue, BaseValue in 3,5,9 }
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              { lea scale encodes base+index*(BaseValue-1) = reg*BaseValue }
              TmpRef.ScaleFactor := BaseValue-1;
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              AsmL.Remove(p);
              { keep the source location of the replaced imul }
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              p.free;
              p := hp1;
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
  740. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  741. var
  742. p: taicpu;
  743. begin
  744. if not assigned(hp) or
  745. (hp.typ <> ait_instruction) then
  746. begin
  747. Result := false;
  748. exit;
  749. end;
  750. p := taicpu(hp);
  751. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  752. with insprop[p.opcode] do
  753. begin
  754. case getsubreg(reg) of
  755. R_SUBW,R_SUBD,R_SUBQ:
  756. Result:=
  757. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  758. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  759. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  760. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  761. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  762. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  763. R_SUBFLAGCARRY:
  764. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  765. R_SUBFLAGPARITY:
  766. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  767. R_SUBFLAGAUXILIARY:
  768. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  769. R_SUBFLAGZERO:
  770. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  771. R_SUBFLAGSIGN:
  772. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  773. R_SUBFLAGOVERFLOW:
  774. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  775. R_SUBFLAGINTERRUPT:
  776. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  777. R_SUBFLAGDIRECTION:
  778. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  779. else
  780. begin
  781. writeln(getsubreg(reg));
  782. internalerror(2017050501);
  783. end;
  784. end;
  785. exit;
  786. end;
  787. Result :=
  788. (((p.opcode = A_MOV) or
  789. (p.opcode = A_MOVZX) or
  790. (p.opcode = A_MOVSX) or
  791. (p.opcode = A_LEA) or
  792. (p.opcode = A_VMOVSS) or
  793. (p.opcode = A_VMOVSD) or
  794. (p.opcode = A_VMOVAPD) or
  795. (p.opcode = A_VMOVAPS) or
  796. (p.opcode = A_VMOVQ) or
  797. (p.opcode = A_MOVSS) or
  798. (p.opcode = A_MOVSD) or
  799. (p.opcode = A_MOVQ) or
  800. (p.opcode = A_MOVAPD) or
  801. (p.opcode = A_MOVAPS) or
  802. {$ifndef x86_64}
  803. (p.opcode = A_LDS) or
  804. (p.opcode = A_LES) or
  805. {$endif not x86_64}
  806. (p.opcode = A_LFS) or
  807. (p.opcode = A_LGS) or
  808. (p.opcode = A_LSS)) and
  809. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  810. (p.oper[1]^.typ = top_reg) and
  811. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  812. ((p.oper[0]^.typ = top_const) or
  813. ((p.oper[0]^.typ = top_reg) and
  814. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  815. ((p.oper[0]^.typ = top_ref) and
  816. not RegInRef(reg,p.oper[0]^.ref^)))) or
  817. ((p.opcode = A_POP) and
  818. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  819. ((p.opcode = A_IMUL) and
  820. (p.ops=3) and
  821. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  822. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  823. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  824. ((((p.opcode = A_IMUL) or
  825. (p.opcode = A_MUL)) and
  826. (p.ops=1)) and
  827. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  828. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  829. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  830. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  831. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  832. {$ifdef x86_64}
  833. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  834. {$endif x86_64}
  835. )) or
  836. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  837. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  838. {$ifdef x86_64}
  839. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  840. {$endif x86_64}
  841. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  842. {$ifndef x86_64}
  843. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  844. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  845. {$endif not x86_64}
  846. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  847. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  848. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  849. {$ifndef x86_64}
  850. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  851. {$endif not x86_64}
  852. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  853. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  854. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  855. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  856. {$ifdef x86_64}
  857. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  858. {$endif x86_64}
  859. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  860. (((p.opcode = A_FSTSW) or
  861. (p.opcode = A_FNSTSW)) and
  862. (p.oper[0]^.typ=top_reg) and
  863. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  864. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  865. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  866. (p.oper[0]^.reg=p.oper[1]^.reg) and
  867. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  868. end;
{ True if P starts a recognised function-exit sequence: a plain ret, a
  leave+ret, a stack-pointer restore (lea off(%esp),%esp) + ret, or a
  frame-pointer epilogue (mov/lea fp->sp, pop fp, ret).  A leading NOP
  (emitted by some x86-64 targets before the real exit code) is skipped. }
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  var
    hp2,hp3 : tai;
  begin
    { some x86-64 issue a NOP before the real exit code }
    if MatchInstruction(p,A_NOP,[]) then
      GetNextInstruction(p,p);
    result:=assigned(p) and (p.typ=ait_instruction) and
      ((taicpu(p).opcode = A_RET) or
       { leave; ret }
       ((taicpu(p).opcode=A_LEAVE) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { lea off(%sp),%sp ; ret }
       (((taicpu(p).opcode=A_LEA) and
         MatchOpType(taicpu(p),top_ref,top_reg) and
         (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
         (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { mov %fp,%sp or lea (%fp),%sp ; pop %fp ; ret }
       ((((taicpu(p).opcode=A_MOV) and
          MatchOpType(taicpu(p),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
         ((taicpu(p).opcode=A_LEA) and
          MatchOpType(taicpu(p),top_ref,top_reg) and
          (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
         )
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
        MatchOpType(taicpu(hp2),top_reg) and
        (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
        GetNextInstruction(hp2,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      );
  end;
  909. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  910. begin
  911. isFoldableArithOp := False;
  912. case hp1.opcode of
  913. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  914. isFoldableArithOp :=
  915. ((taicpu(hp1).oper[0]^.typ = top_const) or
  916. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  917. (taicpu(hp1).oper[0]^.reg <> reg))) and
  918. (taicpu(hp1).oper[1]^.typ = top_reg) and
  919. (taicpu(hp1).oper[1]^.reg = reg);
  920. A_INC,A_DEC,A_NEG,A_NOT:
  921. isFoldableArithOp :=
  922. (taicpu(hp1).oper[0]^.typ = top_reg) and
  923. (taicpu(hp1).oper[0]^.reg = reg);
  924. else
  925. ;
  926. end;
  927. end;
{ When the function result lives in EAX (and EDX for 64-bit ordinal
  results), the register allocator may have emitted a dealloc for those
  registers just before the exit code at P.  Removing that dealloc keeps
  the result register marked live so later peephole passes do not clobber
  it.  Which registers to process depends on the return type. }
procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

  { Walks backwards from P and deletes the nearest ra_dealloc of the given
    integer super register, stopping at the first instruction that uses it. }
  procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
    var
      hp2: tai;
    begin
      hp2 := p;
      repeat
        hp2 := tai(hp2.previous);
        if assigned(hp2) and
          (hp2.typ = ait_regalloc) and
          (tai_regalloc(hp2).ratype=ra_dealloc) and
          (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
          (getsupreg(tai_regalloc(hp2).reg) = supreg) then
          begin
            asml.remove(hp2);
            hp2.free;
            break;
          end;
      until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
    end;

  begin
    case current_procinfo.procdef.returndef.typ of
      arraydef,recorddef,pointerdef,
      stringdef,enumdef,procdef,objectdef,errordef,
      filedef,setdef,procvardef,
      classrefdef,forwarddef:
        DoRemoveLastDeallocForFuncRes(RS_EAX);
      orddef:
        if current_procinfo.procdef.returndef.size <> 0 then
          begin
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
      else
        ;
    end;
  end;
{ Pass-1 optimization for movaps/movapd:
    movapX reg,reg2
    addsX/subsX/mulsX/divsX reg3,reg2
    movapX reg2,reg
  becomes
    addsX/subsX/mulsX/divsX reg3,reg
  when reg2 is unused afterwards.  The single-precision ops pair with
  movaps and the double-precision ops with movapd. }
function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  var
    hp1,hp2 : tai;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,taicpu(p).opcode,[]) and
      OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
      MatchOpType(taicpu(hp2),top_reg,top_reg) and
      MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
      (((taicpu(p).opcode=A_MOVAPS) and
        ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
         (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
       ((taicpu(p).opcode=A_MOVAPD) and
        ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
         (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
      ) then
      { change
        movapX reg,reg2
        addsX/subsX/... reg3, reg2
        movapX reg2,reg
        to
        addsX/subsX/... reg3,reg
      }
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
          begin
            DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
              debug_op2str(taicpu(p).opcode)+' '+
              debug_op2str(taicpu(hp1).opcode)+' '+
              debug_op2str(taicpu(hp2).opcode)+') done',p);
            { we cannot eliminate the first move if
              the operations uses the same register for source and dest }
            if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
              begin
                asml.remove(p);
                p.Free;
              end;
            { redirect the arithmetic result straight into reg and drop the
              trailing move }
            taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
            asml.remove(hp2);
            hp2.Free;
            p:=hp1;
            result:=true;
          end;
      end
  end;
{ Pass-1 optimization for vmovaps/vmovapd register-to-register moves:
  - drops a self-move (vmova* reg1,reg1);
  - collapses vmova* reg1,reg2 / vmova* reg2,reg3 into vmova* reg1,reg3
    when reg2 dies, and removes the redundant back-move in the
    reg1->reg2 / reg2->reg1 case;
  - folds the move into a following VFMA-style operation plus trailing
    vmova* when the intermediate register dies. }
function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  var
    hp1,hp2 : tai;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) then
      begin
        { vmova* reg1,reg1
          =>
          <nop> }
        if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
          begin
            GetNextInstruction(p,hp1);
            asml.Remove(p);
            p.Free;
            p:=hp1;
            result:=true;
          end
        else if GetNextInstruction(p,hp1) then
          begin
            if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
              MatchOpType(taicpu(hp1),top_reg,top_reg) and
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
              begin
                { vmova* reg1,reg2
                  vmova* reg2,reg3
                  dealloc reg2
                  =>
                  vmova* reg1,reg3 }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                  begin
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    asml.Remove(hp1);
                    hp1.Free;
                    result:=true;
                  end
                { special case:
                  vmova* reg1,reg2
                  vmova* reg2,reg1
                  =>
                  vmova* reg1,reg2 }
                else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                  begin
                    asml.Remove(hp1);
                    hp1.Free;
                    result:=true;
                  end
              end
            else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
              { we mix single and double operations here because we assume that the compiler
                generates vmovapd only after double operations and vmovaps only after single operations }
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
              GetNextInstruction(hp1,hp2) and
              MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
              MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
              begin
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
                  then
                  begin
                    { operate on reg1 directly and drop both moves }
                    taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                    asml.Remove(p);
                    p.Free;
                    asml.Remove(hp2);
                    hp2.Free;
                    p:=hp1;
                  end;
              end;
          end;
      end;
  end;
{ Pass-1 optimization for three-operand AVX arithmetic: when a V<Op>X is
  immediately followed by a vmovaps/vmovapd that copies its result to
  another register and the intermediate register dies, the operation is
  retargeted to write its final destination directly. }
function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    result:=false;
    { replace
      V<Op>X %mreg1,%mreg2,%mreg3
      VMovX %mreg3,%mreg4
      dealloc %mreg3
      by
      V<Op>X %mreg1,%mreg2,%mreg4
      ?
    }
    if GetNextInstruction(p,hp1) and
      { we mix single and double operations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
      MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
      (taicpu(hp1).oper[1]^.typ=top_reg) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
          ) then
          begin
            taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
  end;
  1127. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1128. var
  1129. hp1, hp2: tai;
  1130. GetNextInstruction_p: Boolean;
  1131. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  1132. NewSize: topsize;
  1133. begin
  1134. Result:=false;
  1135. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1136. { remove mov reg1,reg1? }
  1137. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  1138. then
  1139. begin
  1140. DebugMsg(SPeepholeOptimization + 'Mov2Nop done',p);
  1141. { take care of the register (de)allocs following p }
  1142. UpdateUsedRegs(tai(p.next));
  1143. asml.remove(p);
  1144. p.free;
  1145. p:=hp1;
  1146. Result:=true;
  1147. exit;
  1148. end;
  1149. if GetNextInstruction_p and
  1150. MatchInstruction(hp1,A_AND,[]) and
  1151. (taicpu(p).oper[1]^.typ = top_reg) and
  1152. MatchOpType(taicpu(hp1),top_const,top_reg) then
  1153. begin
  1154. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1155. begin
  1156. case taicpu(p).opsize of
  1157. S_L:
  1158. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1159. begin
  1160. { Optimize out:
  1161. mov x, %reg
  1162. and ffffffffh, %reg
  1163. }
  1164. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  1165. asml.remove(hp1);
  1166. hp1.free;
  1167. Result:=true;
  1168. exit;
  1169. end;
  1170. S_Q: { TODO: Confirm if this is even possible }
  1171. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1172. begin
  1173. { Optimize out:
  1174. mov x, %reg
  1175. and ffffffffffffffffh, %reg
  1176. }
  1177. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  1178. asml.remove(hp1);
  1179. hp1.free;
  1180. Result:=true;
  1181. exit;
  1182. end;
  1183. else
  1184. ;
  1185. end;
  1186. end
  1187. else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  1188. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  1189. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  1190. then
  1191. begin
  1192. InputVal := debug_operstr(taicpu(p).oper[0]^);
  1193. MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
  1194. case taicpu(p).opsize of
  1195. S_B:
  1196. if (taicpu(hp1).oper[0]^.val = $ff) then
  1197. begin
  1198. { Convert:
  1199. movb x, %regl movb x, %regl
  1200. andw ffh, %regw andl ffh, %regd
  1201. To:
  1202. movzbw x, %regd movzbl x, %regd
  1203. (Identical registers, just different sizes)
  1204. }
  1205. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  1206. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  1207. case taicpu(hp1).opsize of
  1208. S_W: NewSize := S_BW;
  1209. S_L: NewSize := S_BL;
  1210. {$ifdef x86_64}
  1211. S_Q: NewSize := S_BQ;
  1212. {$endif x86_64}
  1213. else
  1214. InternalError(2018011510);
  1215. end;
  1216. end
  1217. else
  1218. NewSize := S_NO;
  1219. S_W:
  1220. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1221. begin
  1222. { Convert:
  1223. movw x, %regw
  1224. andl ffffh, %regd
  1225. To:
  1226. movzwl x, %regd
  1227. (Identical registers, just different sizes)
  1228. }
  1229. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  1230. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  1231. case taicpu(hp1).opsize of
  1232. S_L: NewSize := S_WL;
  1233. {$ifdef x86_64}
  1234. S_Q: NewSize := S_WQ;
  1235. {$endif x86_64}
  1236. else
  1237. InternalError(2018011511);
  1238. end;
  1239. end
  1240. else
  1241. NewSize := S_NO;
  1242. else
  1243. NewSize := S_NO;
  1244. end;
  1245. if NewSize <> S_NO then
  1246. begin
  1247. PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
  1248. { The actual optimization }
  1249. taicpu(p).opcode := A_MOVZX;
  1250. taicpu(p).changeopsize(NewSize);
  1251. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  1252. { Safeguard if "and" is followed by a conditional command }
  1253. TransferUsedRegs(TmpUsedRegs);
  1254. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  1255. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
  1256. begin
  1257. { At this point, the "and" command is effectively equivalent to
  1258. "test %reg,%reg". This will be handled separately by the
  1259. Peephole Optimizer. [Kit] }
  1260. DebugMsg(SPeepholeOptimization + PreMessage +
  1261. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1262. end
  1263. else
  1264. begin
  1265. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
  1266. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1267. asml.Remove(hp1);
  1268. hp1.Free;
  1269. end;
  1270. Result := True;
  1271. Exit;
  1272. end;
  1273. end;
  1274. end
  1275. else if GetNextInstruction_p and
  1276. MatchInstruction(hp1,A_MOV,[]) and
  1277. (taicpu(p).oper[1]^.typ = top_reg) and
  1278. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1279. begin
  1280. TransferUsedRegs(TmpUsedRegs);
  1281. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1282. { we have
  1283. mov x, %treg
  1284. mov %treg, y
  1285. }
  1286. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1287. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1288. { we've got
  1289. mov x, %treg
  1290. mov %treg, y
  1291. with %treg is not used after }
  1292. case taicpu(p).oper[0]^.typ Of
  1293. top_reg:
  1294. begin
  1295. { change
  1296. mov %reg, %treg
  1297. mov %treg, y
  1298. to
  1299. mov %reg, y
  1300. }
  1301. if taicpu(hp1).oper[1]^.typ=top_reg then
  1302. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1303. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1304. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 2 done',p);
  1305. asml.remove(hp1);
  1306. hp1.free;
  1307. Result:=true;
  1308. Exit;
  1309. end;
  1310. top_const:
  1311. begin
  1312. { change
  1313. mov const, %treg
  1314. mov %treg, y
  1315. to
  1316. mov const, y
  1317. }
  1318. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  1319. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1320. begin
  1321. if taicpu(hp1).oper[1]^.typ=top_reg then
  1322. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1323. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1324. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  1325. asml.remove(hp1);
  1326. hp1.free;
  1327. Result:=true;
  1328. Exit;
  1329. end;
  1330. end;
  1331. top_ref:
  1332. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1333. begin
  1334. { change
  1335. mov mem, %treg
  1336. mov %treg, %reg
  1337. to
  1338. mov mem, %reg"
  1339. }
  1340. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1341. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  1342. asml.remove(hp1);
  1343. hp1.free;
  1344. Result:=true;
  1345. Exit;
  1346. end;
  1347. else
  1348. ;
  1349. end;
  1350. end
  1351. else
  1352. { Change
  1353. mov %reg1, %reg2
  1354. xxx %reg2, ???
  1355. to
  1356. mov %reg1, %reg2
  1357. xxx %reg1, ???
  1358. to avoid a write/read penalty
  1359. }
  1360. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1361. GetNextInstruction(p,hp1) and
  1362. (tai(hp1).typ = ait_instruction) and
  1363. (taicpu(hp1).ops >= 1) and
  1364. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1365. { we have
  1366. mov %reg1, %reg2
  1367. XXX %reg2, ???
  1368. }
  1369. begin
  1370. if ((taicpu(hp1).opcode = A_OR) or
  1371. (taicpu(hp1).opcode = A_AND) or
  1372. (taicpu(hp1).opcode = A_TEST)) and
  1373. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1374. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1375. { we have
  1376. mov %reg1, %reg2
  1377. test/or/and %reg2, %reg2
  1378. }
  1379. begin
  1380. TransferUsedRegs(TmpUsedRegs);
  1381. { reg1 will be used after the first instruction,
  1382. so update the allocation info }
  1383. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1384. if GetNextInstruction(hp1, hp2) and
  1385. (hp2.typ = ait_instruction) and
  1386. taicpu(hp2).is_jmp and
  1387. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1388. { change
  1389. mov %reg1, %reg2
  1390. test/or/and %reg2, %reg2
  1391. jxx
  1392. to
  1393. test %reg1, %reg1
  1394. jxx
  1395. }
  1396. begin
  1397. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1398. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1399. DebugMsg(SPeepholeOptimization + 'MovTestJxx2TestMov done',p);
  1400. asml.remove(p);
  1401. p.free;
  1402. p := hp1;
  1403. Exit;
  1404. end
  1405. else
  1406. { change
  1407. mov %reg1, %reg2
  1408. test/or/and %reg2, %reg2
  1409. to
  1410. mov %reg1, %reg2
  1411. test/or/and %reg1, %reg1
  1412. }
  1413. begin
  1414. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1415. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1416. DebugMsg(SPeepholeOptimization + 'MovTestJxx2MovTestJxx done',p);
  1417. end;
  1418. end
  1419. end
  1420. else
  1421. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1422. x >= RetOffset) as it doesn't do anything (it writes either to a
  1423. parameter or to the temporary storage room for the function
  1424. result)
  1425. }
  1426. if GetNextInstruction_p and
  1427. (tai(hp1).typ = ait_instruction) then
  1428. begin
  1429. if IsExitCode(hp1) and
  1430. MatchOpType(taicpu(p),top_reg,top_ref) and
  1431. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1432. not(assigned(current_procinfo.procdef.funcretsym) and
  1433. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1434. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1435. begin
  1436. asml.remove(p);
  1437. p.free;
  1438. p:=hp1;
  1439. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  1440. RemoveLastDeallocForFuncRes(p);
  1441. exit;
  1442. end
  1443. { change
  1444. mov reg1, mem1
  1445. test/cmp x, mem1
  1446. to
  1447. mov reg1, mem1
  1448. test/cmp x, reg1
  1449. }
  1450. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1451. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1452. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1453. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1454. begin
  1455. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1456. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  1457. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1458. end;
  1459. end;
  1460. { Next instruction is also a MOV ? }
  1461. if GetNextInstruction_p and
  1462. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1463. begin
  1464. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1465. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1466. { mov reg1, mem1 or mov mem1, reg1
  1467. mov mem2, reg2 mov reg2, mem2}
  1468. begin
  1469. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1470. { mov reg1, mem1 or mov mem1, reg1
  1471. mov mem2, reg1 mov reg2, mem1}
  1472. begin
  1473. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1474. { Removes the second statement from
  1475. mov reg1, mem1/reg2
  1476. mov mem1/reg2, reg1 }
  1477. begin
  1478. if taicpu(p).oper[0]^.typ=top_reg then
  1479. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1480. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  1481. asml.remove(hp1);
  1482. hp1.free;
  1483. Result:=true;
  1484. exit;
  1485. end
  1486. else
  1487. begin
  1488. TransferUsedRegs(TmpUsedRegs);
  1489. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1490. if (taicpu(p).oper[1]^.typ = top_ref) and
  1491. { mov reg1, mem1
  1492. mov mem2, reg1 }
  1493. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1494. GetNextInstruction(hp1, hp2) and
  1495. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1496. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1497. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1498. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1499. { change to
  1500. mov reg1, mem1 mov reg1, mem1
  1501. mov mem2, reg1 cmp reg1, mem2
  1502. cmp mem1, reg1
  1503. }
  1504. begin
  1505. asml.remove(hp2);
  1506. hp2.free;
  1507. taicpu(hp1).opcode := A_CMP;
  1508. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1509. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1510. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1511. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  1512. end;
  1513. end;
  1514. end
  1515. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1516. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1517. begin
  1518. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1519. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1520. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  1521. end
  1522. else
  1523. begin
  1524. TransferUsedRegs(TmpUsedRegs);
  1525. if GetNextInstruction(hp1, hp2) and
  1526. MatchOpType(taicpu(p),top_ref,top_reg) and
  1527. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1528. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1529. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1530. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1531. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1532. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1533. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1534. { mov mem1, %reg1
  1535. mov %reg1, mem2
  1536. mov mem2, reg2
  1537. to:
  1538. mov mem1, reg2
  1539. mov reg2, mem2}
  1540. begin
  1541. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1542. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  1543. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1544. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1545. asml.remove(hp2);
  1546. hp2.free;
  1547. end
  1548. {$ifdef i386}
  1549. { this is enabled for i386 only, as the rules to create the reg sets below
  1550. are too complicated for x86-64, so this makes this code too error prone
  1551. on x86-64
  1552. }
  1553. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1554. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1555. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1556. { mov mem1, reg1 mov mem1, reg1
  1557. mov reg1, mem2 mov reg1, mem2
  1558. mov mem2, reg2 mov mem2, reg1
  1559. to: to:
  1560. mov mem1, reg1 mov mem1, reg1
  1561. mov mem1, reg2 mov reg1, mem2
  1562. mov reg1, mem2
  1563. or (if mem1 depends on reg1
  1564. and/or if mem2 depends on reg2)
  1565. to:
  1566. mov mem1, reg1
  1567. mov reg1, mem2
  1568. mov reg1, reg2
  1569. }
  1570. begin
  1571. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1572. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1573. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1574. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1575. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1576. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1577. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1578. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1579. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1580. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1581. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1582. end
  1583. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1584. begin
  1585. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1586. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1587. end
  1588. else
  1589. begin
  1590. asml.remove(hp2);
  1591. hp2.free;
  1592. end
  1593. {$endif i386}
  1594. ;
  1595. end;
  1596. end
  1597. (* { movl [mem1],reg1
  1598. movl [mem1],reg2
  1599. to
  1600. movl [mem1],reg1
  1601. movl reg1,reg2
  1602. }
  1603. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1604. (taicpu(p).oper[1]^.typ = top_reg) and
  1605. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1606. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1607. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1608. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1609. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1610. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1611. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1612. else*)
  1613. { movl const1,[mem1]
  1614. movl [mem1],reg1
  1615. to
  1616. movl const1,reg1
  1617. movl reg1,[mem1]
  1618. }
  1619. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1620. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1621. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1622. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1623. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1624. begin
  1625. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1626. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1627. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1628. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1629. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1630. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  1631. end
  1632. {
  1633. mov* x,reg1
  1634. mov* y,reg1
  1635. to
  1636. mov* y,reg1
  1637. }
  1638. else if (taicpu(p).oper[1]^.typ=top_reg) and
  1639. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1640. not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then
  1641. begin
  1642. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);
  1643. { take care of the register (de)allocs following p }
  1644. UpdateUsedRegs(tai(p.next));
  1645. asml.remove(p);
  1646. p.free;
  1647. p:=hp1;
  1648. Result:=true;
  1649. exit;
  1650. end;
  1651. end
  1652. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1653. GetNextInstruction_p and
  1654. (hp1.typ = ait_instruction) and
  1655. GetNextInstruction(hp1, hp2) and
  1656. MatchInstruction(hp2,A_MOV,[]) and
  1657. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1658. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1659. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  1660. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1661. ) then
  1662. begin
  1663. if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1664. (taicpu(hp2).oper[0]^.typ=top_reg) then
  1665. { change movsX/movzX reg/ref, reg2
  1666. add/sub/or/... reg3/$const, reg2
  1667. mov reg2 reg/ref
  1668. dealloc reg2
  1669. to
  1670. add/sub/or/... reg3/$const, reg/ref }
  1671. begin
  1672. TransferUsedRegs(TmpUsedRegs);
  1673. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1674. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1675. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1676. begin
  1677. { by example:
  1678. movswl %si,%eax movswl %si,%eax p
  1679. decl %eax addl %edx,%eax hp1
  1680. movw %ax,%si movw %ax,%si hp2
  1681. ->
  1682. movswl %si,%eax movswl %si,%eax p
  1683. decw %eax addw %edx,%eax hp1
  1684. movw %ax,%si movw %ax,%si hp2
  1685. }
  1686. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  1687. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  1688. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  1689. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize),p);
  1690. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1691. {
  1692. ->
  1693. movswl %si,%eax movswl %si,%eax p
  1694. decw %si addw %dx,%si hp1
  1695. movw %ax,%si movw %ax,%si hp2
  1696. }
  1697. case taicpu(hp1).ops of
  1698. 1:
  1699. begin
  1700. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1701. if taicpu(hp1).oper[0]^.typ=top_reg then
  1702. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1703. end;
  1704. 2:
  1705. begin
  1706. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1707. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1708. (taicpu(hp1).opcode<>A_SHL) and
  1709. (taicpu(hp1).opcode<>A_SHR) and
  1710. (taicpu(hp1).opcode<>A_SAR) then
  1711. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1712. end;
  1713. else
  1714. internalerror(2008042701);
  1715. end;
  1716. {
  1717. ->
  1718. decw %si addw %dx,%si p
  1719. }
  1720. asml.remove(p);
  1721. asml.remove(hp2);
  1722. p.Free;
  1723. hp2.Free;
  1724. p := hp1;
  1725. end;
  1726. end
  1727. else if MatchOpType(taicpu(hp2),top_reg,top_reg) and
  1728. not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
  1729. ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
  1730. { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
  1731. ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
  1732. )
  1733. {$ifdef i386}
  1734. { byte registers of esi, edi, ebp, esp are not available on i386 }
  1735. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  1736. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  1737. {$endif i386}
  1738. then
  1739. { change movsX/movzX reg/ref, reg2
  1740. add/sub/or/... regX/$const, reg2
  1741. mov reg2, reg3
  1742. dealloc reg2
  1743. to
  1744. movsX/movzX reg/ref, reg3
  1745. add/sub/or/... reg3/$const, reg3
  1746. }
  1747. begin
  1748. TransferUsedRegs(TmpUsedRegs);
  1749. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1750. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1751. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1752. begin
  1753. { by example:
  1754. movswl %si,%eax movswl %si,%eax p
  1755. decl %eax addl %edx,%eax hp1
  1756. movw %ax,%si movw %ax,%si hp2
  1757. ->
  1758. movswl %si,%eax movswl %si,%eax p
  1759. decw %eax addw %edx,%eax hp1
  1760. movw %ax,%si movw %ax,%si hp2
  1761. }
  1762. DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
  1763. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  1764. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  1765. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize),p);
  1766. { limit size of constants as well to avoid assembler errors, but
  1767. check opsize to avoid overflow when left shifting the 1 }
  1768. if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=4) then
  1769. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl (topsize2memsize[taicpu(hp2).opsize]*8))-1);
  1770. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1771. taicpu(p).changeopsize(taicpu(hp2).opsize);
  1772. if taicpu(p).oper[0]^.typ=top_reg then
  1773. setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1774. taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
  1775. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  1776. {
  1777. ->
  1778. movswl %si,%eax movswl %si,%eax p
  1779. decw %si addw %dx,%si hp1
  1780. movw %ax,%si movw %ax,%si hp2
  1781. }
  1782. case taicpu(hp1).ops of
  1783. 1:
  1784. begin
  1785. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1786. if taicpu(hp1).oper[0]^.typ=top_reg then
  1787. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1788. end;
  1789. 2:
  1790. begin
  1791. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1792. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1793. (taicpu(hp1).opcode<>A_SHL) and
  1794. (taicpu(hp1).opcode<>A_SHR) and
  1795. (taicpu(hp1).opcode<>A_SAR) then
  1796. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1797. end;
  1798. else
  1799. internalerror(2018111801);
  1800. end;
  1801. {
  1802. ->
  1803. decw %si addw %dx,%si p
  1804. }
  1805. asml.remove(hp2);
  1806. hp2.Free;
  1807. end;
  1808. end;
  1809. end
  1810. else if GetNextInstruction_p and
  1811. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1812. GetNextInstruction(hp1, hp2) and
  1813. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1814. MatchOperand(Taicpu(p).oper[0]^,0) and
  1815. (Taicpu(p).oper[1]^.typ = top_reg) and
  1816. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1817. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1818. { mov reg1,0
  1819. bts reg1,operand1 --> mov reg1,operand2
  1820. or reg1,operand2 bts reg1,operand1}
  1821. begin
  1822. Taicpu(hp2).opcode:=A_MOV;
  1823. asml.remove(hp1);
  1824. insertllitem(hp2,hp2.next,hp1);
  1825. asml.remove(p);
  1826. p.free;
  1827. p:=hp1;
  1828. end
  1829. else if GetNextInstruction_p and
  1830. MatchInstruction(hp1,A_LEA,[S_L]) and
  1831. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1832. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1833. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1834. ) or
  1835. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1836. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1837. )
  1838. ) then
  1839. { mov reg1,ref
  1840. lea reg2,[reg1,reg2]
  1841. to
  1842. add reg2,ref}
  1843. begin
  1844. TransferUsedRegs(TmpUsedRegs);
  1845. { reg1 may not be used afterwards }
  1846. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1847. begin
  1848. Taicpu(hp1).opcode:=A_ADD;
  1849. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1850. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  1851. asml.remove(p);
  1852. p.free;
  1853. p:=hp1;
  1854. end;
  1855. end;
  1856. end;
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  { Pass-1 peephole for movXX-style copies (SSE/x87 register moves).
    Looks at the instruction after p; when the pair forms a store
    immediately followed by a reload of the same value (or a copy
    straight back), the redundant second movXX -- and, when the copied
    register dies, the first one as well -- is removed.
    Returns True when the instruction stream was changed; p may be
    advanced past the removed instruction(s). }
  var
    hp1 : tai;   { instruction following p }
  begin
    Result:=false;
    if taicpu(p).ops <> 2 then
      exit;
    { the follower must be the same opcode with the same operand size }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        { operand kinds must be mirrored (reg/mem followed by mem/reg) }
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
           (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
            {  movXX reg1, mem1     or     movXX mem1, reg1
               movXX mem2, reg2            movXX reg2, mem2}
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg1            movXX reg2, mem1}
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                        movXX reg1, mem1/reg2
                        movXX mem1/reg2, reg1
                    }
                    { keep the allocation of reg1 alive across the pair }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { Removes the second statement from
                        movXX mem1/reg1, reg2
                        movXX reg2, mem1/reg1
                    }
                    { if the intermediate register dies after hp1, both
                      instructions are dead and the whole pair is dropped }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      begin
                        asml.remove(p);
                        p.free;
                        { NOTE(review): p is re-pointed past hp1 here; this
                          relies on GetNextInstruction leaving p in a state
                          the caller can handle if no successor exists --
                          confirm against the helper's contract }
                        GetNextInstruction(hp1,p);
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                      end
                    else
                      DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                    { in either case the second movXX is redundant }
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
  1908. function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  1909. var
  1910. hp1 : tai;
  1911. begin
  1912. result:=false;
  1913. { replace
  1914. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  1915. MovX %mreg2,%mreg1
  1916. dealloc %mreg2
  1917. by
  1918. <Op>X %mreg2,%mreg1
  1919. ?
  1920. }
  1921. if GetNextInstruction(p,hp1) and
  1922. { we mix single and double opperations here because we assume that the compiler
  1923. generates vmovapd only after double operations and vmovaps only after single operations }
  1924. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  1925. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1926. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  1927. (taicpu(p).oper[0]^.typ=top_reg) then
  1928. begin
  1929. TransferUsedRegs(TmpUsedRegs);
  1930. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1931. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1932. begin
  1933. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  1934. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1935. DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
  1936. asml.Remove(hp1);
  1937. hp1.Free;
  1938. result:=true;
  1939. end;
  1940. end;
  1941. end;
function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  { Pass-1 peephole for LEA: strips useless segment prefixes, rewrites
    trivial leas as mov/inc/dec/add/sub, folds lea+mov and lea+lea pairs,
    and converts a "lea x(%sp); call; lea -x(%sp); ret" epilogue into a
    tail jump.  Returns True when p was replaced/removed. }
  var
    hp1, hp2, hp3: tai;
    l : ASizeInt;   { the lea's constant displacement }
  begin
    Result:=false;
    { removes seg register prefixes from LEA operations, as they
      don't do anything }
    taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
    { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
    if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
       (taicpu(p).oper[0]^.ref^.index = NR_NO) and
       { do not mess with leas accessing the stack pointer }
       (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
       (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
      begin
        if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
           (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            { lea (%reg1),%reg2 with reg1<>reg2 -> plain register copy }
            hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
              taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous,p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            { lea (%reg),%reg with zero offset does nothing -> remove }
            hp1:=taicpu(p.Next);
            DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
            asml.remove(p);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        { continue to use lea to adjust the stack pointer,
          it is the recommended way, but only if not optimizing for size }
        { NOTE(review): the outer guard already excludes the stack pointer
          as destination, so the first half of this condition is always
          true here -- verify whether the cs_opt_size alternative is still
          reachable as intended }
        else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
          (cs_opt_size in current_settings.optimizerswitches) then
          with taicpu(p).oper[0]^.ref^ do
            if (base = taicpu(p).oper[1]^.reg) then
              begin
                { lea offset(%reg),%reg -> inc/dec/add/sub %reg }
                l:=offset;
                if (l=1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
                  end
                else if (l=-1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
                  end
                else
                  begin
                    { guard against negating the most negative 32-bit value }
                    if (l<0) and (l<>-2147483648) then
                      begin
                        taicpu(p).opcode:=A_SUB;
                        taicpu(p).loadConst(0,-l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                      end
                    else
                      begin
                        taicpu(p).opcode:=A_ADD;
                        taicpu(p).loadConst(0,l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                      end;
                  end;
                Result:=true;
                exit;
              end;
      end;
    { fold "lea x,%reg1; mov %reg1,%reg2" into "lea x,%reg2" when reg1 dies }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOpType(Taicpu(hp1),top_reg,top_reg) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
    { changes
        lea offset1(regX), reg1
        lea offset2(reg1), reg1
      to
        lea offset1+offset2(regX), reg1 }
    { both references must agree in every component except base/offset }
    if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
      MatchInstruction(hp1,A_LEA,[S_L]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
      (taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp1).oper[0]^.ref^.index) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp1).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp1).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp1).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp1).oper[0]^.ref^.symbol) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaLea2Lea done',p);
        inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
        taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
        asml.Remove(p);
        p.Free;
        p:=hp1;
        result:=true;
      end;
    { replace
        lea x(stackpointer),stackpointer
        call procname
        lea -x(stackpointer),stackpointer
        ret
      by
        jmp procname
      this should never hurt except when pic is used, not sure
      how to handle it then
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      not(cs_create_pic in current_settings.moduleswitches) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      { the second lea must exactly undo the first one's displacement }
      (taicpu(p).oper[0]^.ref^.base=taicpu(hp2).oper[0]^.ref^.base) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp2).oper[0]^.ref^.index) and
      (taicpu(p).oper[0]^.ref^.offset=-taicpu(hp2).oper[0]^.ref^.offset) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp2).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp2).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp2).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp2).oper[0]^.ref^.symbol) and
      GetNextInstruction(hp2, hp3) and
      MatchInstruction(hp3,A_RET,[S_NO]) and
      (taicpu(hp3).ops=0) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        { turn the call into a tail jump and drop the stack adjustments }
        taicpu(hp1).opcode:=A_JMP;
        taicpu(hp1).is_jmp:=true;
        asml.remove(p);
        asml.remove(hp2);
        asml.remove(hp3);
        p.free;
        hp2.free;
        hp3.free;
        p:=hp1;
        Result:=true;
      end;
  end;
  2115. function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  2116. var
  2117. hp1 : tai;
  2118. begin
  2119. DoSubAddOpt := False;
  2120. if GetLastInstruction(p, hp1) and
  2121. (hp1.typ = ait_instruction) and
  2122. (taicpu(hp1).opsize = taicpu(p).opsize) then
  2123. case taicpu(hp1).opcode Of
  2124. A_DEC:
  2125. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  2126. MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  2127. begin
  2128. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
  2129. asml.remove(hp1);
  2130. hp1.free;
  2131. end;
  2132. A_SUB:
  2133. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  2134. MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
  2135. begin
  2136. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
  2137. asml.remove(hp1);
  2138. hp1.free;
  2139. end;
  2140. A_ADD:
  2141. begin
  2142. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  2143. MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
  2144. begin
  2145. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  2146. asml.remove(hp1);
  2147. hp1.free;
  2148. if (taicpu(p).oper[0]^.val = 0) then
  2149. begin
  2150. hp1 := tai(p.next);
  2151. asml.remove(p);
  2152. p.free;
  2153. if not GetLastInstruction(hp1, p) then
  2154. p := hp1;
  2155. DoSubAddOpt := True;
  2156. end
  2157. end;
  2158. end;
  2159. else
  2160. ;
  2161. end;
  2162. end;
  2163. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  2164. {$ifdef i386}
  2165. var
  2166. hp1 : tai;
  2167. {$endif i386}
  2168. begin
  2169. Result:=false;
  2170. { * change "subl $2, %esp; pushw x" to "pushl x"}
  2171. { * change "sub/add const1, reg" or "dec reg" followed by
  2172. "sub const2, reg" to one "sub ..., reg" }
  2173. if MatchOpType(taicpu(p),top_const,top_reg) then
  2174. begin
  2175. {$ifdef i386}
  2176. if (taicpu(p).oper[0]^.val = 2) and
  2177. (taicpu(p).oper[1]^.reg = NR_ESP) and
  2178. { Don't do the sub/push optimization if the sub }
  2179. { comes from setting up the stack frame (JM) }
  2180. (not(GetLastInstruction(p,hp1)) or
  2181. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  2182. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  2183. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  2184. begin
  2185. hp1 := tai(p.next);
  2186. while Assigned(hp1) and
  2187. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  2188. not RegReadByInstruction(NR_ESP,hp1) and
  2189. not RegModifiedByInstruction(NR_ESP,hp1) do
  2190. hp1 := tai(hp1.next);
  2191. if Assigned(hp1) and
  2192. MatchInstruction(hp1,A_PUSH,[S_W]) then
  2193. begin
  2194. taicpu(hp1).changeopsize(S_L);
  2195. if taicpu(hp1).oper[0]^.typ=top_reg then
  2196. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  2197. hp1 := tai(p.next);
  2198. asml.remove(p);
  2199. p.free;
  2200. p := hp1;
  2201. Result:=true;
  2202. exit;
  2203. end;
  2204. end;
  2205. {$endif i386}
  2206. if DoSubAddOpt(p) then
  2207. Result:=true;
  2208. end;
  2209. end;
function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
  var
    TmpBool1,TmpBool2 : Boolean;
    tmpref : treference;
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_const,top_reg) and
       (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
       (taicpu(p).oper[0]^.val <= 3) then
      { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
      begin
        { should we check the next instruction? }
        TmpBool1 := True;
        { have we found an add/sub which could be
          integrated in the lea? }
        TmpBool2 := False;
        { build up the lea reference: shifted register becomes the index
          scaled by 2^const }
        reference_reset(tmpref,2,[]);
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
        { absorb following add/sub/inc/dec instructions that target the
          same register, as long as the instruction after them does not
          read the flags (which the absorbed instruction would have set
          but lea does not) }
        while TmpBool1 and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              ((((taicpu(hp1).opcode = A_ADD) or
                 (taicpu(hp1).opcode = A_SUB)) and
                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
               (((taicpu(hp1).opcode = A_INC) or
                 (taicpu(hp1).opcode = A_DEC)) and
                (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
              (not GetNextInstruction(hp1,hp2) or
               not instrReadsFlags(hp2)) Do
          begin
            TmpBool1 := False;
            if (taicpu(hp1).oper[0]^.typ = Top_Const) then
              { add/sub with a constant: fold it into the lea offset }
              begin
                TmpBool1 := True;
                TmpBool2 := True;
                case taicpu(hp1).opcode of
                  A_ADD:
                    inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  A_SUB:
                    dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  else
                    internalerror(2019050536);
                end;
                asml.remove(hp1);
                hp1.free;
              end
            else
              { "add reg" can be folded only while the base slot of the
                reference is still free; inc/dec adjust the offset }
              if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                 (((taicpu(hp1).opcode = A_ADD) and
                   (TmpRef.base = NR_NO)) or
                  (taicpu(hp1).opcode = A_INC) or
                  (taicpu(hp1).opcode = A_DEC)) then
                begin
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      TmpRef.base := taicpu(hp1).oper[0]^.reg;
                    A_INC:
                      inc(TmpRef.offset);
                    A_DEC:
                      dec(TmpRef.offset);
                    else
                      internalerror(2019050535);
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end;
          end;
        { emit the combined instruction when something was folded, or -
          on 32 bit targets before the Pentium II - even for a lone small
          shift, where lea/add is preferable (unless optimizing for size) }
        if TmpBool2
{$ifndef x86_64}
           or
           ((current_settings.optimizecputype < cpu_Pentium2) and
            (taicpu(p).oper[0]^.val <= 3) and
            not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
          then
          begin
            if not(TmpBool2) and
               (taicpu(p).oper[0]^.val = 1) then
              { nothing was folded and the shift count is 1: plain
                "add reg,reg" doubles the register and is shorter than lea }
              begin
                hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              end
            else
              hp1 := taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$ifndef x86_64}
    else if (current_settings.optimizecputype < cpu_Pentium2) and
            MatchOpType(taicpu(p),top_const,top_reg) then
      begin
        { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
          but faster on a 486, and Tairable in both U and V pipes on the Pentium
          (unlike shl, which is only Tairable in the U pipe) }
        if taicpu(p).oper[0]^.val=1 then
          begin
            hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
              taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end
        { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
          "shl $3, %reg" to "lea (,%reg,8), %reg }
        else if (taicpu(p).opsize = S_L) and
                (taicpu(p).oper[0]^.val<= 3) then
          begin
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$endif x86_64}
    ;
  end;
function TX86AsmOptimizer.OptPass1SETcc(var p: tai): boolean;
  var
    hp1,hp2,next: tai; SetC, JumpC: TAsmCond;
  begin
    Result:=false;
    { look for the three-instruction pattern
        set(C)   %reg
        testb    %reg,%reg
        j(E/NE)  label
      where the byte register written by SETcc is immediately tested
      against itself and then conditionally jumped on }
    if MatchOpType(taicpu(p),top_reg) and
       GetNextInstruction(p, hp1) and
       MatchInstruction(hp1, A_TEST, [S_B]) and
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
       (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
       GetNextInstruction(hp1, hp2) and
       MatchInstruction(hp2, A_Jcc, []) then
      { Change from:             To:
          set(C) %reg            j(~C) label
          test %reg,%reg
          je label

          set(C) %reg            j(C) label
          test %reg,%reg
          jne label
      }
      begin
        next := tai(p.Next);
        { snapshot register liveness so we can later decide whether the
          SETcc result register is still needed after the jump }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, next);
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        { the TEST becomes redundant once the jump condition is folded }
        asml.Remove(hp1);
        hp1.Free;
        JumpC := taicpu(hp2).condition;
        if conditions_equal(JumpC, C_E) then
          { "je" fires when SETcc wrote 0, i.e. when C was false }
          SetC := inverse_cond(taicpu(p).condition)
        else if conditions_equal(JumpC, C_NE) then
          SetC := taicpu(p).condition
        else
          { only JE/JNE can meaningfully follow "test reg,reg" here }
          InternalError(2018061400);
        if SetC = C_NONE then
          InternalError(2018061401);
        taicpu(hp2).SetCondition(SetC);
        { the SETcc itself may only be removed if its result register is
          dead after the jump }
        if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs) then
          begin
            asml.Remove(p);
            UpdateUsedRegs(next);
            p.Free;
            Result := True;
            p := hp2;
          end;
        DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> Jcc',p);
      end;
  end;
function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
  { returns true if a "continue" should be done after this optimization }
  var
    hp1, hp2: tai;
  begin
    Result := false;
    { look for a store that is immediately re-loaded from the very same
      memory location with the same size:
        fstp mem ; fld mem      (or fistp mem ; fild mem) }
    if MatchOpType(taicpu(p),top_ref) and
       GetNextInstruction(p, hp1) and
       (hp1.typ = ait_instruction) and
       (((taicpu(hp1).opcode = A_FLD) and
         (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and
         (taicpu(hp1).opcode = A_FILD))) and
       MatchOpType(taicpu(hp1),top_ref) and
       (taicpu(hp1).opsize = taicpu(p).opsize) and
       RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
      begin
        { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
        if (taicpu(p).opsize=S_FX) and
           GetNextInstruction(hp1, hp2) and
           (hp2.typ = ait_instruction) and
           IsExitCode(hp2) and
           { the store must target a frame-pointer-relative temp that
             does not lie below the function result location, i.e. a
             dead temp that cannot be observed once the procedure
             exits - then both instructions can simply be dropped }
           (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
           not(assigned(current_procinfo.procdef.funcretsym) and
               (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
           (taicpu(p).oper[0]^.ref^.index = NR_NO) then
          begin
            asml.remove(p);
            asml.remove(hp1);
            p.free;
            hp1.free;
            p := hp2;
            RemoveLastDeallocForFuncRes(p);
            Result := true;
          end
        (* can't be done because the store operation rounds
        else
          { fst can't store an extended value! }
          if (taicpu(p).opsize <> S_FX) and
             (taicpu(p).opsize <> S_IQ) then
            begin
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else taicpu(p).opcode := A_FIST;
              asml.remove(hp1);
              hp1.free;
            end
        *)
      end;
  end;
function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
  var
    hp1, hp2: tai;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg) and
       GetNextInstruction(p, hp1) and
       (hp1.typ = Ait_Instruction) and
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       (taicpu(hp1).oper[0]^.reg = NR_ST) and
       (taicpu(hp1).oper[1]^.reg = NR_ST1) then
      { change to
          fld reg               fxxx reg,st
          fxxxp st, st1 (hp1)
        Remark: non commutative operations must be reversed!
      }
      begin
        case taicpu(hp1).opcode Of
          A_FMULP,A_FADDP,
          A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
            begin
              { the popping forms become plain two-operand forms;
                subtraction/division swap direction (SUB<->SUBR,
                DIV<->DIVR) because the operand roles are reversed }
              case taicpu(hp1).opcode Of
                A_FADDP: taicpu(hp1).opcode := A_FADD;
                A_FMULP: taicpu(hp1).opcode := A_FMUL;
                A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                else
                  internalerror(2019050534);
              end;
              taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
              taicpu(hp1).oper[1]^.reg := NR_ST;
              { the fld itself is no longer needed }
              asml.remove(p);
              p.free;
              p := hp1;
              Result:=true;
              exit;
            end;
          else
            ;
        end;
      end
    else
      { second pattern: p is "fld mem" (single/double) followed by a
        popping st,st1 arithmetic instruction }
      if MatchOpType(taicpu(p),top_ref) and
         GetNextInstruction(p, hp2) and
         (hp2.typ = Ait_Instruction) and
         MatchOpType(taicpu(hp2),top_reg,top_reg) and
         (taicpu(p).opsize in [S_FS, S_FL]) and
         (taicpu(hp2).oper[0]^.reg = NR_ST) and
         (taicpu(hp2).oper[1]^.reg = NR_ST1) then
        { does the previous instruction load/store the same location? }
        if GetLastInstruction(p, hp1) and
           MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
           MatchOpType(taicpu(hp1),top_ref) and
           RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          if ((taicpu(hp2).opcode = A_FMULP) or
              (taicpu(hp2).opcode = A_FADDP)) then
            { change to
                fld/fst   mem1  (hp1)      fld/fst   mem1
                fld       mem1  (p)        fadd/
                faddp/                     fmul     st, st
                fmulp  st, st1 (hp2) }
            begin
              asml.remove(p);
              p.free;
              p := hp1;
              if (taicpu(hp2).opcode = A_FADDP) then
                taicpu(hp2).opcode := A_FADD
              else
                taicpu(hp2).opcode := A_FMUL;
              taicpu(hp2).oper[1]^.reg := NR_ST;
            end
          else
            { change to
                fld/fst  mem1 (hp1)   fld/fst  mem1
                fld      mem1 (p)     fld      st}
            begin
              taicpu(p).changeopsize(S_FL);
              taicpu(p).loadreg(0,NR_ST);
            end
        else
          begin
            case taicpu(hp2).opcode Of
              A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                { change to
                    fld/fst mem1 (hp1)      fld/fst mem1
                    fld     mem2 (p)        fxxx    mem2
                    fxxxp   st, st1 (hp2) }
                begin
                  { fold the memory load into the arithmetic instruction;
                    again the reversed forms compensate for the changed
                    operand order }
                  case taicpu(hp2).opcode Of
                    A_FADDP: taicpu(p).opcode := A_FADD;
                    A_FMULP: taicpu(p).opcode := A_FMUL;
                    A_FSUBP: taicpu(p).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(p).opcode := A_FSUB;
                    A_FDIVP: taicpu(p).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(p).opcode := A_FDIV;
                    else
                      internalerror(2019050533);
                  end;
                  asml.remove(hp2);
                  hp2.free;
                end
              else
                ;
            end
          end
  end;
function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  var
    hp1,hp2: tai;
{$ifdef x86_64}
    hp3: tai;
{$endif x86_64}
  begin
    Result:=false;
    { branch 1: a register copy immediately extended - read the extension
      from the original source register instead }
    if MatchOpType(taicpu(p),top_reg,top_reg) and
       GetNextInstruction(p, hp1) and
{$ifdef x86_64}
       MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
{$else x86_64}
       MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
      { mov reg1, reg2                 mov reg1, reg2
        movzx/sx reg2, reg3      to    movzx/sx reg1, reg3}
      begin
        taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
        DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
        { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
          or unless supreg(reg3) = supreg(reg2)). [Kit] }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
           not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
          then
          begin
            asml.remove(p);
            p.free;
            p := hp1;
            Result:=true;
          end;
        exit;
      end
    { branch 2: a register copy used only as base/index of a following
      memory load into the same (super)register - address with the
      original register directly }
    else if MatchOpType(taicpu(p),top_reg,top_reg) and
            GetNextInstruction(p, hp1) and
{$ifdef x86_64}
            MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
{$else x86_64}
            MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
            MatchOpType(taicpu(hp1),top_ref,top_reg) and
            ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
             or
             (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
            ) and
            (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
      { mov reg1, reg2
        mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
      begin
        if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
        if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
        DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
        asml.remove(p);
        p.free;
        p := hp1;
        Result:=true;
        exit;
      end
    { branch 3: load / arith-or-lea / store back to the same location -
      fold into a single read-modify-write instruction }
    else if (taicpu(p).oper[0]^.typ = top_ref) and
            GetNextInstruction(p,hp1) and
            (hp1.typ = ait_instruction) and
            { while the GetNextInstruction(hp1,hp2) call could be factored out,
              doing it separately in both branches allows to do the cheap checks
              with low probability earlier }
            ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
              GetNextInstruction(hp1,hp2) and
              MatchInstruction(hp2,A_MOV,[])
             ) or
             ((taicpu(hp1).opcode=A_LEA) and
              GetNextInstruction(hp1,hp2) and
              MatchInstruction(hp2,A_MOV,[]) and
              { the lea may only use the copied register plus at most one
                other component, so that it can be rewritten as an add }
              ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
               ) or
               (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
                 taicpu(p).oper[1]^.reg) and
                (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
               (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
               (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
              ) and
              ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
             )
            ) and
            MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
            (taicpu(hp2).oper[1]^.typ = top_ref) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs,tai(p.next));
        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
        if (RefsEqual(taicpu(hp2).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
            not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,TmpUsedRegs))) then
          { change   mov            (ref), reg
                     add/sub/or/... reg2/$const, reg
                     mov            reg, (ref)
                     # release reg
            to       add/sub/or/... reg2/$const, (ref)    }
          begin
            case taicpu(hp1).opcode of
              A_INC,A_DEC,A_NOT,A_NEG :
                taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
              A_LEA :
                begin
                  { rewrite the lea as "add <other component>, (ref)" -
                    the other component is the non-copied index, the
                    non-copied base, or the constant offset }
                  taicpu(hp1).opcode:=A_ADD;
                  if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                  else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                  else
                    taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                  DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
                end
              else
                taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
            end;
            asml.remove(p);
            asml.remove(hp2);
            p.free;
            hp2.free;
            p := hp1
          end;
        Exit;
{$ifdef x86_64}
      { note: the "end" below closes branch 3 on non-x86_64; on x86_64 it
        is followed by an additional "else if" branch inside the ifdef }
      end
    else if (taicpu(p).opsize = S_L) and
            (taicpu(p).oper[1]^.typ = top_reg) and
            (
             GetNextInstruction(p, hp1) and
             MatchInstruction(hp1, A_MOV,[]) and
             (taicpu(hp1).opsize = S_L) and
             (taicpu(hp1).oper[1]^.typ = top_reg)
            ) and (
             GetNextInstruction(hp1, hp2) and
             (tai(hp2).typ=ait_instruction) and
             (taicpu(hp2).opsize = S_Q) and
             (
              (
               MatchInstruction(hp2, A_ADD,[]) and
               (taicpu(hp2).opsize = S_Q) and
               (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
               (
                (
                 (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
                 (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
                ) or (
                 (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                 (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
                )
               )
              ) or (
               MatchInstruction(hp2, A_LEA,[]) and
               (taicpu(hp2).oper[0]^.ref^.offset = 0) and
               (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
               (
                (
                 (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
                 (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
                ) or (
                 (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                 (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
                )
               ) and (
                (
                 (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
                ) or (
                 (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
                )
               )
              )
             )
            ) and (
             GetNextInstruction(hp2, hp3) and
             MatchInstruction(hp3, A_SHR,[]) and
             (taicpu(hp3).opsize = S_Q) and
             (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
             (taicpu(hp3).oper[0]^.val = 1) and
             (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
            ) then
      begin
        { Change   movl x,    reg1d         movl x,    reg1d
                   movl y,    reg2d         movl y,    reg2d
                   addq reg2q,reg1q   or    leaq (reg1q,reg2q),reg1q
                   shrq $1,   reg1q         shrq $1,   reg1q

          ( reg1d and reg2d can be switched around in the first two instructions )

          To       movl x,    reg1d
                   addl y,    reg1d
                   rcrl $1,   reg1d

          This corresponds to the common expression (x + y) shr 1, where
          x and y are Cardinals (replacing "shr 1" with "div 2" produces
          smaller code, but won't account for x + y causing an overflow). [Kit]
        }
        { NOTE(review): this branch rewrites the instructions but never
          sets Result - confirm whether that is intentional }
        if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
          { Change first MOV command to have the same register as the final output }
          taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
        else
          taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
        { Change second MOV command to an ADD command. This is easier than
          converting the existing command because it means we don't have to
          touch 'y', which might be a complicated reference, and also the
          fact that the third command might either be ADD or LEA. [Kit] }
        taicpu(hp1).opcode := A_ADD;
        { Delete old ADD/LEA instruction }
        asml.remove(hp2);
        hp2.free;
        { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
        taicpu(hp3).opcode := A_RCR;
        taicpu(hp3).changeopsize(S_L);
        setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
{$endif x86_64}
      end;
  end;
function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    Result:=false;
    { fold a preceding register copy into the three-operand imul form:
        mov  reg1,reg2
        imul y,reg2        ->     imul y,reg1,reg2
      y may be a constant or a full (addr_full) memory reference; the
      3-operand input form "imul y,reg2,reg2" is accepted as well }
    if (taicpu(p).ops >= 2) and
       ((taicpu(p).oper[0]^.typ = top_const) or
        ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
       (taicpu(p).oper[1]^.typ = top_reg) and
       ((taicpu(p).ops = 2) or
        ((taicpu(p).oper[2]^.typ = top_reg) and
         (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
       GetLastInstruction(p,hp1) and
       MatchInstruction(hp1,A_MOV,[]) and
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       { the mov must write the register the imul multiplies; a 32 bit
         mov feeding a 64 bit imul on the same super-register is also
         accepted }
       ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
        ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
          { change
              mov reg1,reg2
              imul y,reg2 to imul y,reg1,reg2 }
          begin
            taicpu(p).ops := 3;
            taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
            taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
      end;
  end;
  2795. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  2796. var
  2797. hp1 : tai;
  2798. begin
  2799. {
  2800. change
  2801. jmp .L1
  2802. ...
  2803. .L1:
  2804. ret
  2805. into
  2806. ret
  2807. }
  2808. result:=false;
  2809. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  2810. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  2811. begin
  2812. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  2813. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  2814. MatchInstruction(hp1,A_RET,[S_NO]) then
  2815. begin
  2816. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  2817. taicpu(p).opcode:=A_RET;
  2818. taicpu(p).is_jmp:=false;
  2819. taicpu(p).ops:=taicpu(hp1).ops;
  2820. case taicpu(hp1).ops of
  2821. 0:
  2822. taicpu(p).clearop(0);
  2823. 1:
  2824. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  2825. else
  2826. internalerror(2016041301);
  2827. end;
  2828. result:=true;
  2829. end;
  2830. end;
  2831. end;
  2832. function CanBeCMOV(p : tai) : boolean;
  2833. begin
  2834. CanBeCMOV:=assigned(p) and
  2835. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  2836. { we can't use cmov ref,reg because
  2837. ref could be nil and cmov still throws an exception
  2838. if ref=nil but the mov isn't done (FK)
  2839. or ((taicpu(p).oper[0]^.typ = top_ref) and
  2840. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  2841. }
  2842. MatchOpType(taicpu(p),top_reg,top_reg);
  2843. end;
  2844. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  2845. var
  2846. hp1,hp2,hp3,hp4,hpmov2: tai;
  2847. carryadd_opcode : TAsmOp;
  2848. l : Longint;
  2849. condition : TAsmCond;
  2850. symbol: TAsmSymbol;
  2851. begin
  2852. result:=false;
  2853. symbol:=nil;
  2854. if GetNextInstruction(p,hp1) then
  2855. begin
  2856. symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  2857. if (hp1.typ=ait_instruction) and
  2858. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  2859. (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
  2860. { jb @@1 cmc
  2861. inc/dec operand --> adc/sbb operand,0
  2862. @@1:
  2863. ... and ...
  2864. jnb @@1
  2865. inc/dec operand --> adc/sbb operand,0
  2866. @@1: }
  2867. begin
  2868. carryadd_opcode:=A_NONE;
  2869. if Taicpu(p).condition in [C_NAE,C_B] then
  2870. begin
  2871. if Taicpu(hp1).opcode=A_INC then
  2872. carryadd_opcode:=A_ADC;
  2873. if Taicpu(hp1).opcode=A_DEC then
  2874. carryadd_opcode:=A_SBB;
  2875. if carryadd_opcode<>A_NONE then
  2876. begin
  2877. Taicpu(p).clearop(0);
  2878. Taicpu(p).ops:=0;
  2879. Taicpu(p).is_jmp:=false;
  2880. Taicpu(p).opcode:=A_CMC;
  2881. Taicpu(p).condition:=C_NONE;
  2882. Taicpu(hp1).ops:=2;
  2883. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  2884. Taicpu(hp1).loadconst(0,0);
  2885. Taicpu(hp1).opcode:=carryadd_opcode;
  2886. result:=true;
  2887. exit;
  2888. end;
  2889. end;
  2890. if Taicpu(p).condition in [C_AE,C_NB] then
  2891. begin
  2892. if Taicpu(hp1).opcode=A_INC then
  2893. carryadd_opcode:=A_ADC;
  2894. if Taicpu(hp1).opcode=A_DEC then
  2895. carryadd_opcode:=A_SBB;
  2896. if carryadd_opcode<>A_NONE then
  2897. begin
  2898. asml.remove(p);
  2899. p.free;
  2900. Taicpu(hp1).ops:=2;
  2901. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  2902. Taicpu(hp1).loadconst(0,0);
  2903. Taicpu(hp1).opcode:=carryadd_opcode;
  2904. p:=hp1;
  2905. result:=true;
  2906. exit;
  2907. end;
  2908. end;
  2909. end;
  2910. if ((hp1.typ = ait_label) and (symbol = tai_label(hp1).labsym))
  2911. or ((hp1.typ = ait_align) and GetNextInstruction(hp1, hp2) and (hp2.typ = ait_label) and (symbol = tai_label(hp2).labsym)) then
  2912. begin
  2913. { If Jcc is immediately followed by the label that it's supposed to jump to, remove it }
  2914. DebugMsg(SPeepholeOptimization + 'Removed conditional jump whose destination was immediately after it', p);
  2915. UpdateUsedRegs(hp1);
  2916. TAsmLabel(symbol).decrefs;
  2917. { if the label refs. reach zero, remove any alignment before the label }
  2918. if (hp1.typ = ait_align) then
  2919. begin
  2920. UpdateUsedRegs(hp2);
  2921. if (TAsmLabel(symbol).getrefs = 0) then
  2922. begin
  2923. asml.Remove(hp1);
  2924. hp1.Free;
  2925. end;
  2926. hp1 := hp2; { Set hp1 to the label }
  2927. end;
  2928. asml.remove(p);
  2929. p.free;
  2930. if (TAsmLabel(symbol).getrefs = 0) then
  2931. begin
  2932. GetNextInstruction(hp1, p); { Instruction following the label }
  2933. asml.remove(hp1);
  2934. hp1.free;
  2935. UpdateUsedRegs(p);
  2936. Result := True;
  2937. end
  2938. else
  2939. begin
  2940. { We don't need to set the result to True because we know hp1
  2941. is a label and won't trigger any optimisation routines. [Kit] }
  2942. p := hp1;
  2943. end;
  2944. Exit;
  2945. end;
  2946. end;
  2947. {$ifndef i8086}
  2948. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  2949. begin
  2950. { check for
  2951. jCC xxx
  2952. <several movs>
  2953. xxx:
  2954. }
  2955. l:=0;
  2956. GetNextInstruction(p, hp1);
  2957. while assigned(hp1) and
  2958. CanBeCMOV(hp1) and
  2959. { stop on labels }
  2960. not(hp1.typ=ait_label) do
  2961. begin
  2962. inc(l);
  2963. GetNextInstruction(hp1,hp1);
  2964. end;
  2965. if assigned(hp1) then
  2966. begin
  2967. if FindLabel(tasmlabel(symbol),hp1) then
  2968. begin
  2969. if (l<=4) and (l>0) then
  2970. begin
  2971. condition:=inverse_cond(taicpu(p).condition);
  2972. GetNextInstruction(p,hp1);
  2973. repeat
  2974. if not Assigned(hp1) then
  2975. InternalError(2018062900);
  2976. taicpu(hp1).opcode:=A_CMOVcc;
  2977. taicpu(hp1).condition:=condition;
  2978. UpdateUsedRegs(hp1);
  2979. GetNextInstruction(hp1,hp1);
  2980. until not(CanBeCMOV(hp1));
  2981. { Don't decrement the reference count on the label yet, otherwise
  2982. GetNextInstruction might skip over the label if it drops to
  2983. zero. }
  2984. GetNextInstruction(hp1,hp2);
  2985. { if the label refs. reach zero, remove any alignment before the label }
  2986. if (hp1.typ = ait_align) and (hp2.typ = ait_label) then
  2987. begin
  2988. { Ref = 1 means it will drop to zero }
  2989. if (tasmlabel(symbol).getrefs=1) then
  2990. begin
  2991. asml.Remove(hp1);
  2992. hp1.Free;
  2993. end;
  2994. end
  2995. else
  2996. hp2 := hp1;
  2997. if not Assigned(hp2) then
  2998. InternalError(2018062910);
  2999. if (hp2.typ <> ait_label) then
  3000. begin
  3001. { There's something other than CMOVs here. Move the original jump
  3002. to right before this point, then break out.
  3003. Originally this was part of the above internal error, but it got
  3004. triggered on the bootstrapping process sometimes. Investigate. [Kit] }
  3005. asml.remove(p);
  3006. asml.insertbefore(p, hp2);
  3007. DebugMsg('Jcc/CMOVcc drop-out', p);
  3008. UpdateUsedRegs(p);
  3009. Result := True;
  3010. Exit;
  3011. end;
  3012. { Now we can safely decrement the reference count }
  3013. tasmlabel(symbol).decrefs;
  3014. { Remove the original jump }
  3015. asml.Remove(p);
  3016. p.Free;
  3017. GetNextInstruction(hp2, p); { Instruction after the label }
  3018. { Remove the label if this is its final reference }
  3019. if (tasmlabel(symbol).getrefs=0) then
  3020. begin
  3021. asml.remove(hp2);
  3022. hp2.free;
  3023. end;
  3024. if Assigned(p) then
  3025. begin
  3026. UpdateUsedRegs(p);
  3027. result:=true;
  3028. end;
  3029. exit;
  3030. end;
  3031. end
  3032. else
  3033. begin
  3034. { check further for
  3035. jCC xxx
  3036. <several movs 1>
  3037. jmp yyy
  3038. xxx:
  3039. <several movs 2>
  3040. yyy:
  3041. }
  3042. { hp2 points to jmp yyy }
  3043. hp2:=hp1;
  3044. { skip hp1 to xxx (or an align right before it) }
  3045. GetNextInstruction(hp1, hp1);
  3046. if assigned(hp2) and
  3047. assigned(hp1) and
  3048. (l<=3) and
  3049. (hp2.typ=ait_instruction) and
  3050. (taicpu(hp2).is_jmp) and
  3051. (taicpu(hp2).condition=C_None) and
  3052. { real label and jump, no further references to the
  3053. label are allowed }
  3054. (tasmlabel(symbol).getrefs=1) and
  3055. FindLabel(tasmlabel(symbol),hp1) then
  3056. begin
  3057. l:=0;
  3058. { skip hp1 to <several moves 2> }
  3059. if (hp1.typ = ait_align) then
  3060. GetNextInstruction(hp1, hp1);
  3061. GetNextInstruction(hp1, hpmov2);
  3062. hp1 := hpmov2;
  3063. while assigned(hp1) and
  3064. CanBeCMOV(hp1) do
  3065. begin
  3066. inc(l);
  3067. GetNextInstruction(hp1, hp1);
  3068. end;
  3069. { hp1 points to yyy (or an align right before it) }
  3070. hp3 := hp1;
  3071. if assigned(hp1) and
  3072. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  3073. begin
  3074. condition:=inverse_cond(taicpu(p).condition);
  3075. GetNextInstruction(p,hp1);
  3076. repeat
  3077. taicpu(hp1).opcode:=A_CMOVcc;
  3078. taicpu(hp1).condition:=condition;
  3079. UpdateUsedRegs(hp1);
  3080. GetNextInstruction(hp1,hp1);
  3081. until not(assigned(hp1)) or
  3082. not(CanBeCMOV(hp1));
  3083. condition:=inverse_cond(condition);
  3084. hp1 := hpmov2;
  3085. { hp1 is now at <several movs 2> }
  3086. while Assigned(hp1) and CanBeCMOV(hp1) do
  3087. begin
  3088. taicpu(hp1).opcode:=A_CMOVcc;
  3089. taicpu(hp1).condition:=condition;
  3090. UpdateUsedRegs(hp1);
  3091. GetNextInstruction(hp1,hp1);
  3092. end;
  3093. hp1 := p;
  3094. { Get first instruction after label }
  3095. GetNextInstruction(hp3, p);
  3096. if assigned(p) and (hp3.typ = ait_align) then
  3097. GetNextInstruction(p, p);
  3098. { Don't dereference yet, as doing so will cause
  3099. GetNextInstruction to skip the label and
  3100. optional align marker. [Kit] }
  3101. GetNextInstruction(hp2, hp4);
  3102. { remove jCC }
  3103. asml.remove(hp1);
  3104. hp1.free;
  3105. { Remove label xxx (it will have a ref of zero due to the initial check }
  3106. if (hp4.typ = ait_align) then
  3107. begin
  3108. { Account for alignment as well }
  3109. GetNextInstruction(hp4, hp1);
  3110. asml.remove(hp1);
  3111. hp1.free;
  3112. end;
  3113. asml.remove(hp4);
  3114. hp4.free;
  3115. { Now we can safely decrement it }
  3116. tasmlabel(symbol).decrefs;
  3117. { remove jmp }
  3118. symbol := taicpu(hp2).oper[0]^.ref^.symbol;
  3119. asml.remove(hp2);
  3120. hp2.free;
  3121. { Remove label yyy (and the optional alignment) if its reference will fall to zero }
  3122. if tasmlabel(symbol).getrefs = 1 then
  3123. begin
  3124. if (hp3.typ = ait_align) then
  3125. begin
  3126. { Account for alignment as well }
  3127. GetNextInstruction(hp3, hp1);
  3128. asml.remove(hp1);
  3129. hp1.free;
  3130. end;
  3131. asml.remove(hp3);
  3132. hp3.free;
  3133. { As before, now we can safely decrement it }
  3134. tasmlabel(symbol).decrefs;
  3135. end;
  3136. if Assigned(p) then
  3137. begin
  3138. UpdateUsedRegs(p);
  3139. result:=true;
  3140. end;
  3141. exit;
  3142. end;
  3143. end;
  3144. end;
  3145. end;
  3146. end;
  3147. {$endif i8086}
  3148. end;
{ Pass-1 optimisations for sign/zero-extension instructions (MOVSX/MOVZX).
  Three independent transformations are attempted:
  1. Fold "movx src,reg2 / arith-op x,reg2 / mov reg2,src" into a single
     arithmetic instruction at the narrower size (valid because reg2 is
     verified to be dead after the final mov).
  2. For MOVZX only: drop an AND that masks exactly the bits the zero
     extension already cleared.
  3. For MOVZX only: rewrite some register-to-register forms into shorter
     or faster equivalents (AND with a constant, or plain MOV plus a
     narrowed AND constant in a following instruction).
  Returns true in p-replacing cases via the usual convention of leaving
  the new instruction in p. }
function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  var
    hp1,hp2: tai;
  begin
    result:=false;
    if (taicpu(p).oper[1]^.typ = top_reg) and
      GetNextInstruction(p,hp1) and
      (hp1.typ = ait_instruction) and
      { the arithmetic op must target the extended register... }
      IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_MOV,[]) and
      (taicpu(hp2).oper[0]^.typ = top_reg) and
      { ...and the result must be stored back to the movx source }
      OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
{$ifdef i386}
      { not all registers have byte size sub registers on i386 }
      ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
      (((taicpu(hp1).ops=2) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
       ((taicpu(hp1).ops=1) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
      { the widened register must not be live after the store-back,
        otherwise its upper bits would be observed }
      not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
      begin
        { change movsX/movzX reg/ref, reg2
                 add/sub/or/... reg3/$const, reg2
                 mov reg2 reg/ref
          to     add/sub/or/... reg3/$const, reg/ref }
        { by example:
            movswl  %si,%eax        movswl  %si,%eax      p
            decl    %eax            addl    %edx,%eax     hp1
            movw    %ax,%si         movw    %ax,%si       hp2
          ->
            movswl  %si,%eax        movswl  %si,%eax      p
            decw    %eax            addw    %edx,%eax     hp1
            movw    %ax,%si         movw    %ax,%si       hp2
        }
        taicpu(hp1).changeopsize(taicpu(hp2).opsize);
        {
          ->
            movswl  %si,%eax        movswl  %si,%eax      p
            decw    %si             addw    %dx,%si       hp1
            movw    %ax,%si         movw    %ax,%si       hp2
        }
        case taicpu(hp1).ops of
          1:
            taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
          2:
            begin
              taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
              if (taicpu(hp1).oper[0]^.typ = top_reg) then
                { narrow the source register to match the new opsize }
                setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
            end;
          else
            internalerror(2008042701);
        end;
        {
          ->
            decw    %si             addw    %dx,%si       p
        }
        DebugMsg(SPeepholeOptimization + 'var3',p);
        { both the movx and the store-back are now redundant }
        asml.remove(p);
        asml.remove(hp2);
        p.free;
        hp2.free;
        p:=hp1;
      end
    else if taicpu(p).opcode=A_MOVZX then
      begin
        { removes superfluous And's after movzx's }
        if (taicpu(p).oper[1]^.typ = top_reg) and
          GetNextInstruction(p, hp1) and
          (tai(hp1).typ = ait_instruction) and
          (taicpu(hp1).opcode = A_AND) and
          (taicpu(hp1).oper[0]^.typ = top_const) and
          (taicpu(hp1).oper[1]^.typ = top_reg) and
          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            { the AND is a no-op when its mask covers exactly the bits
              the zero extension already guarantees to be significant }
            case taicpu(p).opsize Of
              S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'var4',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
              S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ffff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'var5',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
{$ifdef x86_64}
              S_LQ:
                if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                  begin
                    if (cs_asm_source in current_settings.globalswitches) then
                      asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
                    asml.remove(hp1);
                    hp1.Free;
                  end;
{$endif x86_64}
              else
                ;
            end;
          end;
        { changes some movzx constructs to faster synonims (all examples
          are given with eax/ax, but are also valid for other registers)}
        if (taicpu(p).oper[1]^.typ = top_reg) then
          if (taicpu(p).oper[0]^.typ = top_reg) then
            case taicpu(p).opsize of
              S_BW:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    { the AND form is larger, so skip it when optimising for size }
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                    begin
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_W);
                      taicpu(p).loadConst(0,$ff);
                      DebugMsg(SPeepholeOptimization + 'var7',p);
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                      to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var8',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_W);
                      setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end;
                end;
              S_BL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                    begin
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ff)
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var10',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end
                end;
{$ifndef i8086}
              S_WL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                    begin
                      DebugMsg(SPeepholeOptimization + 'var11',p);
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ffff);
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var12',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                    end;
                end;
{$endif i8086}
              else
                ;
            end
          else if (taicpu(p).oper[0]^.typ = top_ref) then
            begin
              { movzx mem,reg followed by an AND on reg: keep the movzx
                but narrow the AND constant to the loaded width }
              if GetNextInstruction(p, hp1) and
                (tai(hp1).typ = ait_instruction) and
                (taicpu(hp1).opcode = A_AND) and
                MatchOpType(taicpu(hp1),top_const,top_reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                begin
                  //taicpu(p).opcode := A_MOV;
                  case taicpu(p).opsize Of
                    S_BL:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var13',p);
                        taicpu(hp1).changeopsize(S_L);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      end;
                    S_WL:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var14',p);
                        taicpu(hp1).changeopsize(S_L);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                      end;
                    S_BW:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var15',p);
                        taicpu(hp1).changeopsize(S_W);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      end;
{$ifdef x86_64}
                    S_BQ:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var16',p);
                        taicpu(hp1).changeopsize(S_Q);
                        taicpu(hp1).loadConst(
                          0, taicpu(hp1).oper[0]^.val and $ff);
                      end;
                    S_WQ:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var17',p);
                        taicpu(hp1).changeopsize(S_Q);
                        taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
                      end;
                    S_LQ:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var18',p);
                        taicpu(hp1).changeopsize(S_Q);
                        taicpu(hp1).loadConst(
                          0, taicpu(hp1).oper[0]^.val and $ffffffff);
                      end;
{$endif x86_64}
                    else
                      Internalerror(2017050704)
                  end;
                end;
            end;
      end;
  end;
{ Pass-1 optimisations for AND instructions:
  - merges two consecutive ANDs on the same register into one;
  - removes a MOVZX/MOVSX that follows an AND whose mask already confines
    the value to the extended width;
  - merges AND+SHL when the shift discards every unmasked bit;
  - turns "and x,reg / jcc" into "test x,reg / jcc" when reg dies;
  - rewrites all-ones masks ($FF/$FFFF/$FFFFFFFF at matching size) into
    the register-register form.
  Returns true when p was replaced and optimisation should restart at p. }
function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  var
    hp1 : tai;
    MaskLength : Cardinal;
  begin
    Result:=false;
    if GetNextInstruction(p, hp1) then
      begin
        if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1,A_AND,[]) and
          MatchOpType(taicpu(hp1),top_const,top_reg) and
          (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
          { the second register must contain the first one, so compare their subreg types }
          (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
          (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
          { change
              and const1, reg
              and const2, reg
            to
              and (const1 and const2), reg
          }
          begin
            taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
            DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
            asml.remove(p);
            p.Free;
            p:=hp1;
            Result:=true;
            exit;
          end
        else if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1,A_MOVZX,[]) and
          (taicpu(hp1).oper[0]^.typ = top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
          { the movzx must extend the register into itself }
          (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
          (((taicpu(p).opsize=S_W) and
            (taicpu(hp1).opsize=S_BW)) or
           ((taicpu(p).opsize=S_L) and
            (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
            or
           ((taicpu(p).opsize=S_Q) and
            (taicpu(hp1).opsize in [S_BQ,S_WQ]))
{$endif x86_64}
          ) then
          begin
            { the movzx is redundant when the AND mask already fits
              inside the width being zero-extended from }
            if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
              ) or
              (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
            then
              begin
                { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
                  32-bit register to a 64-bit register, or even a version called MOVZXD, so
                  code that tests for the presence of AND 0xffffffff followed by MOVZX is
                  wasted, and is indictive of a compiler bug if it were triggered. [Kit]

                  NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
                }
                DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
                asml.remove(hp1);
                hp1.free;
                Exit;
              end;
          end
        else if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1,A_SHL,[]) and
          MatchOpType(taicpu(hp1),top_const,top_reg) and
          (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
          begin
{$ifopt R+}
{$define RANGE_WAS_ON}
{$R-}
{$endif}
            { get length of potential and mask }
            { range checks are disabled: BsrQWord on a mask of all ones
              can make this expression wrap, which is handled below }
            MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;

            { really a mask? }
{$ifdef RANGE_WAS_ON}
{$R+}
{$endif}
            if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
              { unmasked part shifted out? }
              ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
              begin
                DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
                { take care of the register (de)allocs following p }
                UpdateUsedRegs(tai(p.next));
                asml.remove(p);
                p.free;
                p:=hp1;
                Result:=true;
                exit;
              end;
          end
        else if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
          (taicpu(hp1).oper[0]^.typ = top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
          (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
          (((taicpu(p).opsize=S_W) and
            (taicpu(hp1).opsize=S_BW)) or
           ((taicpu(p).opsize=S_L) and
            (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
            or
           ((taicpu(p).opsize=S_Q) and
            (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
          ) then
          begin
            { a movsx is redundant only when the mask also clears the
              sign bit of the narrow width, so the $7f.. limits apply
              rather than $ff.. }
            if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
              ) or
              (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
              (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
              )
{$endif x86_64}
            then
              begin
                DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
                asml.remove(hp1);
                hp1.free;
                Exit;
              end;
          end
        else if (taicpu(p).oper[1]^.typ = top_reg) and
          (hp1.typ = ait_instruction) and
          (taicpu(hp1).is_jmp) and
          (taicpu(hp1).opcode<>A_JMP) and
          not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
          begin
            { change
                and x, reg
                jxx
              to
                test x, reg
                jxx
              if reg is deallocated before the
              jump, but only if it's a conditional jump (PFV)
            }
            taicpu(p).opcode := A_TEST;
            Exit;
          end;
      end;

    { Lone AND tests }
    if MatchOpType(taicpu(p),top_const,top_reg) then
      begin
        {
          - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
          - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
          - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
        }
        if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
          ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
          ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
          begin
            taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg)
          end;
      end;
  end;
  3572. function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
  3573. begin
  3574. Result:=false;
  3575. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  3576. MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  3577. (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
  3578. begin
  3579. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
  3580. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
  3581. taicpu(p).opcode:=A_ADD;
  3582. DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
  3583. result:=true;
  3584. end
  3585. else if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  3586. MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  3587. (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
  3588. begin
  3589. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
  3590. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
  3591. taicpu(p).opcode:=A_ADD;
  3592. DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
  3593. result:=true;
  3594. end;
  3595. end;
  3596. function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
  3597. var
  3598. Value, RegName: string;
  3599. begin
  3600. Result:=false;
  3601. if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
  3602. begin
  3603. case taicpu(p).oper[0]^.val of
  3604. 0:
  3605. { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
  3606. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  3607. begin
  3608. { change "mov $0,%reg" into "xor %reg,%reg" }
  3609. taicpu(p).opcode := A_XOR;
  3610. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  3611. Result := True;
  3612. end;
  3613. $1..$FFFFFFFF:
  3614. begin
  3615. { Code size reduction by J. Gareth "Kit" Moreton }
  3616. { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
  3617. case taicpu(p).opsize of
  3618. S_Q:
  3619. begin
  3620. RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
  3621. Value := debug_tostr(taicpu(p).oper[0]^.val);
  3622. { The actual optimization }
  3623. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  3624. taicpu(p).changeopsize(S_L);
  3625. DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
  3626. Result := True;
  3627. end;
  3628. else
  3629. ;
  3630. end;
  3631. end;
  3632. end;
  3633. end;
  3634. end;
  3635. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  3636. begin
  3637. Result:=false;
  3638. { change "cmp $0, %reg" to "test %reg, %reg" }
  3639. if MatchOpType(taicpu(p),top_const,top_reg) and
  3640. (taicpu(p).oper[0]^.val = 0) then
  3641. begin
  3642. taicpu(p).opcode := A_TEST;
  3643. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  3644. Result:=true;
  3645. end;
  3646. end;
{ Post-peephole optimisation for TEST/OR self-tests: removes a
  "test %y,%y" / "or %y,%y" / "test $-1,%y" that immediately follows an
  instruction which already set the zero flag on the same operand, and
  independently shortens "test $-1,%reg" to "test %reg,%reg".
  Returns true when p was removed and replaced by its successor. }
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean;
    hp1,hp2 : tai;
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y  |  test $-1, %y    (x)
      j(n)z _Label
         as the first instruction already adjusts the ZF
         %y operand may also be a reference }
    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
      GetLastInstruction(p, hp1) and
      (tai(hp1).typ = ait_instruction) and
      GetNextInstruction(p,hp2) and
      { the flag consumer must be a conditional instruction }
      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC                  }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
              ((taicpu(hp1).opcode <> A_ADD) and
               (taicpu(hp1).opcode <> A_SUB))) then
              begin
                { drop the redundant test/or }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for     }
              { shifts by a (nonzero) constant                            }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC                  }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC                  }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                case taicpu(hp1).opcode of
                  A_DEC, A_INC:
                    { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                    begin
                      case taicpu(hp1).opcode Of
                        A_DEC: taicpu(hp1).opcode := A_SUB;
                        A_INC: taicpu(hp1).opcode := A_ADD;
                        else
                          ;
                      end;
                      taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                      taicpu(hp1).loadConst(0,1);
                      taicpu(hp1).ops:=2;
                    end;
                  else
                    ;
                end;
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end
        else
          { change "test  $-1,%reg" into "test %reg,%reg" }
          if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
            taicpu(p).loadoper(0,taicpu(p).oper[1]^);
      end { case }
    { change "test  $-1,%reg" into "test %reg,%reg" }
    else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  end;
{ Post-peephole CALL optimisations:
  - (non-x86_64, pre-Pentium2 only) "call x / jmp y" becomes
    "push y / jmp x", avoiding the return to a jump;
  - "call x / ret" becomes "jmp x" (tail call) at optimisation level 4.
  Both are skipped for PIC. Returns true when a rewrite happened. }
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1 : tai;
{$ifndef x86_64}
    hp2 : taicpu;
{$endif x86_64}
  begin
    Result:=false;
{$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
       not(cs_create_pic in current_settings.moduleswitches) and
       GetNextInstruction(p, hp1) and
       MatchInstruction(hp1,A_JMP,[S_NO]) and
       MatchOpType(taicpu(hp1),top_ref) and
       (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { push the jump target so the callee's RET lands there directly }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end
    else
{$endif x86_64}
    { replace
        call   procname
        ret
      by
        jmp    procname

      this should never hurt except when pic is used, not sure
      how to handle it then

      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
       not(cs_create_pic in current_settings.moduleswitches) and
       GetNextInstruction(p, hp1) and
       MatchInstruction(hp1,A_RET,[S_NO]) and
       (taicpu(hp1).ops=0) then
      begin
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end;
  end;
  3792. {$ifdef x86_64}
  3793. function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
  3794. var
  3795. PreMessage: string;
  3796. begin
  3797. Result := False;
  3798. { Code size reduction by J. Gareth "Kit" Moreton }
  3799. { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
  3800. if (taicpu(p).opsize in [S_BQ, S_WQ]) and
  3801. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
  3802. then
  3803. begin
  3804. { Has 64-bit register name and opcode suffix }
  3805. PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
  3806. { The actual optimization }
  3807. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  3808. if taicpu(p).opsize = S_BQ then
  3809. taicpu(p).changeopsize(S_BL)
  3810. else
  3811. taicpu(p).changeopsize(S_WL);
  3812. DebugMsg(SPeepholeOptimization + PreMessage +
  3813. debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
  3814. end;
  3815. end;
  3816. function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  3817. var
  3818. PreMessage, RegName: string;
  3819. begin
  3820. { Code size reduction by J. Gareth "Kit" Moreton }
  3821. { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
  3822. as this removes the REX prefix }
  3823. Result := False;
  3824. if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  3825. Exit;
  3826. if taicpu(p).oper[0]^.typ <> top_reg then
  3827. { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
  3828. InternalError(2018011500);
  3829. case taicpu(p).opsize of
  3830. S_Q:
  3831. begin
  3832. if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
  3833. begin
  3834. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
  3835. PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';
  3836. { The actual optimization }
  3837. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  3838. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  3839. taicpu(p).changeopsize(S_L);
  3840. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
  3841. DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
  3842. end;
  3843. end;
  3844. else
  3845. ;
  3846. end;
  3847. end;
  3848. {$endif}
  3849. procedure TX86AsmOptimizer.OptReferences;
  3850. var
  3851. p: tai;
  3852. i: Integer;
  3853. begin
  3854. p := BlockStart;
  3855. while (p <> BlockEnd) Do
  3856. begin
  3857. if p.typ=ait_instruction then
  3858. begin
  3859. for i:=0 to taicpu(p).ops-1 do
  3860. if taicpu(p).oper[i]^.typ=top_ref then
  3861. optimize_ref(taicpu(p).oper[i]^.ref^,false);
  3862. end;
  3863. p:=tai(p.next);
  3864. end;
  3865. end;
  3866. end.