aoptx86.pas 205 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { x86/x86_64-specific peephole optimizer.  Extends the target-independent
    TAsmOptimizer with register-usage analysis helpers and one handler per
    instruction family for each optimizer pass (bodies in the implementation
    section). }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { overridden generic-optimizer hooks }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    procedure DebugMsg(const s : string; p : tai);inline;
    class function IsExitCode(p : tai) : boolean; static;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean; static;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    function DoSubAddOpt(var p : tai) : Boolean;
    { handlers run before the main peephole passes }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    function PrePeepholeOptIMUL(var p : tai) : boolean;
    { pass-1 handlers, dispatched by opcode of p }
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1_V_MOVAP(var p : tai) : boolean;
    function OptPass1VOP(var p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(var p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass1Sub(var p : tai) : boolean;
    function OptPass1SHLSAL(var p : tai) : boolean;
    function OptPass1SETcc(var p : tai) : boolean;
    function OptPass1FSTP(var p : tai) : boolean;
    function OptPass1FLD(var p : tai) : boolean;
    function OptPass1Cmp(var p : tai) : boolean;
    { pass-2 handlers }
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    function OptPass2Lea(var p: tai): Boolean;
    { handlers run after the main peephole passes }
    function PostPeepholeOptMov(var p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    function PostPeepholeOptMovzx(var p : tai) : Boolean;
    function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
    function PostPeepholeOptCmp(var p : tai) : Boolean;
    function PostPeepholeOptTestOr(var p : tai) : Boolean;
    function PostPeepholeOptCall(var p : tai) : Boolean;
    function PostPeepholeOptLea(var p : tai) : Boolean;
    procedure OptReferences;
    procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
  end;
  81. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  82. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  83. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  84. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  85. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  86. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  87. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  88. function RefsEqual(const r1, r2: treference): boolean;
  89. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  90. { returns true, if ref is a reference using only the registers passed as base and index
  91. and having an offset }
  92. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  93. implementation
  94. uses
  95. cutils,verbose,
  96. globals,
  97. cpuinfo,
  98. procinfo,
  99. aasmbase,
  100. aoptutils,
  101. symconst,symsym,
  102. cgx86,
  103. itcpugas;
  104. {$ifdef DEBUG_AOPTCPU}
  105. const
  106. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  107. {$else DEBUG_AOPTCPU}
  108. { Empty strings help the optimizer to remove string concatenations that won't
  109. ever appear to the user on release builds. [Kit] }
  110. const
  111. SPeepholeOptimization = '';
  112. {$endif DEBUG_AOPTCPU}
  113. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  114. begin
  115. result :=
  116. (instr.typ = ait_instruction) and
  117. (taicpu(instr).opcode = op) and
  118. ((opsize = []) or (taicpu(instr).opsize in opsize));
  119. end;
  120. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  121. begin
  122. result :=
  123. (instr.typ = ait_instruction) and
  124. ((taicpu(instr).opcode = op1) or
  125. (taicpu(instr).opcode = op2)
  126. ) and
  127. ((opsize = []) or (taicpu(instr).opsize in opsize));
  128. end;
  129. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  130. begin
  131. result :=
  132. (instr.typ = ait_instruction) and
  133. ((taicpu(instr).opcode = op1) or
  134. (taicpu(instr).opcode = op2) or
  135. (taicpu(instr).opcode = op3)
  136. ) and
  137. ((opsize = []) or (taicpu(instr).opsize in opsize));
  138. end;
  139. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  140. const opsize : topsizes) : boolean;
  141. var
  142. op : TAsmOp;
  143. begin
  144. result:=false;
  145. for op in ops do
  146. begin
  147. if (instr.typ = ait_instruction) and
  148. (taicpu(instr).opcode = op) and
  149. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  150. begin
  151. result:=true;
  152. exit;
  153. end;
  154. end;
  155. end;
  156. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  157. begin
  158. result := (oper.typ = top_reg) and (oper.reg = reg);
  159. end;
  160. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  161. begin
  162. result := (oper.typ = top_const) and (oper.val = a);
  163. end;
  164. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  165. begin
  166. result := oper1.typ = oper2.typ;
  167. if result then
  168. case oper1.typ of
  169. top_const:
  170. Result:=oper1.val = oper2.val;
  171. top_reg:
  172. Result:=oper1.reg = oper2.reg;
  173. top_ref:
  174. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  175. else
  176. internalerror(2013102801);
  177. end
  178. end;
  179. function RefsEqual(const r1, r2: treference): boolean;
  180. begin
  181. RefsEqual :=
  182. (r1.offset = r2.offset) and
  183. (r1.segment = r2.segment) and (r1.base = r2.base) and
  184. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  185. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  186. (r1.relsymbol = r2.relsymbol) and
  187. (r1.volatility=[]) and
  188. (r2.volatility=[]);
  189. end;
  190. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  191. begin
  192. Result:=(ref.offset=0) and
  193. (ref.scalefactor in [0,1]) and
  194. (ref.segment=NR_NO) and
  195. (ref.symbol=nil) and
  196. (ref.relsymbol=nil) and
  197. ((base=NR_INVALID) or
  198. (ref.base=base)) and
  199. ((index=NR_INVALID) or
  200. (ref.index=index)) and
  201. (ref.volatility=[]);
  202. end;
  203. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  204. begin
  205. Result:=(ref.scalefactor in [0,1]) and
  206. (ref.segment=NR_NO) and
  207. (ref.symbol=nil) and
  208. (ref.relsymbol=nil) and
  209. ((base=NR_INVALID) or
  210. (ref.base=base)) and
  211. ((index=NR_INVALID) or
  212. (ref.index=index)) and
  213. (ref.volatility=[]);
  214. end;
  215. function InstrReadsFlags(p: tai): boolean;
  216. begin
  217. InstrReadsFlags := true;
  218. case p.typ of
  219. ait_instruction:
  220. if InsProp[taicpu(p).opcode].Ch*
  221. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  222. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  223. Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
  224. exit;
  225. ait_label:
  226. exit;
  227. else
  228. ;
  229. end;
  230. InstrReadsFlags := false;
  231. end;
{ Scans forward from Current and returns (in Next) the next tai at which the
  search must stop; Result is False once the end of the list is reached.
  Below -O3 this is simply the next instruction.  At -O3 the scan skips over
  instructions that do not involve reg, stopping at the first non-instruction
  entry, the first instruction that uses reg, or any call/jump (after which
  tracking reg would be unsafe). }
function TX86AsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
begin
  Next:=Current;
  repeat
    Result:=GetNextInstruction(Next,Next);
  { NOTE: the order of these disjuncts matters — short-circuit evaluation
    guarantees the taicpu(Next) cast in is_calljmp only happens once Next is
    known to be an instruction. }
  until not (Result) or
    not(cs_opt_level3 in current_settings.optimizerswitches) or
    (Next.typ<>ait_instruction) or
    RegInInstruction(reg,Next) or
    is_calljmp(taicpu(Next).opcode);
end;
  243. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  244. begin
  245. Result:=RegReadByInstruction(reg,hp);
  246. end;
  247. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  248. var
  249. p: taicpu;
  250. opcount: longint;
  251. begin
  252. RegReadByInstruction := false;
  253. if hp.typ <> ait_instruction then
  254. exit;
  255. p := taicpu(hp);
  256. case p.opcode of
  257. A_CALL:
  258. regreadbyinstruction := true;
  259. A_IMUL:
  260. case p.ops of
  261. 1:
  262. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  263. (
  264. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  265. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  266. );
  267. 2,3:
  268. regReadByInstruction :=
  269. reginop(reg,p.oper[0]^) or
  270. reginop(reg,p.oper[1]^);
  271. else
  272. InternalError(2019112801);
  273. end;
  274. A_MUL:
  275. begin
  276. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  277. (
  278. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  279. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  280. );
  281. end;
  282. A_IDIV,A_DIV:
  283. begin
  284. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  285. (
  286. (getregtype(reg)=R_INTREGISTER) and
  287. (
  288. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  289. )
  290. );
  291. end;
  292. else
  293. begin
  294. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  295. begin
  296. RegReadByInstruction := false;
  297. exit;
  298. end;
  299. for opcount := 0 to p.ops-1 do
  300. if (p.oper[opCount]^.typ = top_ref) and
  301. RegInRef(reg,p.oper[opcount]^.ref^) then
  302. begin
  303. RegReadByInstruction := true;
  304. exit
  305. end;
  306. { special handling for SSE MOVSD }
  307. if (p.opcode=A_MOVSD) and (p.ops>0) then
  308. begin
  309. if p.ops<>2 then
  310. internalerror(2017042702);
  311. regReadByInstruction := reginop(reg,p.oper[0]^) or
  312. (
  313. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  314. );
  315. exit;
  316. end;
  317. with insprop[p.opcode] do
  318. begin
  319. if getregtype(reg)=R_INTREGISTER then
  320. begin
  321. case getsupreg(reg) of
  322. RS_EAX:
  323. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  324. begin
  325. RegReadByInstruction := true;
  326. exit
  327. end;
  328. RS_ECX:
  329. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  330. begin
  331. RegReadByInstruction := true;
  332. exit
  333. end;
  334. RS_EDX:
  335. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  336. begin
  337. RegReadByInstruction := true;
  338. exit
  339. end;
  340. RS_EBX:
  341. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  342. begin
  343. RegReadByInstruction := true;
  344. exit
  345. end;
  346. RS_ESP:
  347. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  348. begin
  349. RegReadByInstruction := true;
  350. exit
  351. end;
  352. RS_EBP:
  353. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  354. begin
  355. RegReadByInstruction := true;
  356. exit
  357. end;
  358. RS_ESI:
  359. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  360. begin
  361. RegReadByInstruction := true;
  362. exit
  363. end;
  364. RS_EDI:
  365. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  366. begin
  367. RegReadByInstruction := true;
  368. exit
  369. end;
  370. end;
  371. end;
  372. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  373. begin
  374. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  375. begin
  376. case p.condition of
  377. C_A,C_NBE, { CF=0 and ZF=0 }
  378. C_BE,C_NA: { CF=1 or ZF=1 }
  379. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  380. C_AE,C_NB,C_NC, { CF=0 }
  381. C_B,C_NAE,C_C: { CF=1 }
  382. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  383. C_NE,C_NZ, { ZF=0 }
  384. C_E,C_Z: { ZF=1 }
  385. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  386. C_G,C_NLE, { ZF=0 and SF=OF }
  387. C_LE,C_NG: { ZF=1 or SF<>OF }
  388. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  389. C_GE,C_NL, { SF=OF }
  390. C_L,C_NGE: { SF<>OF }
  391. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  392. C_NO, { OF=0 }
  393. C_O: { OF=1 }
  394. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  395. C_NP,C_PO, { PF=0 }
  396. C_P,C_PE: { PF=1 }
  397. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  398. C_NS, { SF=0 }
  399. C_S: { SF=1 }
  400. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  401. else
  402. internalerror(2017042701);
  403. end;
  404. if RegReadByInstruction then
  405. exit;
  406. end;
  407. case getsubreg(reg) of
  408. R_SUBW,R_SUBD,R_SUBQ:
  409. RegReadByInstruction :=
  410. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  411. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  412. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  413. R_SUBFLAGCARRY:
  414. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  415. R_SUBFLAGPARITY:
  416. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  417. R_SUBFLAGAUXILIARY:
  418. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  419. R_SUBFLAGZERO:
  420. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  421. R_SUBFLAGSIGN:
  422. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  423. R_SUBFLAGOVERFLOW:
  424. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  425. R_SUBFLAGINTERRUPT:
  426. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  427. R_SUBFLAGDIRECTION:
  428. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  429. else
  430. internalerror(2017042601);
  431. end;
  432. exit;
  433. end;
  434. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  435. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  436. (p.oper[0]^.reg=p.oper[1]^.reg) then
  437. exit;
  438. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  439. begin
  440. RegReadByInstruction := true;
  441. exit
  442. end;
  443. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  444. begin
  445. RegReadByInstruction := true;
  446. exit
  447. end;
  448. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  449. begin
  450. RegReadByInstruction := true;
  451. exit
  452. end;
  453. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  454. begin
  455. RegReadByInstruction := true;
  456. exit
  457. end;
  458. end;
  459. end;
  460. end;
  461. end;
    { Returns True if Reg is referenced in any way (read, written or
      modified) by instruction p1.  Implicit register usage is looked up in
      the instruction-property table (insprop) first; if nothing matches,
      the inherited operand-based check decides. }
    function TX86AsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
      begin
        result:=false;
        if p1.typ<>ait_instruction then
          exit;
        { Ch_All marks instructions whose effects may cover any register }
        if (Ch_All in insprop[taicpu(p1).opcode].Ch) then
          exit(true);
        if (getregtype(reg)=R_INTREGISTER) and
          { change information for xmm movsd are not correct }
          ((taicpu(p1).opcode<>A_MOVSD) or (taicpu(p1).ops=0)) then
          begin
            { implicit uses of specific integer registers: any read (R),
              write (W), read-write (RW) or modify (M) flag counts }
            case getsupreg(reg) of
              { RS_EAX = RS_RAX on x86-64 }
              RS_EAX:
                result:=([Ch_REAX,Ch_RRAX,Ch_WEAX,Ch_WRAX,Ch_RWEAX,Ch_RWRAX,Ch_MEAX,Ch_MRAX]*insprop[taicpu(p1).opcode].Ch)<>[];
              RS_ECX:
                result:=([Ch_RECX,Ch_RRCX,Ch_WECX,Ch_WRCX,Ch_RWECX,Ch_RWRCX,Ch_MECX,Ch_MRCX]*insprop[taicpu(p1).opcode].Ch)<>[];
              RS_EDX:
                result:=([Ch_REDX,Ch_RRDX,Ch_WEDX,Ch_WRDX,Ch_RWEDX,Ch_RWRDX,Ch_MEDX,Ch_MRDX]*insprop[taicpu(p1).opcode].Ch)<>[];
              RS_EBX:
                result:=([Ch_REBX,Ch_RRBX,Ch_WEBX,Ch_WRBX,Ch_RWEBX,Ch_RWRBX,Ch_MEBX,Ch_MRBX]*insprop[taicpu(p1).opcode].Ch)<>[];
              RS_ESP:
                result:=([Ch_RESP,Ch_RRSP,Ch_WESP,Ch_WRSP,Ch_RWESP,Ch_RWRSP,Ch_MESP,Ch_MRSP]*insprop[taicpu(p1).opcode].Ch)<>[];
              RS_EBP:
                result:=([Ch_REBP,Ch_RRBP,Ch_WEBP,Ch_WRBP,Ch_RWEBP,Ch_RWRBP,Ch_MEBP,Ch_MRBP]*insprop[taicpu(p1).opcode].Ch)<>[];
              RS_ESI:
                { NOTE(review): Ch_RMemEDI listed under ESI here and
                  Ch_WMemEDI under EDI below — presumably covers the implicit
                  memory operands of the string instructions; confirm }
                result:=([Ch_RESI,Ch_RRSI,Ch_WESI,Ch_WRSI,Ch_RWESI,Ch_RWRSI,Ch_MESI,Ch_MRSI,Ch_RMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
              RS_EDI:
                result:=([Ch_REDI,Ch_RRDI,Ch_WEDI,Ch_WRDI,Ch_RWEDI,Ch_RWRDI,Ch_MEDI,Ch_MRDI,Ch_WMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
              else
                ;
            end;
            if result then
              exit;
          end
        else if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
          begin
            { a generic flags read/write or a condition-code use touches
              every individual flag }
            if ([Ch_RFlags,Ch_WFlags,Ch_RWFlags,Ch_RFLAGScc]*insprop[taicpu(p1).opcode].Ch)<>[] then
              exit(true);
            { otherwise check the specific flag sub-register }
            case getsubreg(reg) of
              R_SUBFLAGCARRY:
                Result:=([Ch_RCarryFlag,Ch_RWCarryFlag,Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
              R_SUBFLAGPARITY:
                Result:=([Ch_RParityFlag,Ch_RWParityFlag,Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
              R_SUBFLAGAUXILIARY:
                Result:=([Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
              R_SUBFLAGZERO:
                Result:=([Ch_RZeroFlag,Ch_RWZeroFlag,Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
              R_SUBFLAGSIGN:
                Result:=([Ch_RSignFlag,Ch_RWSignFlag,Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
              R_SUBFLAGOVERFLOW:
                Result:=([Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
              R_SUBFLAGINTERRUPT:
                Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
              R_SUBFLAGDIRECTION:
                Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
              else
                ;
            end;
            if result then
              exit;
          end
        else if (getregtype(reg)=R_FPUREGISTER) and (Ch_FPU in insprop[taicpu(p1).opcode].Ch) then
          exit(true);
        { fall back to scanning the explicit operands }
        Result:=inherited RegInInstruction(Reg, p1);
      end;
    { Returns True if instruction p1 writes or modifies Reg (a pure read
      does not count).  Flag sub-registers are resolved through the
      instruction-property table; integer registers are checked for both
      implicit and explicit (operand) writes. }
    function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
      begin
        Result := False;
        if p1.typ <> ait_instruction then
          exit;
        with insprop[taicpu(p1).opcode] do
          if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
            begin
              case getsubreg(reg) of
                { whole flags register: any write to any flag counts }
                R_SUBW,R_SUBD,R_SUBQ:
                  Result :=
                    [Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
                     Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                     Ch_W0DirFlag,Ch_W1DirFlag,Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
                R_SUBFLAGCARRY:
                  Result:=[Ch_WCarryFlag,Ch_RWCarryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
                R_SUBFLAGPARITY:
                  Result:=[Ch_WParityFlag,Ch_RWParityFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
                R_SUBFLAGAUXILIARY:
                  Result:=[Ch_WAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
                R_SUBFLAGZERO:
                  Result:=[Ch_WZeroFlag,Ch_RWZeroFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
                R_SUBFLAGSIGN:
                  Result:=[Ch_WSignFlag,Ch_RWSignFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
                R_SUBFLAGOVERFLOW:
                  Result:=[Ch_WOverflowFlag,Ch_RWOverflowFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
                R_SUBFLAGINTERRUPT:
                  Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
                R_SUBFLAGDIRECTION:
                  Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
                else
                  internalerror(2017042602);
              end;
              exit;
            end;
        case taicpu(p1).opcode of
          A_CALL:
            { We could potentially set Result to False if the register in
              question is non-volatile for the subroutine's calling convention,
              but this would require detecting the calling convention in use and
              also assuming that the routine doesn't contain malformed assembly
              language, for example... so it could only be done under -O4 as it
              would be considered a side-effect. [Kit] }
            Result := True;
          A_MOVSD:
            { special handling for SSE MOVSD }
            if (taicpu(p1).ops>0) then
              begin
                if taicpu(p1).ops<>2 then
                  internalerror(2017042703);
                { register form writes the second (destination) operand }
                Result := (taicpu(p1).oper[1]^.typ=top_reg) and reginop(reg,taicpu(p1).oper[1]^);
              end;
          else
            ;
        end;
        if Result then
          exit;
        with insprop[taicpu(p1).opcode] do
          begin
            if getregtype(reg)=R_INTREGISTER then
              begin
                { implicit writes to specific integer registers }
                case getsupreg(reg) of
                  RS_EAX:
                    if [Ch_WEAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                      begin
                        Result := True;
                        exit
                      end;
                  RS_ECX:
                    if [Ch_WECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                      begin
                        Result := True;
                        exit
                      end;
                  RS_EDX:
                    if [Ch_WEDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                      begin
                        Result := True;
                        exit
                      end;
                  RS_EBX:
                    if [Ch_WEBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                      begin
                        Result := True;
                        exit
                      end;
                  RS_ESP:
                    if [Ch_WESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                      begin
                        Result := True;
                        exit
                      end;
                  RS_EBP:
                    if [Ch_WEBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                      begin
                        Result := True;
                        exit
                      end;
                  RS_ESI:
                    if [Ch_WESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                      begin
                        Result := True;
                        exit
                      end;
                  RS_EDI:
                    if [Ch_WEDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                      begin
                        Result := True;
                        exit
                      end;
                end;
              end;
            { explicit operand writes; the Ch_*OPn flags imply the operand
              exists, so oper[n] is only dereferenced when they are set }
            if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
              begin
                Result := true;
                exit
              end;
            if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then
              begin
                Result := true;
                exit
              end;
            if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
              begin
                Result := true;
                exit
              end;
            if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
              begin
                Result := true;
                exit
              end;
          end;
      end;
  662. {$ifdef DEBUG_AOPTCPU}
  663. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  664. begin
  665. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  666. end;
  667. function debug_tostr(i: tcgint): string; inline;
  668. begin
  669. Result := tostr(i);
  670. end;
  671. function debug_regname(r: TRegister): string; inline;
  672. begin
  673. Result := '%' + std_regname(r);
  674. end;
    { Debug output function - creates a string representation of an operator.
      The rendering follows AT&T conventions: '$' for constants, '%' for
      registers (via debug_regname) and 'offset(base,index,scale)' for
      memory references. }
    function debug_operstr(oper: TOper): string;
      begin
        case oper.typ of
          top_const:
            Result := '$' + debug_tostr(oper.val);
          top_reg:
            Result := debug_regname(oper.reg);
          top_ref:
            begin
              { optional displacement before the opening parenthesis }
              if oper.ref^.offset <> 0 then
                Result := debug_tostr(oper.ref^.offset) + '('
              else
                Result := '(';
              if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
                begin
                  Result := Result + debug_regname(oper.ref^.base);
                  if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
                    Result := Result + ',' + debug_regname(oper.ref^.index);
                end
              else
                { index register without a base }
                if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
                  Result := Result + debug_regname(oper.ref^.index);
              { scale factor 1 is implicit and omitted }
              if (oper.ref^.scalefactor > 1) then
                Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
              else
                Result := Result + ')';
            end;
          else
            Result := '[UNKNOWN]';
        end;
      end;
  707. function debug_op2str(opcode: tasmop): string; inline;
  708. begin
  709. Result := std_op2str[opcode];
  710. end;
  711. function debug_opsize2str(opsize: topsize): string; inline;
  712. begin
  713. Result := gas_opsize2str[opsize];
  714. end;
  715. {$else DEBUG_AOPTCPU}
    { No-op stub: debug comments are compiled out without DEBUG_AOPTCPU. }
    procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
      begin
      end;
    { Stub: returns an empty string without DEBUG_AOPTCPU. }
    function debug_tostr(i: tcgint): string; inline;
      begin
        Result := '';
      end;
    { Stub: returns an empty string without DEBUG_AOPTCPU. }
    function debug_regname(r: TRegister): string; inline;
      begin
        Result := '';
      end;
    { Stub: returns an empty string without DEBUG_AOPTCPU. }
    function debug_operstr(oper: TOper): string; inline;
      begin
        Result := '';
      end;
    { Stub: returns an empty string without DEBUG_AOPTCPU. }
    function debug_op2str(opcode: tasmop): string; inline;
      begin
        Result := '';
      end;
    { Stub: returns an empty string without DEBUG_AOPTCPU. }
    function debug_opsize2str(opsize: topsize): string; inline;
      begin
        Result := '';
      end;
  739. {$endif DEBUG_AOPTCPU}
  740. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  741. begin
  742. if not SuperRegistersEqual(reg1,reg2) then
  743. exit(false);
  744. if getregtype(reg1)<>R_INTREGISTER then
  745. exit(true); {because SuperRegisterEqual is true}
  746. case getsubreg(reg1) of
  747. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  748. higher, it preserves the high bits, so the new value depends on
  749. reg2's previous value. In other words, it is equivalent to doing:
  750. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  751. R_SUBL:
  752. exit(getsubreg(reg2)=R_SUBL);
  753. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  754. higher, it actually does a:
  755. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  756. R_SUBH:
  757. exit(getsubreg(reg2)=R_SUBH);
  758. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  759. bits of reg2:
  760. reg2 := (reg2 and $ffff0000) or word(reg1); }
  761. R_SUBW:
  762. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  763. { a write to R_SUBD always overwrites every other subregister,
  764. because it clears the high 32 bits of R_SUBQ on x86_64 }
  765. R_SUBD,
  766. R_SUBQ:
  767. exit(true);
  768. else
  769. internalerror(2017042801);
  770. end;
  771. end;
  772. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  773. begin
  774. if not SuperRegistersEqual(reg1,reg2) then
  775. exit(false);
  776. if getregtype(reg1)<>R_INTREGISTER then
  777. exit(true); {because SuperRegisterEqual is true}
  778. case getsubreg(reg1) of
  779. R_SUBL:
  780. exit(getsubreg(reg2)<>R_SUBH);
  781. R_SUBH:
  782. exit(getsubreg(reg2)<>R_SUBL);
  783. R_SUBW,
  784. R_SUBD,
  785. R_SUBQ:
  786. exit(true);
  787. else
  788. internalerror(2017042802);
  789. end;
  790. end;
    { Pre-peephole optimization for shr/sar followed by shl on the same
      operand with constant shift counts: the pair is rewritten as
      "sar/and", "shl/and" or a single "and", depending on how the two
      shift counts compare.
      NOTE(review): result is only assigned at the top and never set to
      True even though the const1<>const2 arms rewrite instructions in
      place — confirm whether the caller relies on the return value for
      re-scanning. }
    function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
      var
        hp1 : tai;
        l : TCGInt;
      begin
        result:=false;
        { changes the code sequence
          shr/sar const1, x
          shl     const2, x
          to
          either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
        if GetNextInstruction(p, hp1) and
          MatchInstruction(hp1,A_SHL,[]) and
          (taicpu(p).oper[0]^.typ = top_const) and
          (taicpu(hp1).oper[0]^.typ = top_const) and
          (taicpu(hp1).opsize = taicpu(p).opsize) and
          (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
          OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
          begin
            if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
              not(cs_opt_size in current_settings.optimizerswitches) then
              begin
                { shr/sar const1, %reg
                  shl     const2, %reg
                  with const1 > const2 }
                { keep the shift with the remaining count, turn the shl
                  into a mask clearing the bits shifted out }
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                taicpu(hp1).opcode := A_AND;
                l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                case taicpu(p).opsize Of
                  S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                  S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                  S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
                  S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
                  else
                    Internalerror(2017050703)
                end;
              end
            else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
              not(cs_opt_size in current_settings.optimizerswitches) then
              begin
                { shr/sar const1, %reg
                  shl     const2, %reg
                  with const1 < const2 }
                { keep the shl with the remaining count, turn the first
                  shift into the mask }
                taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                taicpu(p).opcode := A_AND;
                l := (1 shl (taicpu(p).oper[0]^.val))-1;
                case taicpu(p).opsize Of
                  S_B: taicpu(p).loadConst(0,l Xor $ff);
                  S_W: taicpu(p).loadConst(0,l Xor $ffff);
                  S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
                  S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
                  else
                    Internalerror(2017050702)
                end;
              end
            else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
              begin
                { shr/sar const1, %reg
                  shl     const2, %reg
                  with const1 = const2 }
                { both shifts cancel out; only the mask remains }
                taicpu(p).opcode := A_AND;
                l := (1 shl (taicpu(p).oper[0]^.val))-1;
                case taicpu(p).opsize Of
                  S_B: taicpu(p).loadConst(0,l Xor $ff);
                  S_W: taicpu(p).loadConst(0,l Xor $ffff);
                  S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
                  S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
                  else
                    Internalerror(2017050701)
                end;
                asml.remove(hp1);
                hp1.free;
              end;
          end;
      end;
    { Pre-peephole optimization for "imul const, reg[, reg]":
      - imul $1 is removed (2-operand form) or turned into a mov
        (3-operand form);
      - other constants with exactly two set bits at most three positions
        apart become an lea (scale 2/4/8 via base+index) optionally
        followed by a shl.
      The transformation is skipped when the following instruction is a
      jo/jno, since imul's overflow flag would be lost. }
    function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
      var
        opsize : topsize;
        hp1 : tai;
        tmpref : treference;
        ShiftValue : Cardinal;
        BaseValue : TCGInt;
      begin
        result:=false;
        opsize:=taicpu(p).opsize;
        { changes certain "imul const, %reg"'s to lea sequences }
        if (MatchOpType(taicpu(p),top_const,top_reg) or
            MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
          (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
          if (taicpu(p).oper[0]^.val = 1) then
            if (taicpu(p).ops = 2) then
              { remove "imul $1, reg" }
              begin
                { NOTE(review): hp1 is assigned here but not used in this
                  branch — looks like a leftover; confirm before removing }
                hp1 := tai(p.Next);
                DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
                RemoveCurrentP(p);
                result:=true;
              end
            else
              { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
              begin
                hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                InsertLLItem(p.previous, p.next, hp1);
                DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
                p.free;
                p := hp1;
              end
          else if ((taicpu(p).ops <= 2) or
              (taicpu(p).oper[2]^.typ = Top_Reg)) and
            not(cs_opt_size in current_settings.optimizerswitches) and
            (not(GetNextInstruction(p, hp1)) or
             not((tai(hp1).typ = ait_instruction) and
                 ((taicpu(hp1).opcode=A_Jcc) and
                  (taicpu(hp1).condition in [C_O,C_NO])))) then
            begin
              {
                imul X, reg1, reg2 to
                  lea (reg1,reg1,Y), reg2
                  shl ZZ,reg2
                imul XX, reg1 to
                  lea (reg1,reg1,YY), reg1
                  shl ZZ,reg2
                This optimization makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
                it does not exist as a separate optimization target in FPC though.
                This optimization can be applied as long as only two bits are set in the constant and those two bits are separated by
                at most two zeros
              }
              reference_reset(tmpref,1,[]);
              if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
                begin
                  { factor the constant as BaseValue * 2^ShiftValue }
                  ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
                  BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
                  TmpRef.base := taicpu(p).oper[1]^.reg;
                  TmpRef.index := taicpu(p).oper[1]^.reg;
                  { BaseValue 3/5/9 maps to lea scale factors 2/4/8 }
                  if not(BaseValue in [3,5,9]) then
                    Internalerror(2018110101);
                  TmpRef.ScaleFactor := BaseValue-1;
                  if (taicpu(p).ops = 2) then
                    hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
                  else
                    hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
                  AsmL.InsertAfter(hp1,p);
                  DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
                  taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
                  RemoveCurrentP(p);
                  if ShiftValue>0 then
                    AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
                end;
            end;
      end;
  941. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  942. var
  943. p: taicpu;
  944. begin
  945. if not assigned(hp) or
  946. (hp.typ <> ait_instruction) then
  947. begin
  948. Result := false;
  949. exit;
  950. end;
  951. p := taicpu(hp);
  952. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  953. with insprop[p.opcode] do
  954. begin
  955. case getsubreg(reg) of
  956. R_SUBW,R_SUBD,R_SUBQ:
  957. Result:=
  958. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  959. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  960. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  961. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  962. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  963. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  964. R_SUBFLAGCARRY:
  965. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  966. R_SUBFLAGPARITY:
  967. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  968. R_SUBFLAGAUXILIARY:
  969. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  970. R_SUBFLAGZERO:
  971. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  972. R_SUBFLAGSIGN:
  973. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  974. R_SUBFLAGOVERFLOW:
  975. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  976. R_SUBFLAGINTERRUPT:
  977. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  978. R_SUBFLAGDIRECTION:
  979. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  980. else
  981. begin
  982. writeln(getsubreg(reg));
  983. internalerror(2017050501);
  984. end;
  985. end;
  986. exit;
  987. end;
  988. Result :=
  989. (((p.opcode = A_MOV) or
  990. (p.opcode = A_MOVZX) or
  991. (p.opcode = A_MOVSX) or
  992. (p.opcode = A_LEA) or
  993. (p.opcode = A_VMOVSS) or
  994. (p.opcode = A_VMOVSD) or
  995. (p.opcode = A_VMOVAPD) or
  996. (p.opcode = A_VMOVAPS) or
  997. (p.opcode = A_VMOVQ) or
  998. (p.opcode = A_MOVSS) or
  999. (p.opcode = A_MOVSD) or
  1000. (p.opcode = A_MOVQ) or
  1001. (p.opcode = A_MOVAPD) or
  1002. (p.opcode = A_MOVAPS) or
  1003. {$ifndef x86_64}
  1004. (p.opcode = A_LDS) or
  1005. (p.opcode = A_LES) or
  1006. {$endif not x86_64}
  1007. (p.opcode = A_LFS) or
  1008. (p.opcode = A_LGS) or
  1009. (p.opcode = A_LSS)) and
  1010. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  1011. (p.oper[1]^.typ = top_reg) and
  1012. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  1013. ((p.oper[0]^.typ = top_const) or
  1014. ((p.oper[0]^.typ = top_reg) and
  1015. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  1016. ((p.oper[0]^.typ = top_ref) and
  1017. not RegInRef(reg,p.oper[0]^.ref^)))) or
  1018. ((p.opcode = A_POP) and
  1019. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  1020. ((p.opcode = A_IMUL) and
  1021. (p.ops=3) and
  1022. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  1023. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  1024. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  1025. ((((p.opcode = A_IMUL) or
  1026. (p.opcode = A_MUL)) and
  1027. (p.ops=1)) and
  1028. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  1029. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  1030. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  1031. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  1032. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  1033. {$ifdef x86_64}
  1034. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  1035. {$endif x86_64}
  1036. )) or
  1037. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  1038. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  1039. {$ifdef x86_64}
  1040. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  1041. {$endif x86_64}
  1042. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  1043. {$ifndef x86_64}
  1044. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1045. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1046. {$endif not x86_64}
  1047. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1048. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1049. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1050. {$ifndef x86_64}
  1051. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  1052. {$endif not x86_64}
  1053. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  1054. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  1055. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  1056. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  1057. {$ifdef x86_64}
  1058. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  1059. {$endif x86_64}
  1060. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  1061. (((p.opcode = A_FSTSW) or
  1062. (p.opcode = A_FNSTSW)) and
  1063. (p.oper[0]^.typ=top_reg) and
  1064. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  1065. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  1066. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  1067. (p.oper[0]^.reg=p.oper[1]^.reg) and
  1068. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  1069. end;
    { Returns True if p starts a recognized function-exit sequence:
        ret
        leave; ret
        lea x(%esp),%esp; ret
        mov %framepointer,%esp | lea x(%framepointer),%esp; pop %framepointer; ret }
    class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
      var
        hp2,hp3 : tai;
      begin
        { some x86-64 issue a NOP before the real exit code }
        if MatchInstruction(p,A_NOP,[]) then
          GetNextInstruction(p,p);
        result:=assigned(p) and (p.typ=ait_instruction) and
          ((taicpu(p).opcode = A_RET) or
           { leave; ret }
           ((taicpu(p).opcode=A_LEAVE) and
            GetNextInstruction(p,hp2) and
            MatchInstruction(hp2,A_RET,[S_NO])
           ) or
           { stack release via lea on %esp, then ret }
           (((taicpu(p).opcode=A_LEA) and
             MatchOpType(taicpu(p),top_ref,top_reg) and
             (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
             (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
            ) and
            GetNextInstruction(p,hp2) and
            MatchInstruction(hp2,A_RET,[S_NO])
           ) or
           { restore %esp from the frame pointer, pop it, then ret }
           ((((taicpu(p).opcode=A_MOV) and
              MatchOpType(taicpu(p),top_reg,top_reg) and
              (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
              (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
             ((taicpu(p).opcode=A_LEA) and
              MatchOpType(taicpu(p),top_ref,top_reg) and
              (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
              (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
             )
            ) and
            GetNextInstruction(p,hp2) and
            MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
            MatchOpType(taicpu(hp2),top_reg) and
            (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
            GetNextInstruction(hp2,hp3) and
            MatchInstruction(hp3,A_RET,[S_NO])
           )
          );
      end;
  1110. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  1111. begin
  1112. isFoldableArithOp := False;
  1113. case hp1.opcode of
  1114. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  1115. isFoldableArithOp :=
  1116. ((taicpu(hp1).oper[0]^.typ = top_const) or
  1117. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  1118. (taicpu(hp1).oper[0]^.reg <> reg))) and
  1119. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1120. (taicpu(hp1).oper[1]^.reg = reg);
  1121. A_INC,A_DEC,A_NEG,A_NOT:
  1122. isFoldableArithOp :=
  1123. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1124. (taicpu(hp1).oper[0]^.reg = reg);
  1125. else
  1126. ;
  1127. end;
  1128. end;
    { Removes the last register-deallocation marker before p for the
      register(s) holding the function result, so the optimizer does not
      treat the result register as dead at the end of the routine. }
    procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

      { Walks backwards from p and deletes the first ra_dealloc marker
        found for the given integer super-register; stops as soon as an
        instruction using that register is reached. }
      procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
        var
          hp2: tai;
        begin
          hp2 := p;
          repeat
            hp2 := tai(hp2.previous);
            if assigned(hp2) and
              (hp2.typ = ait_regalloc) and
              (tai_regalloc(hp2).ratype=ra_dealloc) and
              (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
              (getsupreg(tai_regalloc(hp2).reg) = supreg) then
              begin
                asml.remove(hp2);
                hp2.free;
                break;
              end;
          until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
        end;

      begin
        case current_procinfo.procdef.returndef.typ of
          { these result types are returned in (R/E)AX }
          arraydef,recorddef,pointerdef,
          stringdef,enumdef,procdef,objectdef,errordef,
          filedef,setdef,procvardef,
          classrefdef,forwarddef:
            DoRemoveLastDeallocForFuncRes(RS_EAX);
          orddef:
            if current_procinfo.procdef.returndef.size <> 0 then
              begin
                DoRemoveLastDeallocForFuncRes(RS_EAX);
                { for int64/qword }
                if current_procinfo.procdef.returndef.size = 8 then
                  DoRemoveLastDeallocForFuncRes(RS_EDX);
              end;
          else
            ;
        end;
      end;
  1168. function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
  1169. var
  1170. hp1,hp2 : tai;
  1171. begin
  1172. result:=false;
  1173. if MatchOpType(taicpu(p),top_reg,top_reg) then
  1174. begin
  1175. { vmova* reg1,reg1
  1176. =>
  1177. <nop> }
  1178. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  1179. begin
  1180. GetNextInstruction(p,hp1);
  1181. asml.Remove(p);
  1182. p.Free;
  1183. p:=hp1;
  1184. result:=true;
  1185. end
  1186. else if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
  1187. begin
  1188. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  1189. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1190. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1191. begin
  1192. { vmova* reg1,reg2
  1193. vmova* reg2,reg3
  1194. dealloc reg2
  1195. =>
  1196. vmova* reg1,reg3 }
  1197. TransferUsedRegs(TmpUsedRegs);
  1198. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1199. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1200. begin
  1201. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1202. asml.Remove(hp1);
  1203. hp1.Free;
  1204. result:=true;
  1205. end
  1206. { special case:
  1207. vmova* reg1,reg2
  1208. vmova* reg2,reg1
  1209. =>
  1210. vmova* reg1,reg2 }
  1211. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  1212. begin
  1213. asml.Remove(hp1);
  1214. hp1.Free;
  1215. result:=true;
  1216. end
  1217. end
  1218. else if MatchInstruction(hp1,[A_VFMADDPD,
  1219. A_VFMADD132PD,
  1220. A_VFMADD132PS,
  1221. A_VFMADD132SD,
  1222. A_VFMADD132SS,
  1223. A_VFMADD213PD,
  1224. A_VFMADD213PS,
  1225. A_VFMADD213SD,
  1226. A_VFMADD213SS,
  1227. A_VFMADD231PD,
  1228. A_VFMADD231PS,
  1229. A_VFMADD231SD,
  1230. A_VFMADD231SS,
  1231. A_VFMADDSUB132PD,
  1232. A_VFMADDSUB132PS,
  1233. A_VFMADDSUB213PD,
  1234. A_VFMADDSUB213PS,
  1235. A_VFMADDSUB231PD,
  1236. A_VFMADDSUB231PS,
  1237. A_VFMSUB132PD,
  1238. A_VFMSUB132PS,
  1239. A_VFMSUB132SD,
  1240. A_VFMSUB132SS,
  1241. A_VFMSUB213PD,
  1242. A_VFMSUB213PS,
  1243. A_VFMSUB213SD,
  1244. A_VFMSUB213SS,
  1245. A_VFMSUB231PD,
  1246. A_VFMSUB231PS,
  1247. A_VFMSUB231SD,
  1248. A_VFMSUB231SS,
  1249. A_VFMSUBADD132PD,
  1250. A_VFMSUBADD132PS,
  1251. A_VFMSUBADD213PD,
  1252. A_VFMSUBADD213PS,
  1253. A_VFMSUBADD231PD,
  1254. A_VFMSUBADD231PS,
  1255. A_VFNMADD132PD,
  1256. A_VFNMADD132PS,
  1257. A_VFNMADD132SD,
  1258. A_VFNMADD132SS,
  1259. A_VFNMADD213PD,
  1260. A_VFNMADD213PS,
  1261. A_VFNMADD213SD,
  1262. A_VFNMADD213SS,
  1263. A_VFNMADD231PD,
  1264. A_VFNMADD231PS,
  1265. A_VFNMADD231SD,
  1266. A_VFNMADD231SS,
  1267. A_VFNMSUB132PD,
  1268. A_VFNMSUB132PS,
  1269. A_VFNMSUB132SD,
  1270. A_VFNMSUB132SS,
  1271. A_VFNMSUB213PD,
  1272. A_VFNMSUB213PS,
  1273. A_VFNMSUB213SD,
  1274. A_VFNMSUB213SS,
  1275. A_VFNMSUB231PD,
  1276. A_VFNMSUB231PS,
  1277. A_VFNMSUB231SD,
  1278. A_VFNMSUB231SS],[S_NO]) and
  1279. { we mix single and double opperations here because we assume that the compiler
  1280. generates vmovapd only after double operations and vmovaps only after single operations }
  1281. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  1282. GetNextInstruction(hp1,hp2) and
  1283. MatchInstruction(hp2,[A_VMOVAPD,A_VMOVAPS,A_MOVAPD,A_MOVAPS],[S_NO]) and
  1284. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  1285. begin
  1286. TransferUsedRegs(TmpUsedRegs);
  1287. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1288. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1289. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  1290. then
  1291. begin
  1292. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  1293. asml.Remove(p);
  1294. p.Free;
  1295. asml.Remove(hp2);
  1296. hp2.Free;
  1297. p:=hp1;
  1298. end;
  1299. end
  1300. else if (hp1.typ = ait_instruction) and
  1301. GetNextInstruction(hp1, hp2) and
  1302. MatchInstruction(hp2,taicpu(p).opcode,[]) and
  1303. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1304. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  1305. MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  1306. (((taicpu(p).opcode=A_MOVAPS) and
  1307. ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
  1308. (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
  1309. ((taicpu(p).opcode=A_MOVAPD) and
  1310. ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
  1311. (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
  1312. ) then
  1313. { change
  1314. movapX reg,reg2
  1315. addsX/subsX/... reg3, reg2
  1316. movapX reg2,reg
  1317. to
  1318. addsX/subsX/... reg3,reg
  1319. }
  1320. begin
  1321. TransferUsedRegs(TmpUsedRegs);
  1322. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1323. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1324. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1325. begin
  1326. DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
  1327. debug_op2str(taicpu(p).opcode)+' '+
  1328. debug_op2str(taicpu(hp1).opcode)+' '+
  1329. debug_op2str(taicpu(hp2).opcode)+') done',p);
  1330. { we cannot eliminate the first move if
  1331. the operations uses the same register for source and dest }
  1332. if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
  1333. begin
  1334. asml.remove(p);
  1335. p.Free;
  1336. end;
  1337. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1338. asml.remove(hp2);
  1339. hp2.Free;
  1340. p:=hp1;
  1341. result:=true;
  1342. end;
  1343. end;
  1344. end;
  1345. end;
  1346. end;
  1347. function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
  1348. var
  1349. hp1 : tai;
  1350. begin
  1351. result:=false;
  1352. { replace
  1353. V<Op>X %mreg1,%mreg2,%mreg3
  1354. VMovX %mreg3,%mreg4
  1355. dealloc %mreg3
  1356. by
  1357. V<Op>X %mreg1,%mreg2,%mreg4
  1358. ?
  1359. }
  1360. if GetNextInstruction(p,hp1) and
  1361. { we mix single and double operations here because we assume that the compiler
  1362. generates vmovapd only after double operations and vmovaps only after single operations }
  1363. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1364. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1365. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1366. begin
  1367. TransferUsedRegs(TmpUsedRegs);
  1368. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1369. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  1370. ) then
  1371. begin
  1372. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1373. DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
  1374. asml.Remove(hp1);
  1375. hp1.Free;
  1376. result:=true;
  1377. end;
  1378. end;
  1379. end;
  1380. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1381. var
  1382. hp1, hp2: tai;
  1383. GetNextInstruction_p: Boolean;
  1384. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  1385. NewSize: topsize;
  1386. begin
  1387. Result:=false;
  1388. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1389. { remove mov reg1,reg1? }
  1390. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  1391. then
  1392. begin
  1393. DebugMsg(SPeepholeOptimization + 'Mov2Nop done',p);
  1394. { take care of the register (de)allocs following p }
  1395. UpdateUsedRegs(tai(p.next));
  1396. asml.remove(p);
  1397. p.free;
  1398. p:=hp1;
  1399. Result:=true;
  1400. exit;
  1401. end;
  1402. if GetNextInstruction_p and
  1403. MatchInstruction(hp1,A_AND,[]) and
  1404. (taicpu(p).oper[1]^.typ = top_reg) and
  1405. MatchOpType(taicpu(hp1),top_const,top_reg) then
  1406. begin
  1407. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1408. begin
  1409. case taicpu(p).opsize of
  1410. S_L:
  1411. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1412. begin
  1413. { Optimize out:
  1414. mov x, %reg
  1415. and ffffffffh, %reg
  1416. }
  1417. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  1418. asml.remove(hp1);
  1419. hp1.free;
  1420. Result:=true;
  1421. exit;
  1422. end;
  1423. S_Q: { TODO: Confirm if this is even possible }
  1424. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1425. begin
  1426. { Optimize out:
  1427. mov x, %reg
  1428. and ffffffffffffffffh, %reg
  1429. }
  1430. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  1431. asml.remove(hp1);
  1432. hp1.free;
  1433. Result:=true;
  1434. exit;
  1435. end;
  1436. else
  1437. ;
  1438. end;
  1439. end
  1440. else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  1441. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  1442. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  1443. then
  1444. begin
  1445. InputVal := debug_operstr(taicpu(p).oper[0]^);
  1446. MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
  1447. case taicpu(p).opsize of
  1448. S_B:
  1449. if (taicpu(hp1).oper[0]^.val = $ff) then
  1450. begin
  1451. { Convert:
  1452. movb x, %regl movb x, %regl
  1453. andw ffh, %regw andl ffh, %regd
  1454. To:
  1455. movzbw x, %regd movzbl x, %regd
  1456. (Identical registers, just different sizes)
  1457. }
  1458. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  1459. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  1460. case taicpu(hp1).opsize of
  1461. S_W: NewSize := S_BW;
  1462. S_L: NewSize := S_BL;
  1463. {$ifdef x86_64}
  1464. S_Q: NewSize := S_BQ;
  1465. {$endif x86_64}
  1466. else
  1467. InternalError(2018011510);
  1468. end;
  1469. end
  1470. else
  1471. NewSize := S_NO;
  1472. S_W:
  1473. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1474. begin
  1475. { Convert:
  1476. movw x, %regw
  1477. andl ffffh, %regd
  1478. To:
  1479. movzwl x, %regd
  1480. (Identical registers, just different sizes)
  1481. }
  1482. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  1483. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  1484. case taicpu(hp1).opsize of
  1485. S_L: NewSize := S_WL;
  1486. {$ifdef x86_64}
  1487. S_Q: NewSize := S_WQ;
  1488. {$endif x86_64}
  1489. else
  1490. InternalError(2018011511);
  1491. end;
  1492. end
  1493. else
  1494. NewSize := S_NO;
  1495. else
  1496. NewSize := S_NO;
  1497. end;
  1498. if NewSize <> S_NO then
  1499. begin
  1500. PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
  1501. { The actual optimization }
  1502. taicpu(p).opcode := A_MOVZX;
  1503. taicpu(p).changeopsize(NewSize);
  1504. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  1505. { Safeguard if "and" is followed by a conditional command }
  1506. TransferUsedRegs(TmpUsedRegs);
  1507. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  1508. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
  1509. begin
  1510. { At this point, the "and" command is effectively equivalent to
  1511. "test %reg,%reg". This will be handled separately by the
  1512. Peephole Optimizer. [Kit] }
  1513. DebugMsg(SPeepholeOptimization + PreMessage +
  1514. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1515. end
  1516. else
  1517. begin
  1518. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
  1519. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1520. asml.Remove(hp1);
  1521. hp1.Free;
  1522. end;
  1523. Result := True;
  1524. Exit;
  1525. end;
  1526. end;
  1527. end;
  1528. { Next instruction is also a MOV ? }
  1529. if GetNextInstruction_p and
  1530. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1531. begin
  1532. if (taicpu(p).oper[1]^.typ = top_reg) and
  1533. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1534. begin
  1535. TransferUsedRegs(TmpUsedRegs);
  1536. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1537. { we have
  1538. mov x, %treg
  1539. mov %treg, y
  1540. }
  1541. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1542. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1543. { we've got
  1544. mov x, %treg
  1545. mov %treg, y
  1546. with %treg is not used after }
  1547. case taicpu(p).oper[0]^.typ Of
  1548. top_reg:
  1549. begin
  1550. { change
  1551. mov %reg, %treg
  1552. mov %treg, y
  1553. to
  1554. mov %reg, y
  1555. }
  1556. if taicpu(hp1).oper[1]^.typ=top_reg then
  1557. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1558. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1559. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 2 done',p);
  1560. asml.remove(hp1);
  1561. hp1.free;
  1562. Result:=true;
  1563. Exit;
  1564. end;
  1565. top_const:
  1566. begin
  1567. { change
  1568. mov const, %treg
  1569. mov %treg, y
  1570. to
  1571. mov const, y
  1572. }
  1573. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  1574. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1575. begin
  1576. if taicpu(hp1).oper[1]^.typ=top_reg then
  1577. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1578. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1579. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  1580. asml.remove(hp1);
  1581. hp1.free;
  1582. Result:=true;
  1583. Exit;
  1584. end;
  1585. end;
  1586. top_ref:
  1587. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1588. begin
  1589. { change
  1590. mov mem, %treg
  1591. mov %treg, %reg
  1592. to
  1593. mov mem, %reg"
  1594. }
  1595. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1596. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  1597. asml.remove(hp1);
  1598. hp1.free;
  1599. Result:=true;
  1600. Exit;
  1601. end;
  1602. else
  1603. ;
  1604. end;
  1605. end;
  1606. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1607. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1608. { mov reg1, mem1 or mov mem1, reg1
  1609. mov mem2, reg2 mov reg2, mem2}
  1610. begin
  1611. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1612. { mov reg1, mem1 or mov mem1, reg1
  1613. mov mem2, reg1 mov reg2, mem1}
  1614. begin
  1615. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1616. { Removes the second statement from
  1617. mov reg1, mem1/reg2
  1618. mov mem1/reg2, reg1 }
  1619. begin
  1620. if taicpu(p).oper[0]^.typ=top_reg then
  1621. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1622. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  1623. asml.remove(hp1);
  1624. hp1.free;
  1625. Result:=true;
  1626. exit;
  1627. end
  1628. else
  1629. begin
  1630. TransferUsedRegs(TmpUsedRegs);
  1631. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1632. if (taicpu(p).oper[1]^.typ = top_ref) and
  1633. { mov reg1, mem1
  1634. mov mem2, reg1 }
  1635. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1636. GetNextInstruction(hp1, hp2) and
  1637. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1638. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1639. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1640. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1641. { change to
  1642. mov reg1, mem1 mov reg1, mem1
  1643. mov mem2, reg1 cmp reg1, mem2
  1644. cmp mem1, reg1
  1645. }
  1646. begin
  1647. asml.remove(hp2);
  1648. hp2.free;
  1649. taicpu(hp1).opcode := A_CMP;
  1650. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1651. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1652. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1653. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  1654. end;
  1655. end;
  1656. end
  1657. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1658. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1659. begin
  1660. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1661. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1662. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  1663. end
  1664. else
  1665. begin
  1666. TransferUsedRegs(TmpUsedRegs);
  1667. if GetNextInstruction(hp1, hp2) and
  1668. MatchOpType(taicpu(p),top_ref,top_reg) and
  1669. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1670. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1671. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1672. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1673. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1674. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1675. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1676. { mov mem1, %reg1
  1677. mov %reg1, mem2
  1678. mov mem2, reg2
  1679. to:
  1680. mov mem1, reg2
  1681. mov reg2, mem2}
  1682. begin
  1683. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1684. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  1685. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1686. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1687. asml.remove(hp2);
  1688. hp2.free;
  1689. end
  1690. {$ifdef i386}
  1691. { this is enabled for i386 only, as the rules to create the reg sets below
  1692. are too complicated for x86-64, so this makes this code too error prone
  1693. on x86-64
  1694. }
  1695. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1696. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1697. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1698. { mov mem1, reg1 mov mem1, reg1
  1699. mov reg1, mem2 mov reg1, mem2
  1700. mov mem2, reg2 mov mem2, reg1
  1701. to: to:
  1702. mov mem1, reg1 mov mem1, reg1
  1703. mov mem1, reg2 mov reg1, mem2
  1704. mov reg1, mem2
  1705. or (if mem1 depends on reg1
  1706. and/or if mem2 depends on reg2)
  1707. to:
  1708. mov mem1, reg1
  1709. mov reg1, mem2
  1710. mov reg1, reg2
  1711. }
  1712. begin
  1713. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1714. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1715. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1716. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1717. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1718. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1719. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1720. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1721. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1722. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1723. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1724. end
  1725. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1726. begin
  1727. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1728. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1729. end
  1730. else
  1731. begin
  1732. asml.remove(hp2);
  1733. hp2.free;
  1734. end
  1735. {$endif i386}
  1736. ;
  1737. end;
  1738. end;
  1739. (* { movl [mem1],reg1
  1740. movl [mem1],reg2
  1741. to
  1742. movl [mem1],reg1
  1743. movl reg1,reg2
  1744. }
  1745. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1746. (taicpu(p).oper[1]^.typ = top_reg) and
  1747. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1748. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1749. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1750. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1751. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1752. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1753. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1754. else*)
  1755. { movl const1,[mem1]
  1756. movl [mem1],reg1
  1757. to
  1758. movl const1,reg1
  1759. movl reg1,[mem1]
  1760. }
  1761. if MatchOpType(Taicpu(p),top_const,top_ref) and
  1762. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1763. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1764. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1765. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1766. begin
  1767. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1768. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1769. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1770. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1771. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1772. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  1773. Result:=true;
  1774. exit;
  1775. end;
  1776. {
  1777. mov* x,reg1
  1778. mov* y,reg1
  1779. to
  1780. mov* y,reg1
  1781. }
  1782. if (taicpu(p).oper[1]^.typ=top_reg) and
  1783. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1784. not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then
  1785. begin
  1786. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);
  1787. { take care of the register (de)allocs following p }
  1788. UpdateUsedRegs(tai(p.next));
  1789. asml.remove(p);
  1790. p.free;
  1791. p:=hp1;
  1792. Result:=true;
  1793. exit;
  1794. end;
  1795. end;
  1796. { search further than the next instruction for a mov }
  1797. if (cs_opt_level3 in current_settings.optimizerswitches) and
  1798. { check as much as possible before the expensive GetNextInstructionUsingReg call }
  1799. (taicpu(p).oper[1]^.typ = top_reg) and
  1800. (taicpu(p).oper[0]^.typ in [top_reg,top_const]) and
  1801. { we work with hp2 here, so hp1 can be still used later on when
  1802. checking for GetNextInstruction_p }
  1803. GetNextInstructionUsingReg(p,hp2,taicpu(p).oper[1]^.reg) and
  1804. MatchInstruction(hp2,A_MOV,[]) and
  1805. MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1806. ((taicpu(p).oper[0]^.typ=top_const) or
  1807. ((taicpu(p).oper[0]^.typ=top_reg) and
  1808. not(RegUsedBetween(taicpu(p).oper[0]^.reg, p, hp2))
  1809. )
  1810. ) then
  1811. begin
  1812. TransferUsedRegs(TmpUsedRegs);
  1813. { we have
  1814. mov x, %treg
  1815. mov %treg, y
  1816. }
  1817. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp2).oper[1]^)) and
  1818. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs)) then
  1819. { we've got
  1820. mov x, %treg
  1821. mov %treg, y
  1822. with %treg is not used after }
  1823. case taicpu(p).oper[0]^.typ Of
  1824. top_reg:
  1825. begin
  1826. { change
  1827. mov %reg, %treg
  1828. mov %treg, y
  1829. to
  1830. mov %reg, y
  1831. }
  1832. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp2,usedregs);
  1833. taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
  1834. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
  1835. { take care of the register (de)allocs following p }
  1836. UpdateUsedRegs(tai(p.next));
  1837. asml.remove(p);
  1838. p.free;
  1839. p:=hp1;
  1840. Result:=true;
  1841. Exit;
  1842. end;
  1843. top_const:
  1844. begin
  1845. { change
  1846. mov const, %treg
  1847. mov %treg, y
  1848. to
  1849. mov const, y
  1850. }
  1851. if (taicpu(hp2).oper[1]^.typ=top_reg) or
  1852. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1853. begin
  1854. taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
  1855. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 7 done',p);
  1856. { take care of the register (de)allocs following p }
  1857. UpdateUsedRegs(tai(p.next));
  1858. asml.remove(p);
  1859. p.free;
  1860. p:=hp1;
  1861. Result:=true;
  1862. Exit;
  1863. end;
  1864. end;
  1865. else
  1866. Internalerror(2019103001);
  1867. end;
  1868. end;
  1869. { Change
  1870. mov %reg1, %reg2
  1871. xxx %reg2, ???
  1872. to
  1873. mov %reg1, %reg2
  1874. xxx %reg1, ???
  1875. to avoid a write/read penalty
  1876. }
  1877. if GetNextInstruction_p and
  1878. MatchOpType(taicpu(p),top_reg,top_reg) and
  1879. ((MatchInstruction(hp1,A_OR,A_AND,A_TEST,[]) and
  1880. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1881. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1882. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg)) or
  1883. (MatchInstruction(hp1,A_CMP,[]) and
  1884. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1885. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1886. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg)
  1887. )
  1888. ) then
  1889. { we have
  1890. mov %reg1, %reg2
  1891. test/or/and %reg2, %reg2
  1892. }
  1893. begin
  1894. TransferUsedRegs(TmpUsedRegs);
  1895. { reg1 will be used after the first instruction,
  1896. so update the allocation info }
  1897. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1898. if GetNextInstruction(hp1, hp2) and
  1899. (hp2.typ = ait_instruction) and
  1900. taicpu(hp2).is_jmp and
  1901. not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1902. { change
  1903. mov %reg1, %reg2
  1904. test/or/and %reg2, %reg2
  1905. jxx
  1906. to
  1907. test %reg1, %reg1
  1908. jxx
  1909. }
  1910. begin
  1911. if taicpu(hp1).opcode<>A_CMP then
  1912. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1913. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1914. DebugMsg(SPeepholeOptimization + 'MovTest/Cmp/Or/AndJxx2Test/Cmp/Or/AndJxx done',p);
  1915. asml.remove(p);
  1916. p.free;
  1917. p := hp1;
  1918. Exit;
  1919. end
  1920. else
  1921. { change
  1922. mov %reg1, %reg2
  1923. test/or/and %reg2, %reg2
  1924. to
  1925. mov %reg1, %reg2
  1926. test/or/and %reg1, %reg1
  1927. }
  1928. begin
  1929. if taicpu(hp1).opcode<>A_CMP then
  1930. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1931. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1932. DebugMsg(SPeepholeOptimization + 'MovTest/Cmp/Or/AndJxx2MovTest/Cmp/Or/AndJxx done',p);
  1933. end;
  1934. end;
  1935. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1936. x >= RetOffset) as it doesn't do anything (it writes either to a
  1937. parameter or to the temporary storage room for the function
  1938. result)
  1939. }
  1940. if GetNextInstruction_p and
  1941. IsExitCode(hp1) and
  1942. MatchOpType(taicpu(p),top_reg,top_ref) and
  1943. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1944. not(assigned(current_procinfo.procdef.funcretsym) and
  1945. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1946. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1947. begin
  1948. asml.remove(p);
  1949. p.free;
  1950. p:=hp1;
  1951. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  1952. RemoveLastDeallocForFuncRes(p);
  1953. Result:=true;
  1954. exit;
  1955. end;
  1956. if GetNextInstruction_p and
  1957. MatchOpType(taicpu(p),top_reg,top_ref) and
  1958. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1959. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1960. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1961. begin
  1962. { change
  1963. mov reg1, mem1
  1964. test/cmp x, mem1
  1965. to
  1966. mov reg1, mem1
  1967. test/cmp x, reg1
  1968. }
  1969. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1970. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  1971. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1972. exit;
  1973. end;
  1974. if GetNextInstruction_p and
  1975. (taicpu(p).oper[1]^.typ = top_reg) and
  1976. (hp1.typ = ait_instruction) and
  1977. GetNextInstruction(hp1, hp2) and
  1978. MatchInstruction(hp2,A_MOV,[]) and
  1979. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1980. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1981. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  1982. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1983. ) then
  1984. begin
  1985. if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1986. (taicpu(hp2).oper[0]^.typ=top_reg) then
  1987. { change movsX/movzX reg/ref, reg2
  1988. add/sub/or/... reg3/$const, reg2
  1989. mov reg2 reg/ref
  1990. dealloc reg2
  1991. to
  1992. add/sub/or/... reg3/$const, reg/ref }
  1993. begin
  1994. TransferUsedRegs(TmpUsedRegs);
  1995. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1996. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1997. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1998. begin
  1999. { by example:
  2000. movswl %si,%eax movswl %si,%eax p
  2001. decl %eax addl %edx,%eax hp1
  2002. movw %ax,%si movw %ax,%si hp2
  2003. ->
  2004. movswl %si,%eax movswl %si,%eax p
  2005. decw %eax addw %edx,%eax hp1
  2006. movw %ax,%si movw %ax,%si hp2
  2007. }
  2008. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  2009. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  2010. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  2011. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  2012. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2013. {
  2014. ->
  2015. movswl %si,%eax movswl %si,%eax p
  2016. decw %si addw %dx,%si hp1
  2017. movw %ax,%si movw %ax,%si hp2
  2018. }
  2019. case taicpu(hp1).ops of
  2020. 1:
  2021. begin
  2022. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  2023. if taicpu(hp1).oper[0]^.typ=top_reg then
  2024. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2025. end;
  2026. 2:
  2027. begin
  2028. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  2029. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  2030. (taicpu(hp1).opcode<>A_SHL) and
  2031. (taicpu(hp1).opcode<>A_SHR) and
  2032. (taicpu(hp1).opcode<>A_SAR) then
  2033. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2034. end;
  2035. else
  2036. internalerror(2008042701);
  2037. end;
  2038. {
  2039. ->
  2040. decw %si addw %dx,%si p
  2041. }
  2042. asml.remove(hp2);
  2043. hp2.Free;
  2044. RemoveCurrentP(p);
  2045. Result:=True;
  2046. Exit;
  2047. end;
  2048. end;
  2049. if MatchOpType(taicpu(hp2),top_reg,top_reg) and
  2050. not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
  2051. ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
  2052. { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
  2053. ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
  2054. )
  2055. {$ifdef i386}
  2056. { byte registers of esi, edi, ebp, esp are not available on i386 }
  2057. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  2058. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  2059. {$endif i386}
  2060. then
  2061. { change movsX/movzX reg/ref, reg2
  2062. add/sub/or/... regX/$const, reg2
  2063. mov reg2, reg3
  2064. dealloc reg2
  2065. to
  2066. movsX/movzX reg/ref, reg3
  2067. add/sub/or/... reg3/$const, reg3
  2068. }
  2069. begin
  2070. TransferUsedRegs(TmpUsedRegs);
  2071. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2072. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2073. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  2074. begin
  2075. { by example:
  2076. movswl %si,%eax movswl %si,%eax p
  2077. decl %eax addl %edx,%eax hp1
  2078. movw %ax,%si movw %ax,%si hp2
  2079. ->
  2080. movswl %si,%eax movswl %si,%eax p
  2081. decw %eax addw %edx,%eax hp1
  2082. movw %ax,%si movw %ax,%si hp2
  2083. }
  2084. DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
  2085. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  2086. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  2087. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  2088. { limit size of constants as well to avoid assembler errors, but
  2089. check opsize to avoid overflow when left shifting the 1 }
  2090. if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=63) then
  2091. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl topsize2memsize[taicpu(hp2).opsize])-1);
  2092. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2093. taicpu(p).changeopsize(taicpu(hp2).opsize);
  2094. if taicpu(p).oper[0]^.typ=top_reg then
  2095. setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2096. taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
  2097. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  2098. {
  2099. ->
  2100. movswl %si,%eax movswl %si,%eax p
  2101. decw %si addw %dx,%si hp1
  2102. movw %ax,%si movw %ax,%si hp2
  2103. }
  2104. case taicpu(hp1).ops of
  2105. 1:
  2106. begin
  2107. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  2108. if taicpu(hp1).oper[0]^.typ=top_reg then
  2109. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2110. end;
  2111. 2:
  2112. begin
  2113. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  2114. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  2115. (taicpu(hp1).opcode<>A_SHL) and
  2116. (taicpu(hp1).opcode<>A_SHR) and
  2117. (taicpu(hp1).opcode<>A_SAR) then
  2118. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2119. end;
  2120. else
  2121. internalerror(2018111801);
  2122. end;
  2123. {
  2124. ->
  2125. decw %si addw %dx,%si p
  2126. }
  2127. asml.remove(hp2);
  2128. hp2.Free;
  2129. end;
  2130. end;
  2131. end;
  2132. if GetNextInstruction_p and
  2133. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  2134. GetNextInstruction(hp1, hp2) and
  2135. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  2136. MatchOperand(Taicpu(p).oper[0]^,0) and
  2137. (Taicpu(p).oper[1]^.typ = top_reg) and
  2138. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  2139. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  2140. { mov reg1,0
  2141. bts reg1,operand1 --> mov reg1,operand2
  2142. or reg1,operand2 bts reg1,operand1}
  2143. begin
  2144. Taicpu(hp2).opcode:=A_MOV;
  2145. asml.remove(hp1);
  2146. insertllitem(hp2,hp2.next,hp1);
  2147. asml.remove(p);
  2148. p.free;
  2149. p:=hp1;
  2150. Result:=true;
  2151. exit;
  2152. end;
  2153. if GetNextInstruction_p and
  2154. MatchInstruction(hp1,A_LEA,[S_L]) and
  2155. MatchOpType(Taicpu(p),top_ref,top_reg) and
  2156. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  2157. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  2158. ) or
  2159. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  2160. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  2161. )
  2162. ) then
  2163. { mov reg1,ref
  2164. lea reg2,[reg1,reg2]
  2165. to
  2166. add reg2,ref}
  2167. begin
  2168. TransferUsedRegs(TmpUsedRegs);
  2169. { reg1 may not be used afterwards }
  2170. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  2171. begin
  2172. Taicpu(hp1).opcode:=A_ADD;
  2173. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  2174. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  2175. asml.remove(p);
  2176. p.free;
  2177. p:=hp1;
  2178. result:=true;
  2179. exit;
  2180. end;
  2181. end;
  2182. end;
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  { Removes the second instruction of a "movXX A,B / movXX B,A" pair,
    i.e. a store immediately followed by a reload of the same data (or
    vice versa).  The first instruction is removed too when its
    destination register is not used afterwards. }
  var
    hp1 : tai;
  begin
    Result:=false;
    if taicpu(p).ops <> 2 then
      exit;
    { the follower must be the same opcode with the same operand size }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
          (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
          { movXX reg1, mem1     or     movXX mem1, reg1
            movXX mem2, reg2            movXX reg2, mem2 }
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg1            movXX reg2, mem1 }
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                        movXX reg1, mem1/reg2
                        movXX mem1/reg2, reg1
                    }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      { keep reg1 marked allocated across the pair }
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { Removes both statements from
                        movXX mem1/reg1, reg2
                        movXX reg2, mem1/reg1
                      when reg2 is dead after the pair }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      begin
                        asml.remove(p);
                        p.free;
                        GetNextInstruction(hp1,p);
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                      end
                    else
                      DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                    { the second movXX is redundant in either case }
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
  2234. function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  2235. var
  2236. hp1 : tai;
  2237. begin
  2238. result:=false;
  2239. { replace
  2240. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  2241. MovX %mreg2,%mreg1
  2242. dealloc %mreg2
  2243. by
  2244. <Op>X %mreg2,%mreg1
  2245. ?
  2246. }
  2247. if GetNextInstruction(p,hp1) and
  2248. { we mix single and double opperations here because we assume that the compiler
  2249. generates vmovapd only after double operations and vmovaps only after single operations }
  2250. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  2251. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  2252. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  2253. (taicpu(p).oper[0]^.typ=top_reg) then
  2254. begin
  2255. TransferUsedRegs(TmpUsedRegs);
  2256. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2257. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  2258. begin
  2259. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  2260. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  2261. DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
  2262. asml.Remove(hp1);
  2263. hp1.Free;
  2264. result:=true;
  2265. end;
  2266. end;
  2267. end;
function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  { Collection of LEA peephole optimizations: strips useless segment
    prefixes, rewrites trivial leas as mov/inc/dec/add/sub, merges
    chained leas, folds a lea into a following instruction's memory
    operand, and turns a "lea/call/lea/ret" epilogue into a plain jmp. }
  var
    hp1, hp2, hp3: tai;
    l : ASizeInt;
    ref: Integer;
    saveref: treference;
  begin
    Result:=false;
    { removes seg register prefixes from LEA operations, as they
      don't do anything }
    taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
    { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
    if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
      (taicpu(p).oper[0]^.ref^.index = NR_NO) and
      { do not mess with leas accessing the stack pointer }
      (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
      (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
      begin
        if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
          (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
              taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous,p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        { "lea (%reg1), %reg1" with zero offset does nothing: remove it }
        else if (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
            RemoveCurrentP(p);
            Result:=true;
            exit;
          end
        { continue to use lea to adjust the stack pointer,
          it is the recommended way, but only if not optimizing for size }
        else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
          (cs_opt_size in current_settings.optimizerswitches) then
          with taicpu(p).oper[0]^.ref^ do
            if (base = taicpu(p).oper[1]^.reg) then
              { "lea x(%reg1), %reg1" is a plain addition of x to reg1 }
              begin
                l:=offset;
                if (l=1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
                  end
                else if (l=-1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
                  end
                else
                  begin
                    { negating low(longint) would overflow, so keep ADD
                      for that single value }
                    if (l<0) and (l<>-2147483648) then
                      begin
                        taicpu(p).opcode:=A_SUB;
                        taicpu(p).loadConst(0,-l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                      end
                    else
                      begin
                        taicpu(p).opcode:=A_ADD;
                        taicpu(p).loadConst(0,l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                      end;
                  end;
                Result:=true;
                exit;
              end;
      end;
    { changes
        lea <ref>, reg2
        mov reg2, reg3
      to
        lea <ref>, reg3
      when reg2 is not used afterwards (and is not the stack pointer) }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOpType(Taicpu(hp1),top_reg,top_reg) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
    { changes
        lea offset1(regX), reg1
        lea offset2(reg1), reg1
      to
        lea offset1+offset2(regX), reg1
      (both references must agree in index/relsymbol/scale/segment/symbol
      so that only base and offset change) }
    if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
      MatchInstruction(hp1,A_LEA,[S_L]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
      (taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp1).oper[0]^.ref^.index) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp1).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp1).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp1).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp1).oper[0]^.ref^.symbol) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaLea2Lea done',p);
        inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
        taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
        RemoveCurrentP(p);
        result:=true;
        exit;
      end;
    { changes
        lea <ref1>, reg1
        <op> ...,<ref. with reg1>,...
      to
        <op> ...,<ref1>,... }
    if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
      GetNextInstruction(p,hp1) and
      (hp1.typ=ait_instruction) and
      not(MatchInstruction(hp1,A_LEA,[])) then
      begin
        { find a reference which uses reg1 }
        if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
          ref:=0
        else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
          ref:=1
        else
          ref:=-1;
        if (ref<>-1) and
          { reg1 must be either the base or the index }
          ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
          begin
            { reg1 can be removed from the reference; keep a copy so the
              reference can be restored if the fold turns out impossible }
            saveref:=taicpu(hp1).oper[ref]^.ref^;
            if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
            else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
            else
              Internalerror(2019111201);
            { check if we can insert all data of the lea into the second instruction }
            if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
              ((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
              ((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
              ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
              ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
              ((taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
              (taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
{$ifdef x86_64}
              { the combined displacement must fit in 32 bit, and RIP-relative
                references allow no base/index }
              and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
              and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
                   ((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
                  )
{$endif x86_64}
              then
              begin
                { reg1 might not be used by the second instruction after it is removed from the reference }
                if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    { reg1 is not updated so it might not be used afterwards }
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
                        { merge the lea's reference parts into hp1's reference }
                        if taicpu(p).oper[0]^.ref^.base<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
                        if taicpu(p).oper[0]^.ref^.index<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
                        if taicpu(p).oper[0]^.ref^.symbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
                        if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
                        if not(taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) then
                          taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
                        inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
                        RemoveCurrentP(p);
                        result:=true;
                        exit;
                      end
                  end;
              end;
            { recover }
            taicpu(hp1).oper[ref]^.ref^:=saveref;
          end;
      end;
    { replace
        lea x(stackpointer),stackpointer
        call procname
        lea -x(stackpointer),stackpointer
        ret
      by
        jmp procname

      this should never hurt except when pic is used, not sure
      how to handle it then

      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      not(cs_create_pic in current_settings.moduleswitches) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=taicpu(hp2).oper[0]^.ref^.base) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp2).oper[0]^.ref^.index) and
      { the second lea must undo exactly the adjustment of the first }
      (taicpu(p).oper[0]^.ref^.offset=-taicpu(hp2).oper[0]^.ref^.offset) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp2).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp2).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp2).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp2).oper[0]^.ref^.symbol) and
      GetNextInstruction(hp2, hp3) and
      MatchInstruction(hp3,A_RET,[S_NO]) and
      (taicpu(hp3).ops=0) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        taicpu(hp1).opcode:=A_JMP;
        taicpu(hp1).is_jmp:=true;
        asml.remove(p);
        asml.remove(hp2);
        asml.remove(hp3);
        p.free;
        hp2.free;
        hp3.free;
        p:=hp1;
        Result:=true;
      end;
  end;
function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  { Helper for OptPass1Sub: folds an immediately preceding DEC/SUB/ADD on
    the same register (with the same operand size) into the
    "sub $const,%reg" instruction at p.  Returns True only when the
    constants cancelled out completely, in which case p itself has been
    removed and repositioned, so the caller must restart its analysis. }
  var
    hp1 : tai;
  begin
    DoSubAddOpt := False;
    if GetLastInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      (taicpu(hp1).opsize = taicpu(p).opsize) then
      case taicpu(hp1).opcode Of
        A_DEC:
          { dec %reg / sub $c,%reg -> sub $(c+1),%reg }
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
            MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB:
          { sub $c1,%reg / sub $c2,%reg -> sub $(c1+c2),%reg }
          if MatchOpType(taicpu(hp1),top_const,top_reg) and
            MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
            end;
        A_ADD:
          begin
            { add $c1,%reg / sub $c2,%reg -> sub $(c2-c1),%reg }
            if MatchOpType(taicpu(hp1),top_const,top_reg) and
              MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
              begin
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                asml.remove(hp1);
                hp1.free;
                { both instructions cancelled each other out: remove the
                  resulting "sub $0,%reg" as well and step p back }
                if (taicpu(p).oper[0]^.val = 0) then
                  begin
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    if not GetLastInstruction(hp1, p) then
                      p := hp1;
                    DoSubAddOpt := True;
                  end
              end;
          end;
        else
          ;
      end;
  end;
  2563. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  2564. {$ifdef i386}
  2565. var
  2566. hp1 : tai;
  2567. {$endif i386}
  2568. begin
  2569. Result:=false;
  2570. { * change "subl $2, %esp; pushw x" to "pushl x"}
  2571. { * change "sub/add const1, reg" or "dec reg" followed by
  2572. "sub const2, reg" to one "sub ..., reg" }
  2573. if MatchOpType(taicpu(p),top_const,top_reg) then
  2574. begin
  2575. {$ifdef i386}
  2576. if (taicpu(p).oper[0]^.val = 2) and
  2577. (taicpu(p).oper[1]^.reg = NR_ESP) and
  2578. { Don't do the sub/push optimization if the sub }
  2579. { comes from setting up the stack frame (JM) }
  2580. (not(GetLastInstruction(p,hp1)) or
  2581. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  2582. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  2583. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  2584. begin
  2585. hp1 := tai(p.next);
  2586. while Assigned(hp1) and
  2587. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  2588. not RegReadByInstruction(NR_ESP,hp1) and
  2589. not RegModifiedByInstruction(NR_ESP,hp1) do
  2590. hp1 := tai(hp1.next);
  2591. if Assigned(hp1) and
  2592. MatchInstruction(hp1,A_PUSH,[S_W]) then
  2593. begin
  2594. taicpu(hp1).changeopsize(S_L);
  2595. if taicpu(hp1).oper[0]^.typ=top_reg then
  2596. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  2597. hp1 := tai(p.next);
  2598. asml.remove(p);
  2599. p.free;
  2600. p := hp1;
  2601. Result:=true;
  2602. exit;
  2603. end;
  2604. end;
  2605. {$endif i386}
  2606. if DoSubAddOpt(p) then
  2607. Result:=true;
  2608. end;
  2609. end;
function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
  { Converts "shl $const,%reg" (const <= 3, 32/64-bit) plus any following
    add/sub/inc/dec/lea instructions on the same register into a single
    lea; on pre-Pentium-II targets it also rewrites a plain shift by
    1..3 into add/lea, which pairs better. }
  var
    TmpBool1,TmpBool2 : Boolean;
    tmpref : treference;
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_const,top_reg) and
      (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
      (taicpu(p).oper[0]^.val <= 3) then
      { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
      begin
        { should we check the next instruction? }
        TmpBool1 := True;
        { have we found an add/sub which could be
          integrated in the lea? }
        TmpBool2 := False;
        { the shifted register becomes the scaled index of the lea }
        reference_reset(tmpref,2,[]);
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
        { fold as many following add/sub/inc/dec/lea instructions on the
          same register as possible; stop if the instruction after the
          candidate reads the flags, because a lea does not set them }
        while TmpBool1 and
          GetNextInstruction(p, hp1) and
          (tai(hp1).typ = ait_instruction) and
          ((((taicpu(hp1).opcode = A_ADD) or
             (taicpu(hp1).opcode = A_SUB)) and
            (taicpu(hp1).oper[1]^.typ = Top_Reg) and
            (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
           (((taicpu(hp1).opcode = A_INC) or
             (taicpu(hp1).opcode = A_DEC)) and
            (taicpu(hp1).oper[0]^.typ = Top_Reg) and
            (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg)) or
           ((taicpu(hp1).opcode = A_LEA) and
            (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
            (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg))) and
          (not GetNextInstruction(hp1,hp2) or
           not instrReadsFlags(hp2)) Do
          begin
            TmpBool1 := False;
            if taicpu(hp1).opcode=A_LEA then
              begin
                { a lea can only be merged while the combined reference
                  stays encodable: no base collected yet, no symbols or
                  segment, and a total scale factor of at most 8 }
                if (TmpRef.base = NR_NO) and
                  (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                  (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
                  (taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and
                  ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                   (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                  begin
                    TmpBool1 := True;
                    TmpBool2 := True;
                    inc(TmpRef.offset, taicpu(hp1).oper[0]^.ref^.offset);
                    if taicpu(hp1).oper[0]^.ref^.scalefactor<>0 then
                      tmpref.scalefactor:=tmpref.scalefactor*taicpu(hp1).oper[0]^.ref^.scalefactor;
                    TmpRef.base := taicpu(hp1).oper[0]^.ref^.base;
                    asml.remove(hp1);
                    hp1.free;
                  end
              end
            else if (taicpu(hp1).oper[0]^.typ = Top_Const) then
              begin
                { add/sub with a constant adjusts the displacement }
                TmpBool1 := True;
                TmpBool2 := True;
                case taicpu(hp1).opcode of
                  A_ADD:
                    inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  A_SUB:
                    dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  else
                    internalerror(2019050536);
                end;
                asml.remove(hp1);
                hp1.free;
              end
            else
              { add of a register supplies the base (only if none was
                collected yet); inc/dec adjust the displacement by one }
              if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (((taicpu(hp1).opcode = A_ADD) and
                  (TmpRef.base = NR_NO)) or
                 (taicpu(hp1).opcode = A_INC) or
                 (taicpu(hp1).opcode = A_DEC)) then
                begin
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      TmpRef.base := taicpu(hp1).oper[0]^.reg;
                    A_INC:
                      inc(TmpRef.offset);
                    A_DEC:
                      dec(TmpRef.offset);
                    else
                      internalerror(2019050535);
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end;
          end;
        if TmpBool2
{$ifndef x86_64}
          or
          { on CPUs older than the Pentium II a lea is preferable to a
            shift even when nothing was folded, unless optimizing for size }
          ((current_settings.optimizecputype < cpu_Pentium2) and
           (taicpu(p).oper[0]^.val <= 3) and
           not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
          then
          begin
            { "shl $1,%reg" with nothing folded is better expressed as
              "add %reg,%reg" }
            if not(TmpBool2) and
              (taicpu(p).oper[0]^.val=1) then
              begin
                hp1:=taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              end
            else
              hp1:=taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                taicpu(p).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'ShlAddLeaSubIncDec2Lea',p);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$ifndef x86_64}
    else if (current_settings.optimizecputype < cpu_Pentium2) and
      MatchOpType(taicpu(p),top_const,top_reg) then
      begin
        { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
          but faster on a 486, and pairable in both U and V pipes on the Pentium
          (unlike shl, which is only pairable in the U pipe) }
        if taicpu(p).oper[0]^.val=1 then
          begin
            hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
              taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end
        { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
                  "shl $3, %reg" to "lea (,%reg,8), %reg" }
        else if (taicpu(p).opsize = S_L) and
          (taicpu(p).oper[0]^.val<= 3) then
          begin
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$endif x86_64}
    ;
  end;
function TX86AsmOptimizer.OptPass1SETcc(var p: tai): boolean;
  { Folds
      set(C)  %reg
      test    %reg,%reg   (or: cmp $0,%reg)
      je/jne  label
    into a single conditional jump on (the inverse of) C; the set(C) is
    also removed when its register is dead after the jump. }
  var
    hp1,hp2,next: tai; SetC, JumpC: TAsmCond; Unconditional: Boolean;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_reg) and
      GetNextInstruction(p, hp1) and
      ((MatchInstruction(hp1, A_TEST, [S_B]) and
        MatchOpType(taicpu(hp1),top_reg,top_reg) and
        (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg)) or
       (MatchInstruction(hp1, A_CMP, [S_B]) and
        MatchOpType(taicpu(hp1),top_const,top_reg) and
        (taicpu(hp1).oper[0]^.val=0))
      ) and
      (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2, A_Jcc, []) then
      { Change from:                    To:
          set(C) %reg                     j(~C) label
          test %reg,%reg/cmp $0,%reg
          je   label

          set(C) %reg                     j(C)  label
          test %reg,%reg/cmp $0,%reg
          jne  label
      }
      begin
        next := tai(p.Next);
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, next);
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        JumpC := taicpu(hp2).condition;
        Unconditional := False;
        { derive the new jump condition from the set condition }
        if conditions_equal(JumpC, C_E) then
          SetC := inverse_cond(taicpu(p).condition)
        else if conditions_equal(JumpC, C_NE) then
          SetC := taicpu(p).condition
        else
          { We've got something weird here (and inefficent) }
          begin
            DebugMsg('DEBUG: Inefficient jump - check code generation', p);
            SetC := C_NONE;
            { JAE/JNB will always branch, because test/cmp-with-zero clears
              CF (use 'condition_in', since C_AE <> C_NB normally) }
            if condition_in(C_AE, JumpC) then
              Unconditional := True
            else
              { Not sure what to do with this jump - drop out }
              Exit;
          end;
        { the test/cmp instruction is no longer needed }
        asml.Remove(hp1);
        hp1.Free;
        if Unconditional then
          MakeUnconditional(taicpu(hp2))
        else
          begin
            if SetC = C_NONE then
              InternalError(2018061401);
            taicpu(hp2).SetCondition(SetC);
          end;
        { remove the set(C) too when its result register is dead after
          the jump }
        if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs) then
          begin
            asml.Remove(p);
            UpdateUsedRegs(next);
            p.Free;
            Result := True;
            p := hp2;
          end;
        DebugMsg(SPeepholeOptimization + 'SETcc/TESTCmp/Jcc -> Jcc',p);
      end;
  end;
function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
  { Looks for an fstp/fld (or fistp/fild) pair on exactly the same memory
    location with the same operand size and removes the pair when it is
    provably dead (extended precision store into a local directly before
    the exit code).
    returns true if a "continue" should be done after this optimization }
  var
    hp1, hp2: tai;
  begin
    Result := false;
    if MatchOpType(taicpu(p),top_ref) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      (((taicpu(hp1).opcode = A_FLD) and
        (taicpu(p).opcode = A_FSTP)) or
       ((taicpu(p).opcode = A_FISTP) and
        (taicpu(hp1).opcode = A_FILD))) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
      begin
        { replacing fstp f;fld f by fst f is only valid for extended because of rounding;
          here the whole pair is removed instead, which requires that the
          store goes to a frame-pointer-based local (no index), the very
          next instruction is exit code, and the location is not below the
          function result variable }
        if (taicpu(p).opsize=S_FX) and
          GetNextInstruction(hp1, hp2) and
          (hp2.typ = ait_instruction) and
          IsExitCode(hp2) and
          (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
          not(assigned(current_procinfo.procdef.funcretsym) and
              (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
          (taicpu(p).oper[0]^.ref^.index = NR_NO) then
          begin
            asml.remove(p);
            asml.remove(hp1);
            p.free;
            hp1.free;
            p := hp2;
            { NOTE(review): presumably drops a now-stale dealloc of the
              function result before the exit code - confirm against
              RemoveLastDeallocForFuncRes }
            RemoveLastDeallocForFuncRes(p);
            Result := true;
          end
        (* can't be done because the store operation rounds
        else
          { fst can't store an extended value! }
          if (taicpu(p).opsize <> S_FX) and
            (taicpu(p).opsize <> S_IQ) then
            begin
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else taicpu(p).opcode := A_FIST;
              asml.remove(hp1);
              hp1.free;
            end
        *)
      end;
  end;
function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
  { Merges an x87 "fld" with a following pop-variant arithmetic
    instruction (fxxxp st,st1), avoiding the extra stack push/pop. }
  var
    hp1, hp2: tai;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = Ait_Instruction) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      (taicpu(hp1).oper[0]^.reg = NR_ST) and
      (taicpu(hp1).oper[1]^.reg = NR_ST1) then
      { change to
          fld   reg          fxxx reg,st
          fxxxp st, st1 (hp1)
        Remark: non commutative operations must be reversed!
      }
      begin
        case taicpu(hp1).opcode Of
          A_FMULP,A_FADDP,
          A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
            begin
              { pop-variant becomes the plain variant; sub/div swap to
                their reversed forms because the operand order changes }
              case taicpu(hp1).opcode Of
                A_FADDP: taicpu(hp1).opcode := A_FADD;
                A_FMULP: taicpu(hp1).opcode := A_FMUL;
                A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                else
                  internalerror(2019050534);
              end;
              taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
              taicpu(hp1).oper[1]^.reg := NR_ST;
              asml.remove(p);
              p.free;
              p := hp1;
              Result:=true;
              exit;
            end;
          else
            ;
        end;
      end
    else
      { fld from memory (single or double) followed by fxxxp st,st1 }
      if MatchOpType(taicpu(p),top_ref) and
        GetNextInstruction(p, hp2) and
        (hp2.typ = Ait_Instruction) and
        MatchOpType(taicpu(hp2),top_reg,top_reg) and
        (taicpu(p).opsize in [S_FS, S_FL]) and
        (taicpu(hp2).oper[0]^.reg = NR_ST) and
        (taicpu(hp2).oper[1]^.reg = NR_ST1) then
        { does the instruction before p load or store the same location? }
        if GetLastInstruction(p, hp1) and
          MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
          MatchOpType(taicpu(hp1),top_ref) and
          RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          if ((taicpu(hp2).opcode = A_FMULP) or
              (taicpu(hp2).opcode = A_FADDP)) then
            { change to
                fld/fst   mem1  (hp1)      fld/fst   mem1
                fld       mem1  (p)        fadd/
                faddp/                     fmul     st, st
                fmulp  st, st1 (hp2) }
            begin
              asml.remove(p);
              p.free;
              p := hp1;
              if (taicpu(hp2).opcode = A_FADDP) then
                taicpu(hp2).opcode := A_FADD
              else
                taicpu(hp2).opcode := A_FMUL;
              taicpu(hp2).oper[1]^.reg := NR_ST;
            end
          else
            { change to
                fld/fst mem1 (hp1)   fld/fst mem1
                fld     mem1 (p)     fld st }
            begin
              taicpu(p).changeopsize(S_FL);
              taicpu(p).loadreg(0,NR_ST);
            end
        else
          begin
            case taicpu(hp2).opcode Of
              A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                { change to
                    fld/fst  mem1    (hp1)   fld/fst    mem1
                    fld      mem2    (p)     fxxx       mem2
                    fxxxp    st, st1 (hp2) }
                begin
                  case taicpu(hp2).opcode Of
                    A_FADDP: taicpu(p).opcode := A_FADD;
                    A_FMULP: taicpu(p).opcode := A_FMUL;
                    A_FSUBP: taicpu(p).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(p).opcode := A_FSUB;
                    A_FDIVP: taicpu(p).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(p).opcode := A_FDIV;
                    else
                      internalerror(2019050533);
                  end;
                  asml.remove(hp2);
                  hp2.free;
                end
              else
                ;
            end
          end
  end;
  2987. function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean;
  2988. var
  2989. v: TCGInt;
  2990. hp1, hp2, hp3, hp4: tai;
  2991. begin
  2992. Result:=false;
  2993. { cmp register,$8000 neg register
  2994. je target --> jo target
  2995. .... only if register is deallocated before jump.}
  2996. case Taicpu(p).opsize of
  2997. S_B: v:=$80;
  2998. S_W: v:=$8000;
  2999. S_L: v:=qword($80000000);
  3000. { actually, this will never happen: cmp with 64 bit constants is not possible }
  3001. S_Q : v:=Int64($8000000000000000);
  3002. else
  3003. internalerror(2013112905);
  3004. end;
  3005. if MatchOpType(taicpu(p),Top_const,top_reg) and
  3006. (taicpu(p).oper[0]^.val=v) and
  3007. GetNextInstruction(p, hp1) and
  3008. MatchInstruction(hp1,A_Jcc,[]) and
  3009. (Taicpu(hp1).condition in [C_E,C_NE]) then
  3010. begin
  3011. TransferUsedRegs(TmpUsedRegs);
  3012. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  3013. if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
  3014. begin
  3015. DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
  3016. Taicpu(p).opcode:=A_NEG;
  3017. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  3018. Taicpu(p).clearop(1);
  3019. Taicpu(p).ops:=1;
  3020. if Taicpu(hp1).condition=C_E then
  3021. Taicpu(hp1).condition:=C_O
  3022. else
  3023. Taicpu(hp1).condition:=C_NO;
  3024. Result:=true;
  3025. exit;
  3026. end;
  3027. end;
  3028. end;
    function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    { Pass-2 peephole optimisations rooted at a MOV instruction:
        - MOV followed by JMP: lets OptPass2JMP rewrite the jump target code,
          then re-runs OptPass1MOV to merge any MOVs that were created, or
          skips over the (possibly dead) label that follows the jump;
        - mov reg1,reg2; movzx/sx reg2,reg3 -> movzx/sx reg1,reg3;
        - three MOVs forming a swap -> a single XCHG (when profitable);
        - mov reg1,reg2; mov/zx/sx (reg2,..),reg2 -> mov/zx/sx (reg1,..),reg2;
        - mov (ref),reg; arith op on reg; mov reg,(ref) -> arith op on (ref);
        - (x86_64 only) movl/movl/addq-or-leaq/shrq $1 computing
          (x + y) shr 1 on Cardinals -> movl/addl/rcrl $1. }

      function IsXCHGAcceptable: Boolean; inline;
        begin
          { Always accept if optimising for size }
          Result := (cs_opt_size in current_settings.optimizerswitches) or
            (
{$ifdef x86_64}
            { XCHG takes 3 cycles on AMD Athlon64 }
            (current_settings.optimizecputype >= cpu_core_i)
{$else x86_64}
            { From the Pentium M onwards, XCHG only has a latency of 2 rather
              than 3, so it becomes a saving compared to three MOVs with two of
              them able to execute simultaneously. [Kit] }
            (current_settings.optimizecputype >= cpu_PentiumM)
{$endif x86_64}
            );
        end;

      var
        hp1,hp2: tai;
{$ifdef x86_64}
        hp3: tai;
{$endif x86_64}
      begin
        Result:=false;
        if not GetNextInstruction(p, hp1) then
          Exit;
        if MatchInstruction(hp1, A_JMP, [S_NO]) then
          begin
            { Sometimes the MOVs that OptPass2JMP produces can be improved
              further, but we can't just put this jump optimisation in pass 1
              because it tends to perform worse when conditional jumps are
              nearby (e.g. when converting CMOV instructions). [Kit] }
            if OptPass2JMP(hp1) then
              { call OptPass1MOV once to potentially merge any MOVs that were created }
              Result := OptPass1MOV(p)
              { OptPass2MOV will now exit but will be called again if OptPass1MOV
                returned True and the instruction is still a MOV, thus checking
                the optimisations below }
            else
              { Since OptPass2JMP returned false, no optimisations were done to
                the jump.  Additionally, a label will definitely follow the jump
                (although it may have become dead), so skip ahead as far as
                possible }
              begin
                while (p <> hp1) do
                  begin
                    { Nothing changed between the MOV and the JMP, so
                      don't bother with "UpdateUsedRegsAndOptimize" }
                    UpdateUsedRegs(p);
                    p := tai(p.Next);
                  end;
                { Use "UpdateUsedRegsAndOptimize" here though, because the
                  label might now be dead and can be stripped out }
                p := tai(UpdateUsedRegsAndOptimize(hp1).Next);
                { If p is a label, then Result will be False and program flow
                  will move onto the next list entry in "PeepHoleOptPass2" }
                if (p = BlockEnd) or not (p.typ in [ait_align, ait_label]) then
                  Result := True;
              end;
          end
        else if MatchOpType(taicpu(p),top_reg,top_reg) and
{$ifdef x86_64}
          MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
{$else x86_64}
          MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          { mov reg1, reg2                mov reg1, reg2
            movzx/sx reg2, reg3      to   movzx/sx reg1, reg3 }
          begin
            taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
            DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
            { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
              or unless supreg(reg3) = supreg(reg2)). [Kit] }
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
              not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
            then
              begin
                asml.remove(p);
                p.free;
                p := hp1;
                Result:=true;
              end;
            exit;
          end
        else if MatchOpType(taicpu(p),top_reg,top_reg) and
          IsXCHGAcceptable and
          { XCHG doesn't support 8-bit registers here (opsize S_B is excluded) }
          (taicpu(p).opsize <> S_B) and
          MatchInstruction(hp1, A_MOV, []) and
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2, A_MOV, []) and
          { Don't need to call MatchOpType for hp2 because the operand matches below cover for it }
          MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^.reg) and
          MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) then
          begin
            { mov %reg1,%reg2
              mov %reg3,%reg1        ->  xchg %reg3,%reg1
              mov %reg2,%reg3
              (%reg2 not used afterwards)

              Note that xchg takes 3 cycles to execute, and generally mov's take
              only one cycle apiece, but the first two mov's can be executed in
              parallel, only taking 2 cycles overall.  Older processors should
              therefore only optimise for size. [Kit]
            }
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
            UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
            if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs) then
              begin
                DebugMsg(SPeepholeOptimization + 'MovMovMov2XChg', p);
                AllocRegBetween(taicpu(hp2).oper[1]^.reg, p, hp1, UsedRegs);
                taicpu(hp1).opcode := A_XCHG;
                asml.Remove(p);
                asml.Remove(hp2);
                p.Free;
                hp2.Free;
                p := hp1;
                Result := True;
                Exit;
              end;
          end
        else if MatchOpType(taicpu(p),top_reg,top_reg) and
{$ifdef x86_64}
          MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
{$else x86_64}
          MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
          MatchOpType(taicpu(hp1),top_ref,top_reg) and
          ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
           or
           (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
          ) and
          (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
          { mov reg1, reg2
            mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2 }
          begin
            { Substitute reg1 for reg2 wherever reg2 appears in the reference }
            if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
            if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
            DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
            asml.remove(p);
            p.free;
            p := hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.typ = top_ref) and
          (hp1.typ = ait_instruction) and
          { while the GetNextInstruction(hp1,hp2) call could be factored out,
            doing it separately in both branches allows to do the cheap checks
            with low probability earlier }
          ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[])
           ) or
           ((taicpu(hp1).opcode=A_LEA) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[]) and
            ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
              (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
             ) or
             (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
              (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
            ) and
            ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
           )
          ) and
          MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
          (taicpu(hp2).oper[1]^.typ = top_ref) then
          begin
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs,tai(p.next));
            UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
            if (RefsEqual(taicpu(hp2).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,TmpUsedRegs))) then
              { change   mov            (ref), reg
                         add/sub/or/... reg2/$const, reg
                         mov            reg, (ref)
                         # release reg
                to       add/sub/or/... reg2/$const, (ref) }
              begin
                case taicpu(hp1).opcode of
                  A_INC,A_DEC,A_NOT,A_NEG :
                    { single-operand ops: just retarget the operand at the memory }
                    taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                  A_LEA :
                    begin
                      { turn "lea x(reg,reg2),reg" into "add reg2/x,(ref)";
                        pick whichever of index/base is not the loaded register,
                        falling back to the displacement }
                      taicpu(hp1).opcode:=A_ADD;
                      if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                      else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                      else
                        taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                      taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                      DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
                    end
                  else
                    { two-operand arith: destination becomes the memory reference }
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                end;
                asml.remove(p);
                asml.remove(hp2);
                p.free;
                hp2.free;
                p := hp1
              end;
            Exit;
{$ifdef x86_64}
          end
        else if (taicpu(p).opsize = S_L) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          (
            MatchInstruction(hp1, A_MOV,[]) and
            (taicpu(hp1).opsize = S_L) and
            (taicpu(hp1).oper[1]^.typ = top_reg)
          ) and (
            GetNextInstruction(hp1, hp2) and
            (tai(hp2).typ=ait_instruction) and
            (taicpu(hp2).opsize = S_Q) and
            (
              (
                MatchInstruction(hp2, A_ADD,[]) and
                (taicpu(hp2).opsize = S_Q) and
                (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
                (
                  (
                    (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
                  ) or (
                    (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
                  )
                )
              ) or (
                MatchInstruction(hp2, A_LEA,[]) and
                (taicpu(hp2).oper[0]^.ref^.offset = 0) and
                (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
                (
                  (
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
                  ) or (
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
                  )
                ) and (
                  (
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
                  ) or (
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
                  )
                )
              )
            )
          ) and (
            GetNextInstruction(hp2, hp3) and
            MatchInstruction(hp3, A_SHR,[]) and
            (taicpu(hp3).opsize = S_Q) and
            (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
            (taicpu(hp3).oper[0]^.val = 1) and
            (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
          ) then
          begin
            { Change   movl x,    reg1d         movl x,    reg1d
                       movl y,    reg2d         movl y,    reg2d
                       addq reg2q,reg1q   or    leaq (reg1q,reg2q),reg1q
                       shrq $1,   reg1q         shrq $1,   reg1q

              ( reg1d and reg2d can be switched around in the first two instructions )

              To       movl x,    reg1d
                       addl y,    reg1d
                       rcrl $1,   reg1d

              This corresponds to the common expression (x + y) shr 1, where
              x and y are Cardinals (replacing "shr 1" with "div 2" produces
              smaller code, but won't account for x + y causing an overflow). [Kit]
            }
            if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
              { Change first MOV command to have the same register as the final output }
              taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
            else
              taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
            { Change second MOV command to an ADD command. This is easier than
              converting the existing command because it means we don't have to
              touch 'y', which might be a complicated reference, and also the
              fact that the third command might either be ADD or LEA. [Kit] }
            taicpu(hp1).opcode := A_ADD;
            { Delete old ADD/LEA instruction }
            asml.remove(hp2);
            hp2.free;
            { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
            taicpu(hp3).opcode := A_RCR;
            taicpu(hp3).changeopsize(S_L);
            setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
{$endif x86_64}
          end;
      end;
    function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    { Folds a register-copy MOV into a following IMUL:
          mov  reg1,reg2
          imul y,reg2         ->    imul y,reg1,reg2
      where y is a constant (or a full-address reference) and either reg2 is
      not used after the IMUL or the IMUL already targets reg2 in its third
      operand.  The MOV is removed. }
      var
        hp1 : tai;
      begin
        Result:=false;
        if (taicpu(p).ops >= 2) and
           ((taicpu(p).oper[0]^.typ = top_const) or
            ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           ((taicpu(p).ops = 2) or
            ((taicpu(p).oper[2]^.typ = top_reg) and
             (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
           GetLastInstruction(p,hp1) and
           MatchInstruction(hp1,A_MOV,[]) and
           MatchOpType(taicpu(hp1),top_reg,top_reg) and
           (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            TransferUsedRegs(TmpUsedRegs);
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) or
              ((taicpu(p).ops = 3) and (taicpu(p).oper[1]^.reg=taicpu(p).oper[2]^.reg)) then
              { change
                  mov reg1,reg2
                  imul y,reg2 to imul y,reg1,reg2 }
              begin
                taicpu(p).ops := 3;
                { NOTE: order matters here - oper[1] must be copied into
                  oper[2] before loadreg(1,...) overwrites it below }
                taicpu(p).loadreg(2,taicpu(p).oper[1]^.reg);
                taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
                asml.remove(hp1);
                hp1.free;
                result:=true;
              end;
          end;
      end;
  3367. procedure TX86AsmOptimizer.ConvertJumpToRET(const p: tai; const ret_p: tai);
  3368. var
  3369. ThisLabel: TAsmLabel;
  3370. begin
  3371. ThisLabel := tasmlabel(taicpu(p).oper[0]^.ref^.symbol);
  3372. ThisLabel.decrefs;
  3373. taicpu(p).opcode := A_RET;
  3374. taicpu(p).is_jmp := false;
  3375. taicpu(p).ops := taicpu(ret_p).ops;
  3376. case taicpu(ret_p).ops of
  3377. 0:
  3378. taicpu(p).clearop(0);
  3379. 1:
  3380. taicpu(p).loadconst(0,taicpu(ret_p).oper[0]^.val);
  3381. else
  3382. internalerror(2016041301);
  3383. end;
  3384. { If the original label is now dead, it might turn out that the label
  3385. immediately follows p. As a result, everything beyond it, which will
  3386. be just some final register configuration and a RET instruction, is
  3387. now dead code. [Kit] }
  3388. { NOTE: This is much faster than introducing a OptPass2RET routine and
  3389. running RemoveDeadCodeAfterJump for each RET instruction, because
  3390. this optimisation rarely happens and most RETs appear at the end of
  3391. routines where there is nothing that can be stripped. [Kit] }
  3392. if not ThisLabel.is_used then
  3393. RemoveDeadCodeAfterJump(p);
  3394. end;
    function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
    { For an unconditional JMP to a plain label, inspects the first
      instruction at the target and either (a) replaces the jump by the RET
      found there, or (b) duplicates a single MOV+RET sequence in place of
      the jump.  Returns True when the jump was rewritten. }
      var
        hp1, hp2 : tai;
      begin
        result:=false;
        { only plain direct jumps (full symbol address, no base/index register) }
        if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
          (taicpu(p).oper[0]^.ref^.index=NR_NO) then
          begin
            { hp1 := first real instruction at the jump target }
            hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
            if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ = ait_instruction) then
              begin
                case taicpu(hp1).opcode of
                  A_RET:
                    {
                      change
                             jmp .L1
                             ...
                         .L1:
                             ret
                      into
                             ret
                    }
                    begin
                      ConvertJumpToRET(p, hp1);
                      result:=true;
                    end;
                  A_MOV:
                    {
                      change
                             jmp .L1
                             ...
                         .L1:
                             mov ##, ##
                             ret
                      into
                             mov ##, ##
                             ret
                    }
                    { This optimisation tends to increase code size if the pass 1 MOV optimisations aren't
                      re-run, so only do this particular optimisation if optimising for speed or when
                      optimisations are very in-depth. [Kit] }
                    if (current_settings.optimizerswitches * [cs_opt_level3, cs_opt_size]) <> [cs_opt_size] then
                      begin
                        GetNextInstruction(hp1, hp2);
                        if not Assigned(hp2) then
                          Exit;
                        if (hp2.typ in [ait_label, ait_align]) then
                          SkipLabels(hp2,hp2);
                        if Assigned(hp2) and MatchInstruction(hp2, A_RET, [S_NO]) then
                          begin
                            { Duplicate the MOV instruction }
                            asml.InsertBefore(hp1.getcopy, p);
                            { Now change the jump into a RET instruction }
                            ConvertJumpToRET(p, hp2);
                            result:=true;
                          end;
                      end;
                  else
                    { Do nothing };
                end;
              end;
          end;
      end;
  3458. function CanBeCMOV(p : tai) : boolean;
  3459. begin
  3460. CanBeCMOV:=assigned(p) and
  3461. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  3462. { we can't use cmov ref,reg because
  3463. ref could be nil and cmov still throws an exception
  3464. if ref=nil but the mov isn't done (FK)
  3465. or ((taicpu(p).oper[0]^.typ = top_ref) and
  3466. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  3467. }
  3468. (MatchOpType(taicpu(p),top_reg,top_reg) or
  3469. { allow references, but only pure symbols or got rel. addressing with RIP as based,
  3470. it is not expected that this can cause a seg. violation }
  3471. (MatchOpType(taicpu(p),top_ref,top_reg) and
  3472. (((taicpu(p).oper[0]^.ref^.base=NR_NO) and (taicpu(p).oper[0]^.ref^.refaddr=addr_no)){$ifdef x86_64} or
  3473. ((taicpu(p).oper[0]^.ref^.base=NR_RIP) and (taicpu(p).oper[0]^.ref^.refaddr=addr_pic)){$endif x86_64}
  3474. ) and
  3475. (taicpu(p).oper[0]^.ref^.index=NR_NO) and
  3476. (taicpu(p).oper[0]^.ref^.offset=0)
  3477. )
  3478. );
  3479. end;
    function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
    { Pass-2 optimisations rooted at a conditional jump:
        - jb/jnb over a single INC/DEC -> (cmc+)adc/sbb with 0;
        - jcc to a jmp whose own target is a RET -> inverted jcc + RET;
        - (not i8086, CPU supports CMOV) a conditional jump over one or two
          runs of MOVs -> CMOVcc sequences, removing the jump(s) and any
          labels whose reference count drops to zero. }
      var
        hp1,hp2,hp3,hp4,hpmov2: tai;
        carryadd_opcode : TAsmOp;
        l : Longint;
        condition : TAsmCond;
        symbol: TAsmSymbol;
      begin
        result:=false;
        symbol:=nil;
        if GetNextInstruction(p,hp1) then
          begin
            symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
            if (hp1.typ=ait_instruction) and
              GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
              (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
              { jb @@1                            cmc
                inc/dec operand           -->     adc/sbb operand,0
                @@1:

                ... and ...

                jnb @@1
                inc/dec operand           -->     adc/sbb operand,0
                @@1: }
              begin
                carryadd_opcode:=A_NONE;
                if Taicpu(p).condition in [C_NAE,C_B] then
                  begin
                    if Taicpu(hp1).opcode=A_INC then
                      carryadd_opcode:=A_ADC;
                    if Taicpu(hp1).opcode=A_DEC then
                      carryadd_opcode:=A_SBB;
                    if carryadd_opcode<>A_NONE then
                      begin
                        { the jump is taken when carry is set, so the carry
                          flag must be complemented before the adc/sbb }
                        Taicpu(p).clearop(0);
                        Taicpu(p).ops:=0;
                        Taicpu(p).is_jmp:=false;
                        Taicpu(p).opcode:=A_CMC;
                        Taicpu(p).condition:=C_NONE;
                        Taicpu(hp1).ops:=2;
                        Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                        Taicpu(hp1).loadconst(0,0);
                        Taicpu(hp1).opcode:=carryadd_opcode;
                        result:=true;
                        exit;
                      end;
                  end;
                if Taicpu(p).condition in [C_AE,C_NB] then
                  begin
                    if Taicpu(hp1).opcode=A_INC then
                      carryadd_opcode:=A_ADC;
                    if Taicpu(hp1).opcode=A_DEC then
                      carryadd_opcode:=A_SBB;
                    if carryadd_opcode<>A_NONE then
                      begin
                        { jump taken when carry is clear: the jump can simply
                          be dropped and inc/dec become adc/sbb with 0 }
                        asml.remove(p);
                        p.free;
                        Taicpu(hp1).ops:=2;
                        Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                        Taicpu(hp1).loadconst(0,0);
                        Taicpu(hp1).opcode:=carryadd_opcode;
                        p:=hp1;
                        result:=true;
                        exit;
                      end;
                  end;
              end;
            { Detect the following:
                jmp<cond>     @Lbl1
                jmp           @Lbl2
                ...
              @Lbl1:
                ret

              Change to:

                jmp<inv_cond> @Lbl2
                ret
            }
            if MatchInstruction(hp1,A_JMP,[]) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
              begin
                hp2:=getlabelwithsym(TAsmLabel(symbol));
                if Assigned(hp2) and SkipLabels(hp2,hp2) and
                  MatchInstruction(hp2,A_RET,[S_NO]) then
                  begin
                    taicpu(p).condition := inverse_cond(taicpu(p).condition);
                    { Change label address to that of the unconditional jump }
                    taicpu(p).loadoper(0, taicpu(hp1).oper[0]^);
                    TAsmLabel(symbol).DecRefs;
                    { Rewrite the unconditional jump as the RET it targeted }
                    taicpu(hp1).opcode := A_RET;
                    taicpu(hp1).is_jmp := false;
                    taicpu(hp1).ops := taicpu(hp2).ops;
                    DebugMsg(SPeepholeOptimization+'JccJmpRet2J!ccRet',p);
                    case taicpu(hp2).ops of
                      0:
                        taicpu(hp1).clearop(0);
                      1:
                        taicpu(hp1).loadconst(0,taicpu(hp2).oper[0]^.val);
                      else
                        internalerror(2016041302);
                    end;
                  end;
              end;
          end;
{$ifndef i8086}
        if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
          begin
            { check for
                jCC   xxx
                <several movs>
             xxx:
            }
            l:=0;
            GetNextInstruction(p, hp1);
            while assigned(hp1) and
              CanBeCMOV(hp1) and
              { stop on labels }
              not(hp1.typ=ait_label) do
              begin
                inc(l);
                GetNextInstruction(hp1,hp1);
              end;
            if assigned(hp1) then
              begin
                if FindLabel(tasmlabel(symbol),hp1) then
                  begin
                    { at most 4 MOVs are converted to CMOVs }
                    if (l<=4) and (l>0) then
                      begin
                        condition:=inverse_cond(taicpu(p).condition);
                        GetNextInstruction(p,hp1);
                        repeat
                          if not Assigned(hp1) then
                            InternalError(2018062900);
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          UpdateUsedRegs(hp1);
                          GetNextInstruction(hp1,hp1);
                        until not(CanBeCMOV(hp1));
                        { Remember what hp1 is in case there's multiple aligns to get rid of }
                        hp2 := hp1;
                        repeat
                          if not Assigned(hp2) then
                            InternalError(2018062910);
                          case hp2.typ of
                            ait_label:
                              { What we expected - break out of the loop (it won't be a dead label at the top of
                                a cluster because that was optimised at an earlier stage) }
                              Break;
                            ait_align:
                              { Go to the next entry until a label is found (may be multiple aligns before it) }
                              begin
                                hp2 := tai(hp2.Next);
                                Continue;
                              end;
                            else
                              begin
                                { Might be a comment or temporary allocation entry }
                                if not (hp2.typ in SkipInstr) then
                                  InternalError(2018062911);
                                hp2 := tai(hp2.Next);
                                Continue;
                              end;
                          end;
                        until False;
                        { Now we can safely decrement the reference count }
                        tasmlabel(symbol).decrefs;
                        DebugMsg(SPeepholeOptimization+'JccMov2CMov',p);
                        { Remove the original jump }
                        asml.Remove(p);
                        p.Free;
                        GetNextInstruction(hp2, p); { Instruction after the label }
                        { Remove the label if this is its final reference }
                        if (tasmlabel(symbol).getrefs=0) then
                          StripLabelFast(hp1);
                        if Assigned(p) then
                          begin
                            UpdateUsedRegs(p);
                            result:=true;
                          end;
                        exit;
                      end;
                  end
                else
                  begin
                    { check further for
                            jCC   xxx
                            <several movs 1>
                            jmp   yyy
                    xxx:
                            <several movs 2>
                    yyy:
                    }
                    { hp2 points to jmp yyy }
                    hp2:=hp1;
                    { skip hp1 to xxx (or an align right before it) }
                    GetNextInstruction(hp1, hp1);
                    if assigned(hp2) and
                      assigned(hp1) and
                      (l<=3) and
                      (hp2.typ=ait_instruction) and
                      (taicpu(hp2).is_jmp) and
                      (taicpu(hp2).condition=C_None) and
                      { real label and jump, no further references to the
                        label are allowed }
                      (tasmlabel(symbol).getrefs=1) and
                      FindLabel(tasmlabel(symbol),hp1) then
                      begin
                        l:=0;
                        { skip hp1 to <several moves 2> }
                        if (hp1.typ = ait_align) then
                          GetNextInstruction(hp1, hp1);
                        GetNextInstruction(hp1, hpmov2);
                        hp1 := hpmov2;
                        while assigned(hp1) and
                          CanBeCMOV(hp1) do
                          begin
                            inc(l);
                            GetNextInstruction(hp1, hp1);
                          end;
                        { hp1 points to yyy (or an align right before it) }
                        hp3 := hp1;
                        if assigned(hp1) and
                          FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                          begin
                            { convert <several movs 1> with the inverted condition }
                            condition:=inverse_cond(taicpu(p).condition);
                            GetNextInstruction(p,hp1);
                            repeat
                              taicpu(hp1).opcode:=A_CMOVcc;
                              taicpu(hp1).condition:=condition;
                              UpdateUsedRegs(hp1);
                              GetNextInstruction(hp1,hp1);
                            until not(assigned(hp1)) or
                              not(CanBeCMOV(hp1));
                            { convert <several movs 2> with the original condition }
                            condition:=inverse_cond(condition);
                            hp1 := hpmov2;
                            { hp1 is now at <several movs 2> }
                            while Assigned(hp1) and CanBeCMOV(hp1) do
                              begin
                                taicpu(hp1).opcode:=A_CMOVcc;
                                taicpu(hp1).condition:=condition;
                                UpdateUsedRegs(hp1);
                                GetNextInstruction(hp1,hp1);
                              end;
                            hp1 := p;
                            { Get first instruction after label }
                            GetNextInstruction(hp3, p);
                            if assigned(p) and (hp3.typ = ait_align) then
                              GetNextInstruction(p, p);
                            { Don't dereference yet, as doing so will cause
                              GetNextInstruction to skip the label and
                              optional align marker. [Kit] }
                            GetNextInstruction(hp2, hp4);
                            DebugMsg(SPeepholeOptimization+'JccMovJmpMov2CMovCMov',hp1);
                            { remove jCC }
                            asml.remove(hp1);
                            hp1.free;
                            { Now we can safely decrement it }
                            tasmlabel(symbol).decrefs;
                            { Remove label xxx (it will have a ref of zero due to the initial check }
                            StripLabelFast(hp4);
                            { remove jmp }
                            symbol := taicpu(hp2).oper[0]^.ref^.symbol;
                            asml.remove(hp2);
                            hp2.free;
                            { As before, now we can safely decrement it }
                            tasmlabel(symbol).decrefs;
                            { Remove label yyy (and the optional alignment) if its reference falls to zero }
                            if tasmlabel(symbol).getrefs = 0 then
                              StripLabelFast(hp3);
                            if Assigned(p) then
                              begin
                                UpdateUsedRegs(p);
                                result:=true;
                              end;
                            exit;
                          end;
                      end;
                  end;
              end;
          end;
{$endif i8086}
      end;
  3759. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  3760. var
  3761. hp1,hp2: tai;
  3762. begin
  3763. result:=false;
  3764. if (taicpu(p).oper[1]^.typ = top_reg) and
  3765. GetNextInstruction(p,hp1) and
  3766. (hp1.typ = ait_instruction) and
  3767. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  3768. GetNextInstruction(hp1,hp2) and
  3769. MatchInstruction(hp2,A_MOV,[]) and
  3770. (taicpu(hp2).oper[0]^.typ = top_reg) and
  3771. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  3772. {$ifdef i386}
  3773. { not all registers have byte size sub registers on i386 }
  3774. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  3775. {$endif i386}
  3776. (((taicpu(hp1).ops=2) and
  3777. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  3778. ((taicpu(hp1).ops=1) and
  3779. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  3780. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  3781. begin
  3782. { change movsX/movzX reg/ref, reg2
  3783. add/sub/or/... reg3/$const, reg2
  3784. mov reg2 reg/ref
  3785. to add/sub/or/... reg3/$const, reg/ref }
  3786. { by example:
  3787. movswl %si,%eax movswl %si,%eax p
  3788. decl %eax addl %edx,%eax hp1
  3789. movw %ax,%si movw %ax,%si hp2
  3790. ->
  3791. movswl %si,%eax movswl %si,%eax p
  3792. decw %eax addw %edx,%eax hp1
  3793. movw %ax,%si movw %ax,%si hp2
  3794. }
  3795. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  3796. {
  3797. ->
  3798. movswl %si,%eax movswl %si,%eax p
  3799. decw %si addw %dx,%si hp1
  3800. movw %ax,%si movw %ax,%si hp2
  3801. }
  3802. case taicpu(hp1).ops of
  3803. 1:
  3804. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  3805. 2:
  3806. begin
  3807. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  3808. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  3809. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3810. end;
  3811. else
  3812. internalerror(2008042701);
  3813. end;
  3814. {
  3815. ->
  3816. decw %si addw %dx,%si p
  3817. }
  3818. DebugMsg(SPeepholeOptimization + 'var3',p);
  3819. asml.remove(p);
  3820. asml.remove(hp2);
  3821. p.free;
  3822. hp2.free;
  3823. p:=hp1;
  3824. end
  3825. else if taicpu(p).opcode=A_MOVZX then
  3826. begin
  3827. { removes superfluous And's after movzx's }
  3828. if (taicpu(p).oper[1]^.typ = top_reg) and
  3829. GetNextInstruction(p, hp1) and
  3830. (tai(hp1).typ = ait_instruction) and
  3831. (taicpu(hp1).opcode = A_AND) and
  3832. (taicpu(hp1).oper[0]^.typ = top_const) and
  3833. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3834. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3835. begin
  3836. case taicpu(p).opsize Of
  3837. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  3838. if (taicpu(hp1).oper[0]^.val = $ff) then
  3839. begin
  3840. DebugMsg(SPeepholeOptimization + 'var4',p);
  3841. asml.remove(hp1);
  3842. hp1.free;
  3843. end;
  3844. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  3845. if (taicpu(hp1).oper[0]^.val = $ffff) then
  3846. begin
  3847. DebugMsg(SPeepholeOptimization + 'var5',p);
  3848. asml.remove(hp1);
  3849. hp1.free;
  3850. end;
  3851. {$ifdef x86_64}
  3852. S_LQ:
  3853. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  3854. begin
  3855. if (cs_asm_source in current_settings.globalswitches) then
  3856. asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
  3857. asml.remove(hp1);
  3858. hp1.Free;
  3859. end;
  3860. {$endif x86_64}
  3861. else
  3862. ;
  3863. end;
  3864. end;
  3865. { changes some movzx constructs to faster synonims (all examples
  3866. are given with eax/ax, but are also valid for other registers)}
  3867. if (taicpu(p).oper[1]^.typ = top_reg) then
  3868. if (taicpu(p).oper[0]^.typ = top_reg) then
  3869. case taicpu(p).opsize of
  3870. S_BW:
  3871. begin
  3872. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  3873. not(cs_opt_size in current_settings.optimizerswitches) then
  3874. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  3875. begin
  3876. taicpu(p).opcode := A_AND;
  3877. taicpu(p).changeopsize(S_W);
  3878. taicpu(p).loadConst(0,$ff);
  3879. DebugMsg(SPeepholeOptimization + 'var7',p);
  3880. end
  3881. else if GetNextInstruction(p, hp1) and
  3882. (tai(hp1).typ = ait_instruction) and
  3883. (taicpu(hp1).opcode = A_AND) and
  3884. (taicpu(hp1).oper[0]^.typ = top_const) and
  3885. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3886. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3887. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  3888. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  3889. begin
  3890. DebugMsg(SPeepholeOptimization + 'var8',p);
  3891. taicpu(p).opcode := A_MOV;
  3892. taicpu(p).changeopsize(S_W);
  3893. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  3894. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3895. end;
  3896. end;
  3897. S_BL:
  3898. begin
  3899. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  3900. not(cs_opt_size in current_settings.optimizerswitches) then
  3901. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  3902. begin
  3903. taicpu(p).opcode := A_AND;
  3904. taicpu(p).changeopsize(S_L);
  3905. taicpu(p).loadConst(0,$ff)
  3906. end
  3907. else if GetNextInstruction(p, hp1) and
  3908. (tai(hp1).typ = ait_instruction) and
  3909. (taicpu(hp1).opcode = A_AND) and
  3910. (taicpu(hp1).oper[0]^.typ = top_const) and
  3911. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3912. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3913. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  3914. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  3915. begin
  3916. DebugMsg(SPeepholeOptimization + 'var10',p);
  3917. taicpu(p).opcode := A_MOV;
  3918. taicpu(p).changeopsize(S_L);
  3919. { do not use R_SUBWHOLE
  3920. as movl %rdx,%eax
  3921. is invalid in assembler PM }
  3922. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  3923. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3924. end
  3925. end;
  3926. {$ifndef i8086}
  3927. S_WL:
  3928. begin
  3929. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  3930. not(cs_opt_size in current_settings.optimizerswitches) then
  3931. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  3932. begin
  3933. DebugMsg(SPeepholeOptimization + 'var11',p);
  3934. taicpu(p).opcode := A_AND;
  3935. taicpu(p).changeopsize(S_L);
  3936. taicpu(p).loadConst(0,$ffff);
  3937. end
  3938. else if GetNextInstruction(p, hp1) and
  3939. (tai(hp1).typ = ait_instruction) and
  3940. (taicpu(hp1).opcode = A_AND) and
  3941. (taicpu(hp1).oper[0]^.typ = top_const) and
  3942. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3943. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3944. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  3945. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  3946. begin
  3947. DebugMsg(SPeepholeOptimization + 'var12',p);
  3948. taicpu(p).opcode := A_MOV;
  3949. taicpu(p).changeopsize(S_L);
  3950. { do not use R_SUBWHOLE
  3951. as movl %rdx,%eax
  3952. is invalid in assembler PM }
  3953. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  3954. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  3955. end;
  3956. end;
  3957. {$endif i8086}
  3958. else
  3959. ;
  3960. end
  3961. else if (taicpu(p).oper[0]^.typ = top_ref) then
  3962. begin
  3963. if GetNextInstruction(p, hp1) and
  3964. (tai(hp1).typ = ait_instruction) and
  3965. (taicpu(hp1).opcode = A_AND) and
  3966. MatchOpType(taicpu(hp1),top_const,top_reg) and
  3967. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3968. begin
  3969. //taicpu(p).opcode := A_MOV;
  3970. case taicpu(p).opsize Of
  3971. S_BL:
  3972. begin
  3973. DebugMsg(SPeepholeOptimization + 'var13',p);
  3974. taicpu(hp1).changeopsize(S_L);
  3975. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3976. end;
  3977. S_WL:
  3978. begin
  3979. DebugMsg(SPeepholeOptimization + 'var14',p);
  3980. taicpu(hp1).changeopsize(S_L);
  3981. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  3982. end;
  3983. S_BW:
  3984. begin
  3985. DebugMsg(SPeepholeOptimization + 'var15',p);
  3986. taicpu(hp1).changeopsize(S_W);
  3987. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3988. end;
  3989. {$ifdef x86_64}
  3990. S_BQ:
  3991. begin
  3992. DebugMsg(SPeepholeOptimization + 'var16',p);
  3993. taicpu(hp1).changeopsize(S_Q);
  3994. taicpu(hp1).loadConst(
  3995. 0, taicpu(hp1).oper[0]^.val and $ff);
  3996. end;
  3997. S_WQ:
  3998. begin
  3999. DebugMsg(SPeepholeOptimization + 'var17',p);
  4000. taicpu(hp1).changeopsize(S_Q);
  4001. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  4002. end;
  4003. S_LQ:
  4004. begin
  4005. DebugMsg(SPeepholeOptimization + 'var18',p);
  4006. taicpu(hp1).changeopsize(S_Q);
  4007. taicpu(hp1).loadConst(
  4008. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  4009. end;
  4010. {$endif x86_64}
  4011. else
  4012. Internalerror(2017050704)
  4013. end;
  4014. end;
  4015. end;
  4016. end;
  4017. end;
  { Pass-1 peephole optimizations for an AND instruction at p.
    Tries to fold the AND with the following instruction (another AND,
    a MOVZX/MOVSX, a SHL, or a conditional jump), then applies lone
    rewrites of the AND itself.  Returns True when p was removed and
    replaced, so the caller re-examines the new p. }
  4018. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  4019. var
  4020. hp1 : tai;
  4021. MaskLength : Cardinal;
  4022. begin
  4023. Result:=false;
  4024. if GetNextInstruction(p, hp1) then
  4025. begin
  { Fold two consecutive AND-immediates on the same register into one }
  4026. if MatchOpType(taicpu(p),top_const,top_reg) and
  4027. MatchInstruction(hp1,A_AND,[]) and
  4028. MatchOpType(taicpu(hp1),top_const,top_reg) and
  4029. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  4030. { the second register must contain the first one, so compare their subreg types }
  4031. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  4032. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  4033. { change
  4034. and const1, reg
  4035. and const2, reg
  4036. to
  4037. and (const1 and const2), reg
  4038. }
  4039. begin
  4040. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  4041. DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
  4042. asml.remove(p);
  4043. p.Free;
  4044. p:=hp1;
  4045. Result:=true;
  4046. exit;
  4047. end
  { "and const,reg ; movzx reg,reg": the MOVZX is redundant when the
    constant already clears every bit above the zero-extension source
    size, so it can be removed }
  4048. else if MatchOpType(taicpu(p),top_const,top_reg) and
  4049. MatchInstruction(hp1,A_MOVZX,[]) and
  4050. (taicpu(hp1).oper[0]^.typ = top_reg) and
  4051. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  4052. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  4053. (((taicpu(p).opsize=S_W) and
  4054. (taicpu(hp1).opsize=S_BW)) or
  4055. ((taicpu(p).opsize=S_L) and
  4056. (taicpu(hp1).opsize in [S_WL,S_BL]))
  4057. {$ifdef x86_64}
  4058. or
  4059. ((taicpu(p).opsize=S_Q) and
  4060. (taicpu(hp1).opsize in [S_BQ,S_WQ]))
  4061. {$endif x86_64}
  4062. ) then
  4063. begin
  4064. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  4065. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  4066. ) or
  4067. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  4068. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  4069. then
  4070. begin
  4071. { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
  4072. 32-bit register to a 64-bit register, or even a version called MOVZXD, so
  4073. code that tests for the presence of AND 0xffffffff followed by MOVZX is
  4074. wasted, and is indictive of a compiler bug if it were triggered. [Kit]
  4075. NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
  4076. }
  4077. DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
  4078. asml.remove(hp1);
  4079. hp1.free;
  { NOTE(review): hp1 is removed but Result stays False here - presumably
    intentional since p itself is unchanged; confirm against the pass driver }
  4080. Exit;
  4081. end;
  4082. end
  { "and mask,reg ; shl count,reg": the AND can be dropped when every
    bit it would clear is shifted out anyway }
  4083. else if MatchOpType(taicpu(p),top_const,top_reg) and
  4084. MatchInstruction(hp1,A_SHL,[]) and
  4085. MatchOpType(taicpu(hp1),top_const,top_reg) and
  4086. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
  4087. begin
  { range checking is temporarily disabled: BsrQWord on 0 and the
    subtraction below may wrap, which is handled by the mask test }
  4088. {$ifopt R+}
  4089. {$define RANGE_WAS_ON}
  4090. {$R-}
  4091. {$endif}
  4092. { get length of potential and mask }
  4093. MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
  4094. { really a mask? }
  4095. {$ifdef RANGE_WAS_ON}
  4096. {$R+}
  4097. {$endif}
  4098. if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
  4099. { unmasked part shifted out? }
  4100. ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
  4101. begin
  4102. DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
  4103. { take care of the register (de)allocs following p }
  4104. UpdateUsedRegs(tai(p.next));
  4105. asml.remove(p);
  4106. p.free;
  4107. p:=hp1;
  4108. Result:=true;
  4109. exit;
  4110. end;
  4111. end
  { "and const,reg ; movsx reg,reg": the sign-extension is redundant
    when the constant leaves the sign bit of the source size clear }
  4112. else if MatchOpType(taicpu(p),top_const,top_reg) and
  4113. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  4114. (taicpu(hp1).oper[0]^.typ = top_reg) and
  4115. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  4116. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  4117. (((taicpu(p).opsize=S_W) and
  4118. (taicpu(hp1).opsize=S_BW)) or
  4119. ((taicpu(p).opsize=S_L) and
  4120. (taicpu(hp1).opsize in [S_WL,S_BL]))
  4121. {$ifdef x86_64}
  4122. or
  4123. ((taicpu(p).opsize=S_Q) and
  4124. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  4125. {$endif x86_64}
  4126. ) then
  4127. begin
  4128. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  4129. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  4130. ) or
  4131. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  4132. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  4133. {$ifdef x86_64}
  4134. or
  4135. (((taicpu(hp1).opsize)=S_LQ) and
  4136. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  4137. )
  4138. {$endif x86_64}
  4139. then
  4140. begin
  4141. DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
  4142. asml.remove(hp1);
  4143. hp1.free;
  4144. Exit;
  4145. end;
  4146. end
  4147. else if (taicpu(p).oper[1]^.typ = top_reg) and
  4148. (hp1.typ = ait_instruction) and
  4149. (taicpu(hp1).is_jmp) and
  4150. (taicpu(hp1).opcode<>A_JMP) and
  4151. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  4152. begin
  4153. { change
  4154. and x, reg
  4155. jxx
  4156. to
  4157. test x, reg
  4158. jxx
  4159. if reg is deallocated before the
  4160. jump, but only if it's a conditional jump (PFV)
  4161. }
  4162. taicpu(p).opcode := A_TEST;
  4163. Exit;
  4164. end;
  4165. end;
  4166. { Lone AND tests }
  4167. if MatchOpType(taicpu(p),top_const,top_reg) then
  4168. begin
  4169. {
  4170. - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
  4171. - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
  4172. - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
  4173. }
  4174. if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
  4175. ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
  4176. ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
  4177. begin
  4178. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg)
  4179. end;
  4180. end;
  4181. end;
  function TX86AsmOptimizer.OptPass2Lea(var p : tai) : Boolean;
    begin
      { Rewrite "lea x(base,index),reg" as an ADD when reg is already one
        of the two address components.  ADD clobbers the flags while LEA
        does not, so both rewrites require the flags to be dead. }
      Result:=false;
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;
      if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
         (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
        begin
          { destination matches the base register: add the index to it.
            Note: oper[1] is loaded first because oper[0] still holds the
            reference that both loadreg calls read from. }
          taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
          taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
          taicpu(p).opcode:=A_ADD;
          DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
          result:=true;
        end
      else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
         (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
        begin
          { destination matches the index register: add the base to it }
          taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
          taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
          taicpu(p).opcode:=A_ADD;
          DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
          result:=true;
        end;
    end;
  { Post-peephole optimization for LEA: collapses the stack-frame pattern
    "lea x(sp),sp ... call proc ... lea -x(sp),sp ; ret" into a plain
    "jmp proc" (tail call).  Only done at optimization level 4 because it
    destroys stack back traces. }
  4206. function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
  { Advances hp1 past every instruction that does not reference the stack
    pointer; returns False when the instruction stream ends first.
    The commented-out conditions show earlier, stricter filters. }
  4207. function SkipSimpleInstructions(var hp1 : tai) : Boolean;
  4208. begin
  4209. { we can skip all instructions not messing with the stack pointer }
  4210. while assigned(hp1) and {MatchInstruction(taicpu(hp1),[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
  4211. A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
  4212. A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
  4213. A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
  4214. ({(taicpu(hp1).ops=0) or }
  4215. ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
  4216. (MatchOpType(taicpu(hp1),top_ref,top_reg))
  4217. ) and }
  4218. not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
  4219. )
  4220. ) do
  4221. GetNextInstruction(hp1,hp1);
  4222. Result:=assigned(hp1);
  4223. end;
  4224. var
  4225. hp1, hp2, hp3: tai;
  4226. begin
  4227. Result:=false;
  4228. { replace
  4229. leal(q) x(<stackpointer>),<stackpointer>
  4230. call procname
  4231. leal(q) -x(<stackpointer>),<stackpointer>
  4232. ret
  4233. by
  4234. jmp procname
  4235. but do it only on level 4 because it destroys stack back traces
  4236. }
  4237. if (cs_opt_level4 in current_settings.optimizerswitches) and
  4238. MatchOpType(taicpu(p),top_ref,top_reg) and
  4239. (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
  4240. (taicpu(p).oper[0]^.ref^.index=NR_NO) and
  4241. { the -8 or -24 are not required, but bail out early if possible,
  4242. higher values are unlikely }
  4243. ((taicpu(p).oper[0]^.ref^.offset=-8) or
  4244. (taicpu(p).oper[0]^.ref^.offset=-24)) and
  4245. (taicpu(p).oper[0]^.ref^.symbol=nil) and
  4246. (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
  4247. (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
  4248. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
  4249. GetNextInstruction(p, hp1) and
  4250. { trick to skip label }
  4251. ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
  4252. SkipSimpleInstructions(hp1) and
  4253. MatchInstruction(hp1,A_CALL,[S_NO]) and
  4254. GetNextInstruction(hp1, hp2) and
  { the second lea must exactly undo the first stack adjustment }
  4255. MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
  4256. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  4257. (taicpu(hp2).oper[0]^.ref^.offset=-taicpu(p).oper[0]^.ref^.offset) and
  4258. (taicpu(hp2).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
  4259. (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
  4260. (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
  4261. (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
  4262. (taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and
  4263. (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
  4264. GetNextInstruction(hp2, hp3) and
  4265. { trick to skip label }
  4266. ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
  4267. MatchInstruction(hp3,A_RET,[S_NO]) and
  4268. (taicpu(hp3).ops=0) then
  4269. begin
  { turn the call into the tail jump and delete the second lea + ret }
  4270. taicpu(hp1).opcode := A_JMP;
  4271. taicpu(hp1).is_jmp := true;
  4272. DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
  4273. RemoveCurrentP(p);
  4274. AsmL.Remove(hp2);
  4275. hp2.free;
  4276. AsmL.Remove(hp3);
  4277. hp3.free;
  4278. Result:=true;
  4279. end;
  4280. end;
  function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
    var
      ImmStr, RegStr: string;
    begin
      { Post-peephole rewrites of "mov $const,%reg" that shrink the
        encoding; returns True when the instruction was changed. }
      Result:=false;
      if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
        begin
          if taicpu(p).oper[0]^.val = 0 then
            begin
              { change "mov $0,%reg" into "xor %reg,%reg" - but XOR
                scrambles the flags, so skip it when they are live }
              if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
                begin
                  taicpu(p).opcode := A_XOR;
                  taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                  Result := True;
                end;
            end
          else if (taicpu(p).oper[0]^.val >= $1) and (taicpu(p).oper[0]^.val <= $FFFFFFFF) then
            begin
              { Code size reduction by J. Gareth "Kit" Moreton }
              { a 64-bit move of an immediate that fits in 32 bits can use
                the 32-bit register form: the upper 32 bits are set to zero }
              if taicpu(p).opsize = S_Q then
                begin
                  RegStr := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                  ImmStr := debug_tostr(taicpu(p).oper[0]^.val);
                  { The actual optimization }
                  setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                  taicpu(p).changeopsize(S_L);
                  DebugMsg(SPeepholeOptimization + 'movq $' + ImmStr + ',' + RegStr + ' -> movl $' + ImmStr + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                  Result := True;
                end;
            end
          else if taicpu(p).oper[0]^.val = -1 then
            begin
              { change "mov $-1,%reg" into "or $-1,%reg" }
              { NOTES:
                - No size saving is made when changing a Word-sized assignment unless the register is AX (smaller encoding)
                - This operation creates a false dependency on the register, so only do it when optimising for size
                - It is possible to set memory operands using this method, but this creates an even greater false dependency, so don't do this at all
                - OR scrambles the CPU flags, so they must not be in use }
              if (cs_opt_size in current_settings.optimizerswitches) and
                 (taicpu(p).opsize <> S_B) and
                 not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
                begin
                  taicpu(p).opcode := A_OR;
                  Result := True;
                end;
            end;
        end;
    end;
  function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
    begin
      { change "cmp $0,%reg" to the shorter "test %reg,%reg" }
      Result:=MatchOpType(taicpu(p),top_const,top_reg) and
        (taicpu(p).oper[0]^.val = 0);
      if Result then
        begin
          taicpu(p).opcode := A_TEST;
          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
        end;
    end;
  { Post-peephole optimization for TEST/OR: removes a flag-setting
    TEST/OR that merely re-derives the ZF already produced by the
    arithmetic/logic instruction directly before it, or - failing
    that - canonicalises "test $-1,%reg" into "test %reg,%reg". }
  4347. function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  4348. var
  4349. IsTestConstX : Boolean;
  4350. hp1,hp2 : tai;
  4351. begin
  4352. Result:=false;
  4353. { removes the line marked with (x) from the sequence
  4354. and/or/xor/add/sub/... $x, %y
  4355. test/or %y, %y | test $-1, %y (x)
  4356. j(n)z _Label
  4357. as the first instruction already adjusts the ZF
  4358. %y operand may also be a reference }
  4359. IsTestConstX:=(taicpu(p).opcode=A_TEST) and
  4360. MatchOperand(taicpu(p).oper[0]^,-1);
  4361. if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
  4362. GetLastInstruction(p, hp1) and
  4363. (tai(hp1).typ = ait_instruction) and
  4364. GetNextInstruction(p,hp2) and
  4365. MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
  4366. case taicpu(hp1).opcode Of
  4367. A_ADD, A_SUB, A_OR, A_XOR, A_AND:
  4368. begin
  4369. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  4370. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  4371. { and in case of carry for A(E)/B(E)/C/NC }
  4372. ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
  4373. ((taicpu(hp1).opcode <> A_ADD) and
  4374. (taicpu(hp1).opcode <> A_SUB))) then
  4375. begin
  { drop the redundant test/or instruction at p }
  4376. hp1 := tai(p.next);
  4377. asml.remove(p);
  4378. p.free;
  4379. p := tai(hp1);
  4380. Result:=true;
  4381. end;
  4382. end;
  4383. A_SHL, A_SAL, A_SHR, A_SAR:
  4384. begin
  4385. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  4386. { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
  4387. { therefore, it's only safe to do this optimization for }
  4388. { shifts by a (nonzero) constant }
  4389. (taicpu(hp1).oper[0]^.typ = top_const) and
  4390. (taicpu(hp1).oper[0]^.val <> 0) and
  4391. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  4392. { and in case of carry for A(E)/B(E)/C/NC }
  4393. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  4394. begin
  4395. hp1 := tai(p.next);
  4396. asml.remove(p);
  4397. p.free;
  4398. p := tai(hp1);
  4399. Result:=true;
  4400. end;
  4401. end;
  4402. A_DEC, A_INC, A_NEG:
  4403. begin
  4404. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
  4405. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  4406. { and in case of carry for A(E)/B(E)/C/NC }
  4407. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  4408. begin
  4409. case taicpu(hp1).opcode of
  4410. A_DEC, A_INC:
  4411. { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
  4412. begin
  4413. case taicpu(hp1).opcode Of
  4414. A_DEC: taicpu(hp1).opcode := A_SUB;
  4415. A_INC: taicpu(hp1).opcode := A_ADD;
  4416. else
  4417. ;
  4418. end;
  4419. taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
  4420. taicpu(hp1).loadConst(0,1);
  4421. taicpu(hp1).ops:=2;
  4422. end;
  4423. else
  4424. ;
  4425. end;
  4426. hp1 := tai(p.next);
  4427. asml.remove(p);
  4428. p.free;
  4429. p := tai(hp1);
  4430. Result:=true;
  4431. end;
  4432. end
  4433. else
  { preceding instruction sets no usable flags: fall back to the
    canonicalisation below (duplicated because the case statement
    swallows the outer else branch when the pattern matched) }
  4434. { change "test $-1,%reg" into "test %reg,%reg" }
  4435. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  4436. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  4437. end { case }
  4438. { change "test $-1,%reg" into "test %reg,%reg" }
  4439. else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  4440. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  4441. end;
  { Post-peephole optimizations for CALL:
    - on i386 with old CPUs: "call proc ; jmp lbl" -> "push lbl ; jmp proc"
    - at optimization level 4: "call proc ; ret" -> "jmp proc" (tail call)
    Returns True when a rewrite was performed. }
  4442. function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  4443. var
  4444. hp1 : tai;
  4445. {$ifndef x86_64}
  4446. hp2 : taicpu;
  4447. {$endif x86_64}
  4448. begin
  4449. Result:=false;
  4450. {$ifndef x86_64}
  4451. { don't do this on modern CPUs, this really hurts them due to
  4452. broken call/ret pairing }
  4453. if (current_settings.optimizecputype < cpu_Pentium2) and
  4454. not(cs_create_pic in current_settings.moduleswitches) and
  4455. GetNextInstruction(p, hp1) and
  4456. MatchInstruction(hp1,A_JMP,[S_NO]) and
  4457. MatchOpType(taicpu(hp1),top_ref) and
  4458. (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
  4459. begin
  { push the jump target as the fake return address, then jump to the
    callee directly; the original jmp becomes redundant }
  4460. hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
  4461. InsertLLItem(p.previous, p, hp2);
  4462. taicpu(p).opcode := A_JMP;
  4463. taicpu(p).is_jmp := true;
  4464. asml.remove(hp1);
  4465. hp1.free;
  4466. Result:=true;
  4467. end
  4468. else
  4469. {$endif x86_64}
  4470. { replace
  4471. call procname
  4472. ret
  4473. by
  4474. jmp procname
  4475. but do it only on level 4 because it destroys stack back traces
  4476. }
  4477. if (cs_opt_level4 in current_settings.optimizerswitches) and
  4478. GetNextInstruction(p, hp1) and
  4479. MatchInstruction(hp1,A_RET,[S_NO]) and
  4480. (taicpu(hp1).ops=0) then
  4481. begin
  4482. taicpu(p).opcode := A_JMP;
  4483. taicpu(p).is_jmp := true;
  4484. DebugMsg(SPeepholeOptimization + 'CallRet2Jmp done',p);
  4485. asml.remove(hp1);
  4486. hp1.free;
  4487. Result:=true;
  4488. end;
  4489. end;
  4490. {$ifdef x86_64}
  function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
    var
      OldSpelling: string;
    begin
      Result := False;
      { Code size reduction by J. Gareth "Kit" Moreton }
      { A MOVZBQ/MOVZWQ whose destination is one of the eight registers
        that need no REX prefix can use the 32-bit destination form
        instead (MOVZBL/MOVZWL), saving the REX prefix. }
      if not (taicpu(p).opsize in [S_BQ, S_WQ]) then
        exit;
      if not (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
        exit;
      { capture the 64-bit spelling of the instruction for the debug message }
      OldSpelling := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
      { The actual optimization }
      setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
      case taicpu(p).opsize of
        S_BQ:
          taicpu(p).changeopsize(S_BL);
        else
          taicpu(p).changeopsize(S_WL);
      end;
      DebugMsg(SPeepholeOptimization + OldSpelling +
        debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
    end;
  function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
    var
      Reg64, Reg32: string;
    begin
      { Code size reduction by J. Gareth "Kit" Moreton }
      { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
        as this removes the REX prefix }
      Result := False;
      if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        Exit;
      if taicpu(p).oper[0]^.typ <> top_reg then
        { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
        InternalError(2018011500);
      if (taicpu(p).opsize = S_Q) and
         (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
        begin
          Reg64 := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
          { The actual optimization }
          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
          setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
          taicpu(p).changeopsize(S_L);
          Reg32 := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
          DebugMsg(SPeepholeOptimization + 'xorq ' + Reg64 + ',' + Reg64 + ' -> xorl ' + Reg32 + ',' + Reg32 + ' (removes REX prefix)', p);
        end;
    end;
  4546. {$endif}
  procedure TX86AsmOptimizer.OptReferences;
    var
      hp: tai;
      OperIdx: Integer;
    begin
      { Walk every instruction of the current block and normalise each
        memory-reference operand via optimize_ref. }
      hp := BlockStart;
      while hp <> BlockEnd do
        begin
          if hp.typ = ait_instruction then
            for OperIdx := 0 to taicpu(hp).ops-1 do
              if taicpu(hp).oper[OperIdx]^.typ = top_ref then
                optimize_ref(taicpu(hp).oper[OperIdx]^.ref^,false);
          hp := tai(hp.next);
        end;
    end;
  4564. end.