aoptx86.pas 205 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { x86-specific peephole optimizer.  Supplies the register-tracking queries
    required by the generic optimizer (TAsmOptimizer) and the individual
    pre-pass, pass-1, pass-2 and post-peephole transformations, one method
    per opcode family. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { True if hp loads a completely new value into reg (previous value dead). }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    { True if hp reads the current value of reg (delegates to RegReadByInstruction). }
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { True if the instruction hp reads reg, including implicit operands
      (e.g. EAX/EDX for MUL/DIV) and condition-flag sub-registers. }
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    { Scans forward from Current for the next instruction mentioning reg;
      stops early at calls/jumps, non-instructions, or when below -O3. }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    { Emits a debug message attached to instruction p (active only with
      DEBUG_AOPTCPU). }
    procedure DebugMsg(const s : string; p : tai);inline;
    { True if p marks the start of a function exit sequence. }
    class function IsExitCode(p : tai) : boolean; static;
    { True if hp1 is an arithmetic op on reg that a preceding MOV can be
      folded into. }
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean; static;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    function DoSubAddOpt(var p : tai) : Boolean;
    { Pre-peephole optimizations, run before the main passes. }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    function PrePeepholeOptIMUL(var p : tai) : boolean;
    { Pass-1 optimizations, one per opcode family; each returns True when it
      changed the instruction stream (p may be advanced or replaced). }
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1_V_MOVAP(var p : tai) : boolean;
    function OptPass1VOP(var p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(var p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass1Sub(var p : tai) : boolean;
    function OptPass1SHLSAL(var p : tai) : boolean;
    function OptPass1SETcc(var p : tai) : boolean;
    function OptPass1FSTP(var p : tai) : boolean;
    function OptPass1FLD(var p : tai) : boolean;
    function OptPass1Cmp(var p : tai) : boolean;
    { Pass-2 optimizations. }
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    function OptPass2Lea(var p: tai): Boolean;
    { Post-peephole optimizations, run after the main passes. }
    function PostPeepholeOptMov(var p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    function PostPeepholeOptMovzx(var p : tai) : Boolean;
    function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
    function PostPeepholeOptCmp(var p : tai) : Boolean;
    function PostPeepholeOptTestOr(var p : tai) : Boolean;
    function PostPeepholeOptCall(var p : tai) : Boolean;
    function PostPeepholeOptLea(var p : tai) : Boolean;
    procedure OptReferences;
    { Replaces the jump p with a copy of the RET instruction ret_p. }
    procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
  end;
  { True if instr is an instruction with the given opcode (or one of the given
    opcodes) and, when opsize is non-empty, an operand size contained in opsize. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { True if oper is a register operand equal to reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  { True if oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  { True if both operands have the same type and the same value. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { True if r1 and r2 denote the same (non-volatile) memory reference. }
  function RefsEqual(const r1, r2: treference): boolean;
  { True if ref uses only the given base/index registers, no symbol/segment,
    and a zero offset; NR_INVALID acts as a wildcard. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  { returns true, if ref is a reference using only the registers passed as base and index
    and having an offset }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  93. implementation
  94. uses
  95. cutils,verbose,
  96. globals,
  97. cpuinfo,
  98. procinfo,
  99. aasmbase,
  100. aoptutils,
  101. symconst,symsym,
  102. cgx86,
  103. itcpugas;
{$ifdef DEBUG_AOPTCPU}
  const
    { Prefix prepended to every debug message emitted by this unit. }
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
{$else DEBUG_AOPTCPU}
  { Empty strings help the optimizer to remove string concatenations that won't
    ever appear to the user on release builds. [Kit] }
  const
    SPeepholeOptimization = '';
{$endif DEBUG_AOPTCPU}
  { True if instr is a real instruction with opcode op; an empty opsize set
    acts as a wildcard for the operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      if instr.typ<>ait_instruction then
        Result:=false
      else
        Result:=(taicpu(instr).opcode=op) and
          ((opsize=[]) or (taicpu(instr).opsize in opsize));
    end;
  { True if instr is a real instruction with opcode op1 or op2; an empty
    opsize set acts as a wildcard for the operand size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (taicpu(instr).opcode<>op1) and (taicpu(instr).opcode<>op2) then
        exit;
      Result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { True if instr is a real instruction with opcode op1, op2 or op3; an empty
    opsize set acts as a wildcard for the operand size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) and
         (taicpu(instr).opcode<>op3) then
        exit;
      Result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { True if instr is a real instruction whose opcode appears in ops; an empty
    opsize set acts as a wildcard for the operand size. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      Result:=false;
      { The type and size tests do not depend on the loop variable, so check
        them once up front instead of re-evaluating them on every iteration
        (the original tested both inside the loop, scanning the whole opcode
        list even when instr could never match). }
      if (instr.typ<>ait_instruction) or
         not((opsize=[]) or (taicpu(instr).opsize in opsize)) then
        exit;
      for op in ops do
        if taicpu(instr).opcode=op then
          begin
            Result:=true;
            exit;
          end;
    end;
  { True if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        MatchOperand:=false
      else
        MatchOperand:=(oper.reg=reg);
    end;
  { True if oper is an immediate operand with exactly the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ<>top_const then
        MatchOperand:=false
      else
        MatchOperand:=(oper.val=a);
    end;
  { True if oper1 and oper2 have the same operand type and the same value
    (constant, register or memory reference).  Any other operand type is an
    internal error. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      if oper1.typ<>oper2.typ then
        begin
          Result:=false;
          exit;
        end;
      case oper1.typ of
        top_const:
          Result:=oper1.val=oper2.val;
        top_reg:
          Result:=oper1.reg=oper2.reg;
        top_ref:
          Result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Two references are interchangeable only when every addressing component
    matches and neither side is marked volatile. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=
        (r1.volatility=[]) and
        (r2.volatility=[]) and
        (r1.offset=r2.offset) and
        (r1.base=r2.base) and
        (r1.index=r2.index) and
        (r1.scalefactor=r2.scalefactor) and
        (r1.segment=r2.segment) and
        (r1.symbol=r2.symbol) and
        (r1.relsymbol=r2.relsymbol) and
        (r1.refaddr=r2.refaddr);
    end;
  { True if ref is a plain, non-volatile [base+index] reference with no
    symbol, no segment override and a zero offset.  NR_INVALID acts as a
    wildcard for base and/or index. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) or
         (ref.volatility<>[]) then
        exit(false);
      Result:=((base=NR_INVALID) or (ref.base=base)) and
              ((index=NR_INVALID) or (ref.index=index));
    end;
  { Like MatchReference, but any offset is permitted: ref must still be a
    non-volatile [base+index] reference with no symbol or segment override.
    NR_INVALID acts as a wildcard for base and/or index. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      if not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) or
         (ref.volatility<>[]) then
        exit(false);
      Result:=((base=NR_INVALID) or (ref.base=base)) and
              ((index=NR_INVALID) or (ref.index=index));
    end;
  { True if p may read any of the condition flags.  Labels count as readers
    (conservative: control flow can reach them with live flags); everything
    that is neither a label nor an instruction does not read flags. }
  function InstrReadsFlags(p: tai): boolean;
    begin
      case p.typ of
        ait_instruction:
          Result:=InsProp[taicpu(p).opcode].Ch*
            [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
             Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
             Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[];
        ait_label:
          Result:=true;
        else
          Result:=false;
      end;
    end;
  { Advances Next from Current through the instruction list, looking for the
    next instruction that mentions reg.  The walk stops when:
      - GetNextInstruction fails (Result=False, end of list), or
      - the optimizer level is below -O3 (only the immediately following
        instruction is considered), or
      - Next is not an instruction, or
      - Next uses reg, or
      - Next is a call/jump (reg cannot be tracked across control transfer).
    NOTE(review): a True result only means a following tai was found; the
    stop reason may be a call/jump or a non-instruction rather than an actual
    use of reg, so callers must still test Next themselves. }
  function TX86AsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
      until not (Result) or
        not(cs_opt_level3 in current_settings.optimizerswitches) or
        (Next.typ<>ait_instruction) or
        RegInInstruction(reg,Next) or
        is_calljmp(taicpu(Next).opcode);
    end;
  { Thin adapter: the generic optimizer's "instruction loads from reg" query
    maps directly onto the x86-specific read test. }
  function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    begin
      Result:=RegReadByInstruction(reg,hp);
    end;
  247. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  248. var
  249. p: taicpu;
  250. opcount: longint;
  251. begin
  252. RegReadByInstruction := false;
  253. if hp.typ <> ait_instruction then
  254. exit;
  255. p := taicpu(hp);
  256. case p.opcode of
  257. A_CALL:
  258. regreadbyinstruction := true;
  259. A_IMUL:
  260. case p.ops of
  261. 1:
  262. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  263. (
  264. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  265. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  266. );
  267. 2,3:
  268. regReadByInstruction :=
  269. reginop(reg,p.oper[0]^) or
  270. reginop(reg,p.oper[1]^);
  271. else
  272. InternalError(2019112801);
  273. end;
  274. A_MUL:
  275. begin
  276. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  277. (
  278. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  279. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  280. );
  281. end;
  282. A_IDIV,A_DIV:
  283. begin
  284. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  285. (
  286. (getregtype(reg)=R_INTREGISTER) and
  287. (
  288. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  289. )
  290. );
  291. end;
  292. else
  293. begin
  294. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  295. begin
  296. RegReadByInstruction := false;
  297. exit;
  298. end;
  299. for opcount := 0 to p.ops-1 do
  300. if (p.oper[opCount]^.typ = top_ref) and
  301. RegInRef(reg,p.oper[opcount]^.ref^) then
  302. begin
  303. RegReadByInstruction := true;
  304. exit
  305. end;
  306. { special handling for SSE MOVSD }
  307. if (p.opcode=A_MOVSD) and (p.ops>0) then
  308. begin
  309. if p.ops<>2 then
  310. internalerror(2017042702);
  311. regReadByInstruction := reginop(reg,p.oper[0]^) or
  312. (
  313. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  314. );
  315. exit;
  316. end;
  317. with insprop[p.opcode] do
  318. begin
  319. if getregtype(reg)=R_INTREGISTER then
  320. begin
  321. case getsupreg(reg) of
  322. RS_EAX:
  323. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  324. begin
  325. RegReadByInstruction := true;
  326. exit
  327. end;
  328. RS_ECX:
  329. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  330. begin
  331. RegReadByInstruction := true;
  332. exit
  333. end;
  334. RS_EDX:
  335. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  336. begin
  337. RegReadByInstruction := true;
  338. exit
  339. end;
  340. RS_EBX:
  341. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  342. begin
  343. RegReadByInstruction := true;
  344. exit
  345. end;
  346. RS_ESP:
  347. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  348. begin
  349. RegReadByInstruction := true;
  350. exit
  351. end;
  352. RS_EBP:
  353. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  354. begin
  355. RegReadByInstruction := true;
  356. exit
  357. end;
  358. RS_ESI:
  359. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  360. begin
  361. RegReadByInstruction := true;
  362. exit
  363. end;
  364. RS_EDI:
  365. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  366. begin
  367. RegReadByInstruction := true;
  368. exit
  369. end;
  370. end;
  371. end;
  372. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  373. begin
  374. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  375. begin
  376. case p.condition of
  377. C_A,C_NBE, { CF=0 and ZF=0 }
  378. C_BE,C_NA: { CF=1 or ZF=1 }
  379. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  380. C_AE,C_NB,C_NC, { CF=0 }
  381. C_B,C_NAE,C_C: { CF=1 }
  382. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  383. C_NE,C_NZ, { ZF=0 }
  384. C_E,C_Z: { ZF=1 }
  385. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  386. C_G,C_NLE, { ZF=0 and SF=OF }
  387. C_LE,C_NG: { ZF=1 or SF<>OF }
  388. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  389. C_GE,C_NL, { SF=OF }
  390. C_L,C_NGE: { SF<>OF }
  391. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  392. C_NO, { OF=0 }
  393. C_O: { OF=1 }
  394. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  395. C_NP,C_PO, { PF=0 }
  396. C_P,C_PE: { PF=1 }
  397. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  398. C_NS, { SF=0 }
  399. C_S: { SF=1 }
  400. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  401. else
  402. internalerror(2017042701);
  403. end;
  404. if RegReadByInstruction then
  405. exit;
  406. end;
  407. case getsubreg(reg) of
  408. R_SUBW,R_SUBD,R_SUBQ:
  409. RegReadByInstruction :=
  410. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  411. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  412. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  413. R_SUBFLAGCARRY:
  414. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  415. R_SUBFLAGPARITY:
  416. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  417. R_SUBFLAGAUXILIARY:
  418. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  419. R_SUBFLAGZERO:
  420. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  421. R_SUBFLAGSIGN:
  422. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  423. R_SUBFLAGOVERFLOW:
  424. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  425. R_SUBFLAGINTERRUPT:
  426. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  427. R_SUBFLAGDIRECTION:
  428. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  429. else
  430. internalerror(2017042601);
  431. end;
  432. exit;
  433. end;
  434. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  435. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  436. (p.oper[0]^.reg=p.oper[1]^.reg) then
  437. exit;
  438. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  439. begin
  440. RegReadByInstruction := true;
  441. exit
  442. end;
  443. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  444. begin
  445. RegReadByInstruction := true;
  446. exit
  447. end;
  448. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  449. begin
  450. RegReadByInstruction := true;
  451. exit
  452. end;
  453. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  454. begin
  455. RegReadByInstruction := true;
  456. exit
  457. end;
  458. end;
  459. end;
  460. end;
  461. end;
{ Returns True if instruction p1 references Reg in any way (read, write
  or modify), based on the instruction change information table insprop.
  Falls back to the inherited, operand-based check for anything the
  table does not cover. }
function TX86AsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  begin
    result:=false;
    if p1.typ<>ait_instruction then
      exit;
    { Ch_All: the instruction may touch any register at all. }
    if (Ch_All in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    if (getregtype(reg)=R_INTREGISTER) and
      { change information for xmm movsd are not correct }
      ((taicpu(p1).opcode<>A_MOVSD) or (taicpu(p1).ops=0)) then
      begin
        { Integer registers: test the per-register read/write/modify
          change flags of the opcode. }
        case getsupreg(reg) of
          { RS_EAX = RS_RAX on x86-64 }
          RS_EAX:
            result:=([Ch_REAX,Ch_RRAX,Ch_WEAX,Ch_WRAX,Ch_RWEAX,Ch_RWRAX,Ch_MEAX,Ch_MRAX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ECX:
            result:=([Ch_RECX,Ch_RRCX,Ch_WECX,Ch_WRCX,Ch_RWECX,Ch_RWRCX,Ch_MECX,Ch_MRCX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDX:
            result:=([Ch_REDX,Ch_RRDX,Ch_WEDX,Ch_WRDX,Ch_RWEDX,Ch_RWRDX,Ch_MEDX,Ch_MRDX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBX:
            result:=([Ch_REBX,Ch_RRBX,Ch_WEBX,Ch_WRBX,Ch_RWEBX,Ch_RWRBX,Ch_MEBX,Ch_MRBX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESP:
            result:=([Ch_RESP,Ch_RRSP,Ch_WESP,Ch_WRSP,Ch_RWESP,Ch_RWRSP,Ch_MESP,Ch_MRSP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBP:
            result:=([Ch_REBP,Ch_RRBP,Ch_WEBP,Ch_WRBP,Ch_RWEBP,Ch_RWRBP,Ch_MEBP,Ch_MRBP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESI:
            { NOTE(review): Ch_RMemEDI in the ESI set looks odd -
              presumably it marks string instructions that implicitly
              use ESI; confirm against the x86 change information tables }
            result:=([Ch_RESI,Ch_RRSI,Ch_WESI,Ch_WRSI,Ch_RWESI,Ch_RWRSI,Ch_MESI,Ch_MRSI,Ch_RMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDI:
            result:=([Ch_REDI,Ch_RRDI,Ch_WEDI,Ch_WRDI,Ch_RWEDI,Ch_RWRDI,Ch_MEDI,Ch_MRDI,Ch_WMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        { A positive table hit decides; otherwise fall through to the
          inherited operand scan below. }
        if result then
          exit;
      end
    else if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      begin
        { Any whole-flags read/write, or a condition-code use, counts
          as touching the flags register. }
        if ([Ch_RFlags,Ch_WFlags,Ch_RWFlags,Ch_RFLAGScc]*insprop[taicpu(p1).opcode].Ch)<>[] then
          exit(true);
        { Otherwise test the individual flag bit named by the
          sub-register. }
        case getsubreg(reg) of
          R_SUBFLAGCARRY:
            Result:=([Ch_RCarryFlag,Ch_RWCarryFlag,Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGPARITY:
            Result:=([Ch_RParityFlag,Ch_RWParityFlag,Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGAUXILIARY:
            Result:=([Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGZERO:
            Result:=([Ch_RZeroFlag,Ch_RWZeroFlag,Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGSIGN:
            Result:=([Ch_RSignFlag,Ch_RWSignFlag,Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGOVERFLOW:
            Result:=([Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGINTERRUPT:
            Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGDIRECTION:
            Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if (getregtype(reg)=R_FPUREGISTER) and (Ch_FPU in insprop[taicpu(p1).opcode].Ch) then
      { Any FPU-stack-using instruction references every FPU register. }
      exit(true);
    { No table hit: scan the explicit operands the generic way. }
    Result:=inherited RegInInstruction(Reg, p1);
  end;
{ Returns True if instruction p1 may modify (write or read-modify-write)
  register Reg.  Flags are tested per flag bit; integer registers and
  explicit operands are tested via the insprop change information. }
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  begin
    Result := False;
    if p1.typ <> ait_instruction then
      exit;
    { Flags register: decide from the per-flag write information. }
    with insprop[taicpu(p1).opcode] do
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        begin
          case getsubreg(reg) of
            { Whole (16/32/64-bit) flags register: modified if any
              individual flag can be written. }
            R_SUBW,R_SUBD,R_SUBQ:
              Result :=
                [Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
                 Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                 Ch_W0DirFlag,Ch_W1DirFlag,Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGCARRY:
              Result:=[Ch_WCarryFlag,Ch_RWCarryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_WParityFlag,Ch_RWParityFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_WAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_WZeroFlag,Ch_RWZeroFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_WSignFlag,Ch_RWSignFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_WOverflowFlag,Ch_RWOverflowFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            else
              internalerror(2017042602);
          end;
          exit;
        end;
    { Opcodes needing special handling before the generic table test. }
    case taicpu(p1).opcode of
      A_CALL:
        { We could potentially set Result to False if the register in
          question is non-volatile for the subroutine's calling convention,
          but this would require detecting the calling convention in use and
          also assuming that the routine doesn't contain malformed assembly
          language, for example... so it could only be done under -O4 as it
          would be considered a side-effect. [Kit] }
        Result := True;
      A_MOVSD:
        { special handling for SSE MOVSD }
        if (taicpu(p1).ops>0) then
          begin
            if taicpu(p1).ops<>2 then
              internalerror(2017042703);
            { the second operand (the destination) is the one written }
            Result := (taicpu(p1).oper[1]^.typ=top_reg) and reginop(reg,taicpu(p1).oper[1]^);
          end;
      else
        ;
    end;
    if Result then
      exit;
    with insprop[taicpu(p1).opcode] do
      begin
        if getregtype(reg)=R_INTREGISTER then
          begin
            { Implicit writes/modifies of the fixed integer registers. }
            case getsupreg(reg) of
              RS_EAX:
                if [Ch_WEAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ECX:
                if [Ch_WECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDX:
                if [Ch_WEDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBX:
                if [Ch_WEBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESP:
                if [Ch_WESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBP:
                if [Ch_WEBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESI:
                if [Ch_WESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDI:
                if [Ch_WEDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
            end;
          end;
        { Explicit operands: tested if the opcode writes/modifies the
          corresponding operand slot. }
        if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
          begin
            Result := true;
            exit
          end;
      end;
  end;
{$ifdef DEBUG_AOPTCPU}
  { Debug build: DebugMsg records an optimizer message as an assembler
    comment inserted before p, and the debug_* helpers provide real
    string conversions for log output. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;

  { Converts an integer to its decimal string form for debug output. }
  function debug_tostr(i: tcgint): string; inline;
    begin
      Result := tostr(i);
    end;

  { Returns the AT&T-style register name ('%' prefix) for debug output. }
  function debug_regname(r: TRegister): string; inline;
    begin
      Result := '%' + std_regname(r);
    end;

  { Debug output function - creates a string representation of an operator }
  function debug_operstr(oper: TOper): string;
    begin
      case oper.typ of
        top_const:
          Result := '$' + debug_tostr(oper.val);
        top_reg:
          Result := debug_regname(oper.reg);
        top_ref:
          { AT&T memory operand syntax: offset(base,index,scale) }
          begin
            if oper.ref^.offset <> 0 then
              Result := debug_tostr(oper.ref^.offset) + '('
            else
              Result := '(';
            if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
              begin
                Result := Result + debug_regname(oper.ref^.base);
                if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
                  Result := Result + ',' + debug_regname(oper.ref^.index);
              end
            else
              if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
                Result := Result + debug_regname(oper.ref^.index);
            { the scale factor is only printed when it matters }
            if (oper.ref^.scalefactor > 1) then
              Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
            else
              Result := Result + ')';
          end;
        else
          Result := '[UNKNOWN]';
      end;
    end;

  { Returns the mnemonic for an opcode, for debug output. }
  function debug_op2str(opcode: tasmop): string; inline;
    begin
      Result := std_op2str[opcode];
    end;

  { Returns the GAS operand-size suffix for debug output. }
  function debug_opsize2str(opsize: topsize): string; inline;
    begin
      Result := gas_opsize2str[opsize];
    end;
{$else DEBUG_AOPTCPU}
  { Release build: all debug helpers are inlined no-ops returning empty
    strings, so calls compile away without cost. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;

  function debug_tostr(i: tcgint): string; inline;
    begin
      Result := '';
    end;

  function debug_regname(r: TRegister): string; inline;
    begin
      Result := '';
    end;

  function debug_operstr(oper: TOper): string; inline;
    begin
      Result := '';
    end;

  function debug_op2str(opcode: tasmop): string; inline;
    begin
      Result := '';
    end;

  function debug_opsize2str(opsize: topsize): string; inline;
    begin
      Result := '';
    end;
{$endif DEBUG_AOPTCPU}
  740. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  741. begin
  742. if not SuperRegistersEqual(reg1,reg2) then
  743. exit(false);
  744. if getregtype(reg1)<>R_INTREGISTER then
  745. exit(true); {because SuperRegisterEqual is true}
  746. case getsubreg(reg1) of
  747. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  748. higher, it preserves the high bits, so the new value depends on
  749. reg2's previous value. In other words, it is equivalent to doing:
  750. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  751. R_SUBL:
  752. exit(getsubreg(reg2)=R_SUBL);
  753. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  754. higher, it actually does a:
  755. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  756. R_SUBH:
  757. exit(getsubreg(reg2)=R_SUBH);
  758. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  759. bits of reg2:
  760. reg2 := (reg2 and $ffff0000) or word(reg1); }
  761. R_SUBW:
  762. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  763. { a write to R_SUBD always overwrites every other subregister,
  764. because it clears the high 32 bits of R_SUBQ on x86_64 }
  765. R_SUBD,
  766. R_SUBQ:
  767. exit(true);
  768. else
  769. internalerror(2017042801);
  770. end;
  771. end;
  772. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  773. begin
  774. if not SuperRegistersEqual(reg1,reg2) then
  775. exit(false);
  776. if getregtype(reg1)<>R_INTREGISTER then
  777. exit(true); {because SuperRegisterEqual is true}
  778. case getsubreg(reg1) of
  779. R_SUBL:
  780. exit(getsubreg(reg2)<>R_SUBH);
  781. R_SUBH:
  782. exit(getsubreg(reg2)<>R_SUBL);
  783. R_SUBW,
  784. R_SUBD,
  785. R_SUBQ:
  786. exit(true);
  787. else
  788. internalerror(2017042802);
  789. end;
  790. end;
{ Pre-pass peephole for SHR/SAR: combines a right shift followed by a
  left shift of the same operand into a reduced shift plus an AND mask
  (or a single AND when the shift counts are equal). }
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;
  begin
    result:=false;
    { changes the code sequence
        shr/sar const1, x
        shl const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    if GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_SHL,[]) and
      (taicpu(p).oper[0]^.typ = top_const) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
      OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 > const2 }
            { net effect: shift right by (const1-const2), then clear
              the low const2 bits with an AND mask }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            { l xor $ff.. = all-ones of the operand size with the low
              const2 bits cleared }
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 < const2 }
            { net effect: clear the low const1 bits with an AND, then
              shift left by (const2-const1) }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 = const2 }
            { the shifts cancel out; a single AND clearing the low
              const1 bits is enough, and the shl is removed }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            asml.remove(hp1);
            hp1.free;
          end;
      end;
  end;
{ Pre-pass peephole for IMUL: removes multiplications by 1 and rewrites
  certain "imul const, %reg" forms into lea (+ shl) sequences. }
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;
    BaseValue : TCGInt;
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
        MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
       (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        if (taicpu(p).ops = 2) then
          { remove "imul $1, reg" }
          begin
            { NOTE(review): hp1 is assigned but not used in this
              branch - presumably a leftover; RemoveCurrentP advances p
              itself.  Confirm before cleaning up. }
            hp1 := tai(p.Next);
            DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
            RemoveCurrentP(p);
            result:=true;
          end
        else
          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
          begin
            hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
            p.free;
            p := hp1;
          end
      else if ((taicpu(p).ops <= 2) or
               (taicpu(p).oper[2]^.typ = Top_Reg)) and
        not(cs_opt_size in current_settings.optimizerswitches) and
        { skip the rewrite when the next instruction is jo/jno, which
          tests the overflow condition the imul would have produced }
        (not(GetNextInstruction(p, hp1)) or
         not((tai(hp1).typ = ait_instruction) and
             ((taicpu(hp1).opcode=A_Jcc) and
              (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
              lea (reg1,reg1,Y), reg2
              shl ZZ,reg2
            imul XX, reg1 to
              lea (reg1,reg1,YY), reg1
              shl ZZ,reg2
            This optimization makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.
            This optimization can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              { factor the constant as BaseValue * 2^ShiftValue; the
                bit-distance check above guarantees BaseValue is one of
                3, 5 or 9, i.e. encodable as lea scale+1 }
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              TmpRef.ScaleFactor := BaseValue-1;
              { 2-operand form multiplies in place; 3-operand form
                writes the product into the third operand }
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              RemoveCurrentP(p);
              { apply the power-of-two part of the constant }
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
{ Returns True if instruction hp loads reg with a value that is
  independent of reg's previous contents, i.e. the register is
  completely overwritten.  For the flags register the decision is made
  per flag bit from the instruction change information. }
function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  var
    p: taicpu;
  begin
    if not assigned(hp) or
      (hp.typ <> ait_instruction) then
      begin
        Result := false;
        exit;
      end;
    p := taicpu(hp);
    if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      with insprop[p.opcode] do
        begin
          case getsubreg(reg) of
            { whole flags register: every arithmetic flag must be
              freshly written }
            R_SUBW,R_SUBD,R_SUBQ:
              Result:=
                RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
            { individual flags: any write variant (set-to-0, set-to-1,
              value write, undefined write, or whole-flags write)
              counts as a fresh value }
            R_SUBFLAGCARRY:
              Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
            else
              begin
                { debug aid before bailing out on an unexpected flags
                  sub-register }
                writeln(getsubreg(reg));
                internalerror(2017050501);
              end;
          end;
          exit;
        end;
    { Non-flags case: one big disjunction over the opcodes known to
      fully (re)load their destination.  Each clause also checks that
      the source does not itself depend on reg. }
    Result :=
      { plain loads: destination must cover reg entirely and the
        source must not read reg }
      (((p.opcode = A_MOV) or
        (p.opcode = A_MOVZX) or
        (p.opcode = A_MOVSX) or
        (p.opcode = A_LEA) or
        (p.opcode = A_VMOVSS) or
        (p.opcode = A_VMOVSD) or
        (p.opcode = A_VMOVAPD) or
        (p.opcode = A_VMOVAPS) or
        (p.opcode = A_VMOVQ) or
        (p.opcode = A_MOVSS) or
        (p.opcode = A_MOVSD) or
        (p.opcode = A_MOVQ) or
        (p.opcode = A_MOVAPD) or
        (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
        (p.opcode = A_LDS) or
        (p.opcode = A_LES) or
{$endif not x86_64}
        (p.opcode = A_LFS) or
        (p.opcode = A_LGS) or
        (p.opcode = A_LSS)) and
       (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
       (p.oper[1]^.typ = top_reg) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
       ((p.oper[0]^.typ = top_const) or
        ((p.oper[0]^.typ = top_reg) and
         not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
        ((p.oper[0]^.typ = top_ref) and
         not RegInRef(reg,p.oper[0]^.ref^)))) or
      { pop always loads a fresh value from the stack }
      ((p.opcode = A_POP) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
      { 3-operand imul writes its third operand }
      ((p.opcode = A_IMUL) and
       (p.ops=3) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
       (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
        ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
      { 1-operand mul/imul implicitly write AX/DX:AX/EDX:EAX/RDX:RAX }
      ((((p.opcode = A_IMUL) or
         (p.opcode = A_MUL)) and
        (p.ops=1)) and
       (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
        ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
       (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
        ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
        or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
       )) or
      { sign-extension instructions write the high half register }
      ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
      ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
      ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
      ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
      { far-pointer loads also load their segment register }
{$ifndef x86_64}
      ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
      ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$ifndef x86_64}
      ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
      ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
      { string loads write the accumulator }
      ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
      ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
      ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
      ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
      ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
      (((p.opcode = A_FSTSW) or
        (p.opcode = A_FNSTSW)) and
       (p.oper[0]^.typ=top_reg) and
       Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
      { xor/sub/sbb of a register with itself yields a value that does
        not depend on the register's old contents }
      (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
       (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
       (p.oper[0]^.reg=p.oper[1]^.reg) and
       Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  end;
{ Returns True if p begins a recognised function exit sequence:
  a plain RET; LEAVE followed by RET; a stack-pointer-adjusting
  "lea x(%esp),%esp" followed by RET; or a frame-pointer restore
  (mov %frame,%esp or lea x(%frame),%esp) followed by "pop %frame"
  and RET. }
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  var
    hp2,hp3 : tai;
  begin
    { some x86-64 issue a NOP before the real exit code }
    if MatchInstruction(p,A_NOP,[]) then
      GetNextInstruction(p,p);
    result:=assigned(p) and (p.typ=ait_instruction) and
      ((taicpu(p).opcode = A_RET) or
       { leave / ret }
       ((taicpu(p).opcode=A_LEAVE) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { lea x(%esp),%esp / ret }
       (((taicpu(p).opcode=A_LEA) and
         MatchOpType(taicpu(p),top_ref,top_reg) and
         (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
         (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { mov %frame,%esp or lea x(%frame),%esp / pop %frame / ret }
       ((((taicpu(p).opcode=A_MOV) and
          MatchOpType(taicpu(p),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
         ((taicpu(p).opcode=A_LEA) and
          MatchOpType(taicpu(p),top_ref,top_reg) and
          (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
         )
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
        MatchOpType(taicpu(hp2),top_reg) and
        (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
        GetNextInstruction(hp2,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      );
  end;
  1110. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  1111. begin
  1112. isFoldableArithOp := False;
  1113. case hp1.opcode of
  1114. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  1115. isFoldableArithOp :=
  1116. ((taicpu(hp1).oper[0]^.typ = top_const) or
  1117. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  1118. (taicpu(hp1).oper[0]^.reg <> reg))) and
  1119. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1120. (taicpu(hp1).oper[1]^.reg = reg);
  1121. A_INC,A_DEC,A_NEG,A_NOT:
  1122. isFoldableArithOp :=
  1123. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1124. (taicpu(hp1).oper[0]^.reg = reg);
  1125. else
  1126. ;
  1127. end;
  1128. end;
{ Removes the last deallocation marker of the function-result
  register(s) before p, so the result register stays allocated through
  the exit code.  Which registers are handled depends on the current
  routine's return type: EAX for most types, plus EDX for 8-byte
  ordinal (int64/qword) results. }
procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

  { Walks backwards from p and deletes the first ra_dealloc marker of
    the given integer super-register.  The search stops as soon as an
    instruction that references the register is crossed, or the start
    of the list is reached. }
  procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
    var
      hp2: tai;
    begin
      hp2 := p;
      repeat
        hp2 := tai(hp2.previous);
        if assigned(hp2) and
          (hp2.typ = ait_regalloc) and
          (tai_regalloc(hp2).ratype=ra_dealloc) and
          (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
          (getsupreg(tai_regalloc(hp2).reg) = supreg) then
          begin
            asml.remove(hp2);
            hp2.free;
            break;
          end;
      until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
    end;

  begin
    case current_procinfo.procdef.returndef.typ of
      { all of these return their result (or a pointer to it) in EAX }
      arraydef,recorddef,pointerdef,
      stringdef,enumdef,procdef,objectdef,errordef,
      filedef,setdef,procvardef,
      classrefdef,forwarddef:
        DoRemoveLastDeallocForFuncRes(RS_EAX);
      orddef:
        if current_procinfo.procdef.returndef.size <> 0 then
          begin
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
      else
        ;
    end;
  end;
    { Pass-1 peephole optimizations anchored on a vmova*/mova* register-to-register
      move at p.  Three patterns are handled:
        1. vmova* reg1,reg1                     -> instruction removed
        2. vmova* reg1,reg2 / vmova* reg2,...   -> moves merged when reg2 dies
        3. vmova* + VFMA-family op + vmova*     -> FMA retargeted, both moves dropped
      Returns true when the instruction list was changed and p now points at the
      instruction the optimizer should continue from. }
    function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
      var
        hp1,hp2 : tai;
      begin
        result:=false;
        { all patterns below require a plain reg,reg move at p }
        if MatchOpType(taicpu(p),top_reg,top_reg) then
          begin
            { vmova* reg1,reg1
              =>
              <nop> }
            if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
              begin
                { self-move: delete p and resume at the following instruction }
                GetNextInstruction(p,hp1);
                asml.Remove(p);
                p.Free;
                p:=hp1;
                result:=true;
              end
            else if GetNextInstruction(p,hp1) then
              begin
                { same move opcode again, feeding reg2 straight into another move? }
                if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                  MatchOpType(taicpu(hp1),top_reg,top_reg) and
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                  begin
                    { vmova* reg1,reg2
                      vmova* reg2,reg3
                      dealloc reg2
                      =>
                      vmova* reg1,reg3 }
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    { only legal when the intermediate reg2 is dead after hp1 }
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                        asml.Remove(hp1);
                        hp1.Free;
                        result:=true;
                      end
                    { special case:
                      vmova* reg1,reg2
                      vmova* reg2,reg1
                      =>
                      vmova* reg1,reg2 }
                    else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                      begin
                        { the second move just restores reg1; it is redundant }
                        asml.Remove(hp1);
                        hp1.Free;
                        result:=true;
                      end
                  end
                { FMA folding: the move at p supplies the accumulator operand of a
                  fused-multiply-add/sub, whose result is moved back afterwards }
                else if MatchInstruction(hp1,[A_VFMADDPD,
                                              A_VFMADD132PD,
                                              A_VFMADD132PS,
                                              A_VFMADD132SD,
                                              A_VFMADD132SS,
                                              A_VFMADD213PD,
                                              A_VFMADD213PS,
                                              A_VFMADD213SD,
                                              A_VFMADD213SS,
                                              A_VFMADD231PD,
                                              A_VFMADD231PS,
                                              A_VFMADD231SD,
                                              A_VFMADD231SS,
                                              A_VFMADDSUB132PD,
                                              A_VFMADDSUB132PS,
                                              A_VFMADDSUB213PD,
                                              A_VFMADDSUB213PS,
                                              A_VFMADDSUB231PD,
                                              A_VFMADDSUB231PS,
                                              A_VFMSUB132PD,
                                              A_VFMSUB132PS,
                                              A_VFMSUB132SD,
                                              A_VFMSUB132SS,
                                              A_VFMSUB213PD,
                                              A_VFMSUB213PS,
                                              A_VFMSUB213SD,
                                              A_VFMSUB213SS,
                                              A_VFMSUB231PD,
                                              A_VFMSUB231PS,
                                              A_VFMSUB231SD,
                                              A_VFMSUB231SS,
                                              A_VFMSUBADD132PD,
                                              A_VFMSUBADD132PS,
                                              A_VFMSUBADD213PD,
                                              A_VFMSUBADD213PS,
                                              A_VFMSUBADD231PD,
                                              A_VFMSUBADD231PS,
                                              A_VFNMADD132PD,
                                              A_VFNMADD132PS,
                                              A_VFNMADD132SD,
                                              A_VFNMADD132SS,
                                              A_VFNMADD213PD,
                                              A_VFNMADD213PS,
                                              A_VFNMADD213SD,
                                              A_VFNMADD213SS,
                                              A_VFNMADD231PD,
                                              A_VFNMADD231PS,
                                              A_VFNMADD231SD,
                                              A_VFNMADD231SS,
                                              A_VFNMSUB132PD,
                                              A_VFNMSUB132PS,
                                              A_VFNMSUB132SD,
                                              A_VFNMSUB132SS,
                                              A_VFNMSUB213PD,
                                              A_VFNMSUB213PS,
                                              A_VFNMSUB213SD,
                                              A_VFNMSUB213SS,
                                              A_VFNMSUB231PD,
                                              A_VFNMSUB231PS,
                                              A_VFNMSUB231SD,
                                              A_VFNMSUB231SS],[S_NO]) and
                  { we mix single and double operations here because we assume that the compiler
                    generates vmovapd only after double operations and vmovaps only after single operations }
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                  GetNextInstruction(hp1,hp2) and
                  MatchInstruction(hp2,[A_VMOVAPD,A_VMOVAPS,A_MOVAPD,A_MOVAPS],[S_NO]) and
                  MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    { the temporary (p's destination) must die after the final move }
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
                    then
                      begin
                        { let the FMA read/write reg1 directly and drop both moves }
                        taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                        asml.Remove(p);
                        p.Free;
                        asml.Remove(hp2);
                        hp2.Free;
                        p:=hp1;
                        { NOTE(review): this branch rewrites the list and moves p but
                          does not set result:=true, unlike the sibling branches —
                          confirm whether that is intentional (missed re-scan?) }
                      end;
                  end
                { movapX followed by a scalar arithmetic op on reg2 and a movapX back }
                else if (hp1.typ = ait_instruction) and
                  GetNextInstruction(hp1, hp2) and
                  MatchInstruction(hp2,taicpu(p).opcode,[]) and
                  OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
                  MatchOpType(taicpu(hp2),top_reg,top_reg) and
                  MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                  { the move size must agree with the scalar op precision }
                  (((taicpu(p).opcode=A_MOVAPS) and
                    ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
                     (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
                   ((taicpu(p).opcode=A_MOVAPD) and
                    ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
                     (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
                  ) then
                  { change
                    movapX reg,reg2
                    addsX/subsX/... reg3, reg2
                    movapX reg2,reg
                    to
                    addsX/subsX/... reg3,reg
                  }
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
                          debug_op2str(taicpu(p).opcode)+' '+
                          debug_op2str(taicpu(hp1).opcode)+' '+
                          debug_op2str(taicpu(hp2).opcode)+') done',p);
                        { we cannot eliminate the first move if
                          the operations uses the same register for source and dest }
                        if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                          begin
                            asml.remove(p);
                            p.Free;
                          end;
                        { retarget the arithmetic op's destination to the original reg }
                        taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                        asml.remove(hp2);
                        hp2.Free;
                        p:=hp1;
                        result:=true;
                      end;
                  end;
              end;
          end;
      end;
  1347. function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
  1348. var
  1349. hp1 : tai;
  1350. begin
  1351. result:=false;
  1352. { replace
  1353. V<Op>X %mreg1,%mreg2,%mreg3
  1354. VMovX %mreg3,%mreg4
  1355. dealloc %mreg3
  1356. by
  1357. V<Op>X %mreg1,%mreg2,%mreg4
  1358. ?
  1359. }
  1360. if GetNextInstruction(p,hp1) and
  1361. { we mix single and double operations here because we assume that the compiler
  1362. generates vmovapd only after double operations and vmovaps only after single operations }
  1363. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1364. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1365. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1366. begin
  1367. TransferUsedRegs(TmpUsedRegs);
  1368. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1369. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  1370. ) then
  1371. begin
  1372. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1373. DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
  1374. asml.Remove(hp1);
  1375. hp1.Free;
  1376. result:=true;
  1377. end;
  1378. end;
  1379. end;
  1380. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1381. var
  1382. hp1, hp2: tai;
  1383. GetNextInstruction_p: Boolean;
  1384. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  1385. NewSize: topsize;
  1386. begin
  1387. Result:=false;
  1388. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1389. { remove mov reg1,reg1? }
  1390. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  1391. then
  1392. begin
  1393. DebugMsg(SPeepholeOptimization + 'Mov2Nop done',p);
  1394. { take care of the register (de)allocs following p }
  1395. UpdateUsedRegs(tai(p.next));
  1396. asml.remove(p);
  1397. p.free;
  1398. p:=hp1;
  1399. Result:=true;
  1400. exit;
  1401. end;
  1402. if GetNextInstruction_p and
  1403. MatchInstruction(hp1,A_AND,[]) and
  1404. (taicpu(p).oper[1]^.typ = top_reg) and
  1405. MatchOpType(taicpu(hp1),top_const,top_reg) then
  1406. begin
  1407. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1408. begin
  1409. case taicpu(p).opsize of
  1410. S_L:
  1411. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1412. begin
  1413. { Optimize out:
  1414. mov x, %reg
  1415. and ffffffffh, %reg
  1416. }
  1417. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  1418. asml.remove(hp1);
  1419. hp1.free;
  1420. Result:=true;
  1421. exit;
  1422. end;
  1423. S_Q: { TODO: Confirm if this is even possible }
  1424. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1425. begin
  1426. { Optimize out:
  1427. mov x, %reg
  1428. and ffffffffffffffffh, %reg
  1429. }
  1430. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  1431. asml.remove(hp1);
  1432. hp1.free;
  1433. Result:=true;
  1434. exit;
  1435. end;
  1436. else
  1437. ;
  1438. end;
  1439. end
  1440. else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  1441. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  1442. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  1443. then
  1444. begin
  1445. InputVal := debug_operstr(taicpu(p).oper[0]^);
  1446. MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
  1447. case taicpu(p).opsize of
  1448. S_B:
  1449. if (taicpu(hp1).oper[0]^.val = $ff) then
  1450. begin
  1451. { Convert:
  1452. movb x, %regl movb x, %regl
  1453. andw ffh, %regw andl ffh, %regd
  1454. To:
  1455. movzbw x, %regd movzbl x, %regd
  1456. (Identical registers, just different sizes)
  1457. }
  1458. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  1459. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  1460. case taicpu(hp1).opsize of
  1461. S_W: NewSize := S_BW;
  1462. S_L: NewSize := S_BL;
  1463. {$ifdef x86_64}
  1464. S_Q: NewSize := S_BQ;
  1465. {$endif x86_64}
  1466. else
  1467. InternalError(2018011510);
  1468. end;
  1469. end
  1470. else
  1471. NewSize := S_NO;
  1472. S_W:
  1473. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1474. begin
  1475. { Convert:
  1476. movw x, %regw
  1477. andl ffffh, %regd
  1478. To:
  1479. movzwl x, %regd
  1480. (Identical registers, just different sizes)
  1481. }
  1482. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  1483. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  1484. case taicpu(hp1).opsize of
  1485. S_L: NewSize := S_WL;
  1486. {$ifdef x86_64}
  1487. S_Q: NewSize := S_WQ;
  1488. {$endif x86_64}
  1489. else
  1490. InternalError(2018011511);
  1491. end;
  1492. end
  1493. else
  1494. NewSize := S_NO;
  1495. else
  1496. NewSize := S_NO;
  1497. end;
  1498. if NewSize <> S_NO then
  1499. begin
  1500. PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
  1501. { The actual optimization }
  1502. taicpu(p).opcode := A_MOVZX;
  1503. taicpu(p).changeopsize(NewSize);
  1504. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  1505. { Safeguard if "and" is followed by a conditional command }
  1506. TransferUsedRegs(TmpUsedRegs);
  1507. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  1508. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
  1509. begin
  1510. { At this point, the "and" command is effectively equivalent to
  1511. "test %reg,%reg". This will be handled separately by the
  1512. Peephole Optimizer. [Kit] }
  1513. DebugMsg(SPeepholeOptimization + PreMessage +
  1514. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1515. end
  1516. else
  1517. begin
  1518. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
  1519. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1520. asml.Remove(hp1);
  1521. hp1.Free;
  1522. end;
  1523. Result := True;
  1524. Exit;
  1525. end;
  1526. end;
  1527. end;
  1528. { Next instruction is also a MOV ? }
  1529. if GetNextInstruction_p and
  1530. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1531. begin
  1532. if (taicpu(p).oper[1]^.typ = top_reg) and
  1533. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1534. begin
  1535. TransferUsedRegs(TmpUsedRegs);
  1536. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1537. { we have
  1538. mov x, %treg
  1539. mov %treg, y
  1540. }
  1541. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1542. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1543. { we've got
  1544. mov x, %treg
  1545. mov %treg, y
  1546. with %treg is not used after }
  1547. case taicpu(p).oper[0]^.typ Of
  1548. top_reg:
  1549. begin
  1550. { change
  1551. mov %reg, %treg
  1552. mov %treg, y
  1553. to
  1554. mov %reg, y
  1555. }
  1556. if taicpu(hp1).oper[1]^.typ=top_reg then
  1557. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1558. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1559. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 2 done',p);
  1560. asml.remove(hp1);
  1561. hp1.free;
  1562. Result:=true;
  1563. Exit;
  1564. end;
  1565. top_const:
  1566. begin
  1567. { change
  1568. mov const, %treg
  1569. mov %treg, y
  1570. to
  1571. mov const, y
  1572. }
  1573. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  1574. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1575. begin
  1576. if taicpu(hp1).oper[1]^.typ=top_reg then
  1577. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1578. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1579. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  1580. asml.remove(hp1);
  1581. hp1.free;
  1582. Result:=true;
  1583. Exit;
  1584. end;
  1585. end;
  1586. top_ref:
  1587. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1588. begin
  1589. { change
  1590. mov mem, %treg
  1591. mov %treg, %reg
  1592. to
  1593. mov mem, %reg"
  1594. }
  1595. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1596. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  1597. asml.remove(hp1);
  1598. hp1.free;
  1599. Result:=true;
  1600. Exit;
  1601. end;
  1602. else
  1603. ;
  1604. end;
  1605. end;
  1606. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1607. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1608. { mov reg1, mem1 or mov mem1, reg1
  1609. mov mem2, reg2 mov reg2, mem2}
  1610. begin
  1611. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1612. { mov reg1, mem1 or mov mem1, reg1
  1613. mov mem2, reg1 mov reg2, mem1}
  1614. begin
  1615. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1616. { Removes the second statement from
  1617. mov reg1, mem1/reg2
  1618. mov mem1/reg2, reg1 }
  1619. begin
  1620. if taicpu(p).oper[0]^.typ=top_reg then
  1621. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1622. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  1623. asml.remove(hp1);
  1624. hp1.free;
  1625. Result:=true;
  1626. exit;
  1627. end
  1628. else
  1629. begin
  1630. TransferUsedRegs(TmpUsedRegs);
  1631. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1632. if (taicpu(p).oper[1]^.typ = top_ref) and
  1633. { mov reg1, mem1
  1634. mov mem2, reg1 }
  1635. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1636. GetNextInstruction(hp1, hp2) and
  1637. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1638. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1639. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1640. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1641. { change to
  1642. mov reg1, mem1 mov reg1, mem1
  1643. mov mem2, reg1 cmp reg1, mem2
  1644. cmp mem1, reg1
  1645. }
  1646. begin
  1647. asml.remove(hp2);
  1648. hp2.free;
  1649. taicpu(hp1).opcode := A_CMP;
  1650. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1651. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1652. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1653. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  1654. end;
  1655. end;
  1656. end
  1657. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1658. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1659. begin
  1660. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1661. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1662. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  1663. end
  1664. else
  1665. begin
  1666. TransferUsedRegs(TmpUsedRegs);
  1667. if GetNextInstruction(hp1, hp2) and
  1668. MatchOpType(taicpu(p),top_ref,top_reg) and
  1669. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1670. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1671. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1672. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1673. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1674. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1675. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1676. { mov mem1, %reg1
  1677. mov %reg1, mem2
  1678. mov mem2, reg2
  1679. to:
  1680. mov mem1, reg2
  1681. mov reg2, mem2}
  1682. begin
  1683. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1684. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  1685. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1686. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1687. asml.remove(hp2);
  1688. hp2.free;
  1689. end
  1690. {$ifdef i386}
  1691. { this is enabled for i386 only, as the rules to create the reg sets below
  1692. are too complicated for x86-64, so this makes this code too error prone
  1693. on x86-64
  1694. }
  1695. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1696. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1697. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1698. { mov mem1, reg1 mov mem1, reg1
  1699. mov reg1, mem2 mov reg1, mem2
  1700. mov mem2, reg2 mov mem2, reg1
  1701. to: to:
  1702. mov mem1, reg1 mov mem1, reg1
  1703. mov mem1, reg2 mov reg1, mem2
  1704. mov reg1, mem2
  1705. or (if mem1 depends on reg1
  1706. and/or if mem2 depends on reg2)
  1707. to:
  1708. mov mem1, reg1
  1709. mov reg1, mem2
  1710. mov reg1, reg2
  1711. }
  1712. begin
  1713. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1714. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1715. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1716. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1717. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1718. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1719. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1720. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1721. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1722. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1723. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1724. end
  1725. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1726. begin
  1727. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1728. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1729. end
  1730. else
  1731. begin
  1732. asml.remove(hp2);
  1733. hp2.free;
  1734. end
  1735. {$endif i386}
  1736. ;
  1737. end;
  1738. end;
  1739. (* { movl [mem1],reg1
  1740. movl [mem1],reg2
  1741. to
  1742. movl [mem1],reg1
  1743. movl reg1,reg2
  1744. }
  1745. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1746. (taicpu(p).oper[1]^.typ = top_reg) and
  1747. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1748. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1749. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1750. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1751. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1752. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1753. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1754. else*)
  1755. { movl const1,[mem1]
  1756. movl [mem1],reg1
  1757. to
  1758. movl const1,reg1
  1759. movl reg1,[mem1]
  1760. }
  1761. if MatchOpType(Taicpu(p),top_const,top_ref) and
  1762. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1763. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1764. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1765. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1766. begin
  1767. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1768. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1769. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1770. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1771. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1772. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  1773. Result:=true;
  1774. exit;
  1775. end;
  1776. {
  1777. mov* x,reg1
  1778. mov* y,reg1
  1779. to
  1780. mov* y,reg1
  1781. }
  1782. if (taicpu(p).oper[1]^.typ=top_reg) and
  1783. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1784. not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then
  1785. begin
  1786. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);
  1787. { take care of the register (de)allocs following p }
  1788. UpdateUsedRegs(tai(p.next));
  1789. asml.remove(p);
  1790. p.free;
  1791. p:=hp1;
  1792. Result:=true;
  1793. exit;
  1794. end;
  1795. end;
  1796. { search further than the next instruction for a mov }
  1797. if (cs_opt_level3 in current_settings.optimizerswitches) and
  1798. { check as much as possible before the expensive GetNextInstructionUsingReg call }
  1799. (taicpu(p).oper[1]^.typ = top_reg) and
  1800. (taicpu(p).oper[0]^.typ in [top_reg,top_const]) and
  1801. { we work with hp2 here, so hp1 can be still used later on when
  1802. checking for GetNextInstruction_p }
  1803. GetNextInstructionUsingReg(p,hp2,taicpu(p).oper[1]^.reg) and
  1804. MatchInstruction(hp2,A_MOV,[]) and
  1805. MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1806. ((taicpu(p).oper[0]^.typ=top_const) or
  1807. ((taicpu(p).oper[0]^.typ=top_reg) and
  1808. not(RegUsedBetween(taicpu(p).oper[0]^.reg, p, hp2))
  1809. )
  1810. ) then
  1811. begin
  1812. TransferUsedRegs(TmpUsedRegs);
  1813. { we have
  1814. mov x, %treg
  1815. mov %treg, y
  1816. }
  1817. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp2).oper[1]^)) and
  1818. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs)) then
  1819. { we've got
  1820. mov x, %treg
  1821. mov %treg, y
  1822. with %treg is not used after }
  1823. case taicpu(p).oper[0]^.typ Of
  1824. top_reg:
  1825. begin
  1826. { change
  1827. mov %reg, %treg
  1828. mov %treg, y
  1829. to
  1830. mov %reg, y
  1831. }
  1832. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp2,usedregs);
  1833. taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
  1834. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
  1835. { take care of the register (de)allocs following p }
  1836. UpdateUsedRegs(tai(p.next));
  1837. asml.remove(p);
  1838. p.free;
  1839. p:=hp1;
  1840. Result:=true;
  1841. Exit;
  1842. end;
  1843. top_const:
  1844. begin
  1845. { change
  1846. mov const, %treg
  1847. mov %treg, y
  1848. to
  1849. mov const, y
  1850. }
  1851. if (taicpu(hp2).oper[1]^.typ=top_reg) or
  1852. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1853. begin
  1854. taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
  1855. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 7 done',p);
  1856. { take care of the register (de)allocs following p }
  1857. UpdateUsedRegs(tai(p.next));
  1858. asml.remove(p);
  1859. p.free;
  1860. p:=hp1;
  1861. Result:=true;
  1862. Exit;
  1863. end;
  1864. end;
  1865. else
  1866. Internalerror(2019103001);
  1867. end;
  1868. end;
  1869. { Change
  1870. mov %reg1, %reg2
  1871. xxx %reg2, ???
  1872. to
  1873. mov %reg1, %reg2
  1874. xxx %reg1, ???
  1875. to avoid a write/read penalty
  1876. }
  1877. if GetNextInstruction_p and
  1878. MatchOpType(taicpu(p),top_reg,top_reg) and
  1879. ((MatchInstruction(hp1,A_OR,A_AND,A_TEST,[]) and
  1880. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1881. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1882. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg)) or
  1883. (MatchInstruction(hp1,A_CMP,[]) and
  1884. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1885. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1886. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg)
  1887. )
  1888. ) then
  1889. { we have
  1890. mov %reg1, %reg2
  1891. test/or/and %reg2, %reg2
  1892. }
  1893. begin
  1894. TransferUsedRegs(TmpUsedRegs);
  1895. { reg1 will be used after the first instruction,
  1896. so update the allocation info }
  1897. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1898. if GetNextInstruction(hp1, hp2) and
  1899. (hp2.typ = ait_instruction) and
  1900. taicpu(hp2).is_jmp and
  1901. not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1902. { change
  1903. mov %reg1, %reg2
  1904. test/or/and %reg2, %reg2
  1905. jxx
  1906. to
  1907. test %reg1, %reg1
  1908. jxx
  1909. }
  1910. begin
  1911. if taicpu(hp1).opcode<>A_CMP then
  1912. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1913. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1914. DebugMsg(SPeepholeOptimization + 'MovTest/Cmp/Or/AndJxx2Test/Cmp/Or/AndJxx done',p);
  1915. asml.remove(p);
  1916. p.free;
  1917. p := hp1;
  1918. Exit;
  1919. end
  1920. else
  1921. { change
  1922. mov %reg1, %reg2
  1923. test/or/and %reg2, %reg2
  1924. to
  1925. mov %reg1, %reg2
  1926. test/or/and %reg1, %reg1
  1927. }
  1928. begin
  1929. if taicpu(hp1).opcode<>A_CMP then
  1930. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1931. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1932. DebugMsg(SPeepholeOptimization + 'MovTest/Cmp/Or/AndJxx2MovTest/Cmp/Or/AndJxx done',p);
  1933. end;
  1934. end;
  1935. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1936. x >= RetOffset) as it doesn't do anything (it writes either to a
  1937. parameter or to the temporary storage room for the function
  1938. result)
  1939. }
  1940. if GetNextInstruction_p and
  1941. IsExitCode(hp1) and
  1942. MatchOpType(taicpu(p),top_reg,top_ref) and
  1943. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1944. not(assigned(current_procinfo.procdef.funcretsym) and
  1945. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1946. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1947. begin
  1948. asml.remove(p);
  1949. p.free;
  1950. p:=hp1;
  1951. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  1952. RemoveLastDeallocForFuncRes(p);
  1953. Result:=true;
  1954. exit;
  1955. end;
  1956. if GetNextInstruction_p and
  1957. MatchOpType(taicpu(p),top_reg,top_ref) and
  1958. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1959. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1960. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1961. begin
  1962. { change
  1963. mov reg1, mem1
  1964. test/cmp x, mem1
  1965. to
  1966. mov reg1, mem1
  1967. test/cmp x, reg1
  1968. }
  1969. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1970. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  1971. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1972. exit;
  1973. end;
  1974. if GetNextInstruction_p and
  1975. (taicpu(p).oper[1]^.typ = top_reg) and
  1976. (hp1.typ = ait_instruction) and
  1977. GetNextInstruction(hp1, hp2) and
  1978. MatchInstruction(hp2,A_MOV,[]) and
  1979. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1980. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1981. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  1982. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1983. ) then
  1984. begin
  1985. if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1986. (taicpu(hp2).oper[0]^.typ=top_reg) then
  1987. { change movsX/movzX reg/ref, reg2
  1988. add/sub/or/... reg3/$const, reg2
  1989. mov reg2 reg/ref
  1990. dealloc reg2
  1991. to
  1992. add/sub/or/... reg3/$const, reg/ref }
  1993. begin
  1994. TransferUsedRegs(TmpUsedRegs);
  1995. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1996. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1997. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1998. begin
  1999. { by example:
  2000. movswl %si,%eax movswl %si,%eax p
  2001. decl %eax addl %edx,%eax hp1
  2002. movw %ax,%si movw %ax,%si hp2
  2003. ->
  2004. movswl %si,%eax movswl %si,%eax p
  2005. decw %eax addw %edx,%eax hp1
  2006. movw %ax,%si movw %ax,%si hp2
  2007. }
  2008. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  2009. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  2010. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  2011. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  2012. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2013. {
  2014. ->
  2015. movswl %si,%eax movswl %si,%eax p
  2016. decw %si addw %dx,%si hp1
  2017. movw %ax,%si movw %ax,%si hp2
  2018. }
  2019. case taicpu(hp1).ops of
  2020. 1:
  2021. begin
  2022. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  2023. if taicpu(hp1).oper[0]^.typ=top_reg then
  2024. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2025. end;
  2026. 2:
  2027. begin
  2028. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  2029. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  2030. (taicpu(hp1).opcode<>A_SHL) and
  2031. (taicpu(hp1).opcode<>A_SHR) and
  2032. (taicpu(hp1).opcode<>A_SAR) then
  2033. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2034. end;
  2035. else
  2036. internalerror(2008042701);
  2037. end;
  2038. {
  2039. ->
  2040. decw %si addw %dx,%si p
  2041. }
  2042. asml.remove(hp2);
  2043. hp2.Free;
  2044. RemoveCurrentP(p);
  2045. Result:=True;
  2046. Exit;
  2047. end;
  2048. end;
  2049. if MatchOpType(taicpu(hp2),top_reg,top_reg) and
  2050. not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
  2051. ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
  2052. { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
  2053. ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
  2054. )
  2055. {$ifdef i386}
  2056. { byte registers of esi, edi, ebp, esp are not available on i386 }
  2057. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  2058. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  2059. {$endif i386}
  2060. then
  2061. { change movsX/movzX reg/ref, reg2
  2062. add/sub/or/... regX/$const, reg2
  2063. mov reg2, reg3
  2064. dealloc reg2
  2065. to
  2066. movsX/movzX reg/ref, reg3
  2067. add/sub/or/... reg3/$const, reg3
  2068. }
  2069. begin
  2070. TransferUsedRegs(TmpUsedRegs);
  2071. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2072. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2073. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  2074. begin
  2075. { by example:
  2076. movswl %si,%eax movswl %si,%eax p
  2077. decl %eax addl %edx,%eax hp1
  2078. movw %ax,%si movw %ax,%si hp2
  2079. ->
  2080. movswl %si,%eax movswl %si,%eax p
  2081. decw %eax addw %edx,%eax hp1
  2082. movw %ax,%si movw %ax,%si hp2
  2083. }
  2084. DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
  2085. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  2086. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  2087. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  2088. { limit size of constants as well to avoid assembler errors, but
  2089. check opsize to avoid overflow when left shifting the 1 }
  2090. if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=63) then
  2091. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl topsize2memsize[taicpu(hp2).opsize])-1);
  2092. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2093. taicpu(p).changeopsize(taicpu(hp2).opsize);
  2094. if taicpu(p).oper[0]^.typ=top_reg then
  2095. setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2096. taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
  2097. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  2098. {
  2099. ->
  2100. movswl %si,%eax movswl %si,%eax p
  2101. decw %si addw %dx,%si hp1
  2102. movw %ax,%si movw %ax,%si hp2
  2103. }
  2104. case taicpu(hp1).ops of
  2105. 1:
  2106. begin
  2107. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  2108. if taicpu(hp1).oper[0]^.typ=top_reg then
  2109. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2110. end;
  2111. 2:
  2112. begin
  2113. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  2114. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  2115. (taicpu(hp1).opcode<>A_SHL) and
  2116. (taicpu(hp1).opcode<>A_SHR) and
  2117. (taicpu(hp1).opcode<>A_SAR) then
  2118. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2119. end;
  2120. else
  2121. internalerror(2018111801);
  2122. end;
  2123. {
  2124. ->
  2125. decw %si addw %dx,%si p
  2126. }
  2127. asml.remove(hp2);
  2128. hp2.Free;
  2129. end;
  2130. end;
  2131. end;
  2132. if GetNextInstruction_p and
  2133. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  2134. GetNextInstruction(hp1, hp2) and
  2135. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  2136. MatchOperand(Taicpu(p).oper[0]^,0) and
  2137. (Taicpu(p).oper[1]^.typ = top_reg) and
  2138. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  2139. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  2140. { mov reg1,0
  2141. bts reg1,operand1 --> mov reg1,operand2
  2142. or reg1,operand2 bts reg1,operand1}
  2143. begin
  2144. Taicpu(hp2).opcode:=A_MOV;
  2145. asml.remove(hp1);
  2146. insertllitem(hp2,hp2.next,hp1);
  2147. asml.remove(p);
  2148. p.free;
  2149. p:=hp1;
  2150. Result:=true;
  2151. exit;
  2152. end;
  2153. if GetNextInstruction_p and
  2154. MatchInstruction(hp1,A_LEA,[S_L]) and
  2155. MatchOpType(Taicpu(p),top_ref,top_reg) and
  2156. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  2157. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  2158. ) or
  2159. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  2160. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  2161. )
  2162. ) then
  2163. { mov reg1,ref
  2164. lea reg2,[reg1,reg2]
  2165. to
  2166. add reg2,ref}
  2167. begin
  2168. TransferUsedRegs(TmpUsedRegs);
  2169. { reg1 may not be used afterwards }
  2170. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  2171. begin
  2172. Taicpu(hp1).opcode:=A_ADD;
  2173. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  2174. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  2175. asml.remove(p);
  2176. p.free;
  2177. p:=hp1;
  2178. result:=true;
  2179. exit;
  2180. end;
  2181. end;
  2182. end;
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  { Pass-1 peephole for the movXX family (e.g. MOVAPS/MOVSD-style moves).
    Detects a pair of consecutive movXX instructions of the same opcode and
    size that copy a value back to its origin and removes the redundant
    second (and sometimes the first) instruction.
    In/out: p is the current instruction; may be advanced when p itself is
    removed.  Returns true when the instruction list was changed. }
  var
    hp1 : tai;   { the following movXX instruction }
  begin
    Result:=false;
    { only the two-operand form is handled }
    if taicpu(p).ops <> 2 then
      exit;
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        { operand types must be "mirrored": reg/mem followed by mem/reg }
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
           (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
            {  movXX reg1, mem1     or     movXX mem1, reg1
               movXX mem2, reg2            movXX reg2, mem2}
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg1            movXX reg2, mem1}
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                      movXX reg1, mem1/reg2
                      movXX mem1/reg2, reg1
                    }
                    { keep reg1 marked allocated across the pair so later
                      passes know it is still live in between }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { Removes the second statement from
                      movXX mem1/reg1, reg2
                      movXX reg2, mem1/reg1
                    }
                    { if the destination register dies here, the FIRST move is
                      dead too and both instructions go away (Nop case);
                      otherwise only the second one is dropped (MoVXX case) }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      begin
                        asml.remove(p);
                        p.free;
                        { advance p past the removed pair }
                        GetNextInstruction(hp1,p);
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                      end
                    else
                      DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                    { in both branches the second movXX is removed }
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
  2234. function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  2235. var
  2236. hp1 : tai;
  2237. begin
  2238. result:=false;
  2239. { replace
  2240. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  2241. MovX %mreg2,%mreg1
  2242. dealloc %mreg2
  2243. by
  2244. <Op>X %mreg2,%mreg1
  2245. ?
  2246. }
  2247. if GetNextInstruction(p,hp1) and
  2248. { we mix single and double opperations here because we assume that the compiler
  2249. generates vmovapd only after double operations and vmovaps only after single operations }
  2250. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  2251. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  2252. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  2253. (taicpu(p).oper[0]^.typ=top_reg) then
  2254. begin
  2255. TransferUsedRegs(TmpUsedRegs);
  2256. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2257. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  2258. begin
  2259. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  2260. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  2261. DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
  2262. asml.Remove(hp1);
  2263. hp1.Free;
  2264. result:=true;
  2265. end;
  2266. end;
  2267. end;
function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  { Pass-1 peephole for LEA.  Applies, in order:
      - strips a useless segment prefix from the reference;
      - lea (%reg1),%reg2            -> mov %reg1,%reg2 / nop / inc / dec / add / sub;
      - lea ...,reg ; mov reg,reg2   -> lea ...,reg2       (LeaMov2Lea);
      - lea off1(rX),r1 ; lea off2(r1),r1 -> lea off1+off2(rX),r1 (LeaLea2Lea);
      - folds the lea's reference into a following instruction that uses the
        destination register in a memory operand (LeaOp2Op);
      - lea x(sp),sp ; call f ; lea -x(sp),sp ; ret -> jmp f (level-4 only).
    Returns true when the instruction list was changed; p may be replaced. }
  var
    hp1, hp2, hp3: tai;     { following instructions examined by the patterns }
    l : ASizeInt;           { signed lea displacement, for the add/sub/inc/dec rewrite }
    ref: Integer;           { operand index (0/1) of the memory operand using reg1, or -1 }
    saveref: treference;    { backup of hp1's reference so it can be restored on failure }
  begin
    Result:=false;
    { removes seg register prefixes from LEA operations, as they
      don't do anything}
    taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
    { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
    if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
       (taicpu(p).oper[0]^.ref^.index = NR_NO) and
       { do not mess with leas acessing the stack pointer }
       (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
       (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
      begin
        if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
           (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            { lea (%reg1),%reg2 with no offset is a plain register copy }
            hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
              taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous,p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            { lea (%reg1),%reg1 does nothing at all }
            DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
            RemoveCurrentP(p);
            Result:=true;
            exit;
          end
        { continue to use lea to adjust the stack pointer,
          it is the recommended way, but only if not optimizing for size }
        else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
          (cs_opt_size in current_settings.optimizerswitches) then
          with taicpu(p).oper[0]^.ref^ do
            if (base = taicpu(p).oper[1]^.reg) then
              begin
                { lea off(%reg),%reg is just an add/sub of a constant;
                  note: unlike lea, inc/dec/add/sub clobber the flags }
                l:=offset;
                if (l=1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
                  end
                else if (l=-1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
                  end
                else
                  begin
                    { -l would overflow for the most negative 32-bit value,
                      hence the explicit -2147483648 exclusion }
                    if (l<0) and (l<>-2147483648) then
                      begin
                        taicpu(p).opcode:=A_SUB;
                        taicpu(p).loadConst(0,-l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                      end
                    else
                      begin
                        taicpu(p).opcode:=A_ADD;
                        taicpu(p).loadConst(0,l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                      end;
                  end;
                Result:=true;
                exit;
              end;
      end;
    { lea ...,reg ; mov reg,reg2 -> lea ...,reg2 when reg dies at the mov }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOpType(Taicpu(hp1),top_reg,top_reg) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
    { changes
        lea offset1(regX), reg1
        lea offset2(reg1), reg1
      to
        lea offset1+offset2(regX), reg1 }
    { both references must agree in every component except base/offset so the
      merge is a pure displacement addition }
    if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
      MatchInstruction(hp1,A_LEA,[S_L]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
      (taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp1).oper[0]^.ref^.index) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp1).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp1).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp1).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp1).oper[0]^.ref^.symbol) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaLea2Lea done',p);
        inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
        taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
        RemoveCurrentP(p);
        result:=true;
        exit;
      end;
    { changes
        lea <ref1>, reg1
        <op> ...,<ref. with reg1>,...
      to
        <op> ...,<ref1>,... }
    if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
       (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
       GetNextInstruction(p,hp1) and
       (hp1.typ=ait_instruction) and
       not(MatchInstruction(hp1,A_LEA,[])) then
      begin
        { find a reference which uses reg1 }
        if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
          ref:=0
        else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
          ref:=1
        else
          ref:=-1;
        if (ref<>-1) and
          { reg1 must be either the base or the index }
          ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
          begin
            { reg1 can be removed from the reference }
            { save the original so it can be restored if the fold fails }
            saveref:=taicpu(hp1).oper[ref]^.ref^;
            if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
            else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
            else
              Internalerror(2019111201);
            { check if the can insert all data of the lea into the second instruction }
            if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
              ((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
              ((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
              ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
              ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
              ((taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
              (taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
{$ifdef x86_64}
              { the merged 64-bit displacement must still fit in a signed 32-bit field,
                and RIP-relative references cannot gain a base/index register }
              and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
              and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
                   ((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
                  )
{$endif x86_64}
              then
              begin
                { reg1 might not used by the second instruction after it is remove from the reference }
                if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    { reg1 is not updated so it might not be used afterwards }
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
                        { merge every non-empty component of the lea reference }
                        if taicpu(p).oper[0]^.ref^.base<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
                        if taicpu(p).oper[0]^.ref^.index<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
                        if taicpu(p).oper[0]^.ref^.symbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
                        if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
                        if not(taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) then
                          taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
                        inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
                        RemoveCurrentP(p);
                        result:=true;
                        exit;
                      end
                  end;
              end;
            { recover }
            taicpu(hp1).oper[ref]^.ref^:=saveref;
          end;
      end;
    { replace
        lea x(stackpointer),stackpointer
        call procname
        lea -x(stackpointer),stackpointer
        ret
      by
        jmp procname
      this should never hurt except when pic is used, not sure
      how to handle it then
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      not(cs_create_pic in current_settings.moduleswitches) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=taicpu(hp2).oper[0]^.ref^.base) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp2).oper[0]^.ref^.index) and
      { the second lea must exactly undo the first one's adjustment }
      (taicpu(p).oper[0]^.ref^.offset=-taicpu(hp2).oper[0]^.ref^.offset) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp2).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp2).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp2).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp2).oper[0]^.ref^.symbol) and
      GetNextInstruction(hp2, hp3) and
      MatchInstruction(hp3,A_RET,[S_NO]) and
      (taicpu(hp3).ops=0) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        { turn the call into a tail jump and drop the stack adjustments + ret }
        taicpu(hp1).opcode:=A_JMP;
        taicpu(hp1).is_jmp:=true;
        asml.remove(p);
        asml.remove(hp2);
        asml.remove(hp3);
        p.free;
        hp2.free;
        hp3.free;
        p:=hp1;
        Result:=true;
      end;
  end;
function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  { Merges the instruction BEFORE p (dec/sub/add of the same register and
    size) into the "sub const,reg" at p by adjusting p's constant.
    On the ADD path, if the constants cancel to zero, p itself is removed as
    well and the function returns True to tell the caller that p has moved
    (p is repositioned to the previous instruction, or to the next one when
    there is no previous instruction).  Otherwise returns False, even when a
    preceding instruction was folded in. }
  var
    hp1 : tai;   { the instruction preceding p }
  begin
    DoSubAddOpt := False;
    if GetLastInstruction(p, hp1) and
       (hp1.typ = ait_instruction) and
       (taicpu(hp1).opsize = taicpu(p).opsize) then
      case taicpu(hp1).opcode Of
        A_DEC:
          { dec reg ; sub const,reg -> sub const+1,reg }
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
            MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB:
          { sub const1,reg ; sub const2,reg -> sub const1+const2,reg }
          if MatchOpType(taicpu(hp1),top_const,top_reg) and
            MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
            end;
        A_ADD:
          begin
            { add const1,reg ; sub const2,reg -> sub const2-const1,reg }
            if MatchOpType(taicpu(hp1),top_const,top_reg) and
              MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
              begin
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                asml.remove(hp1);
                hp1.free;
                { the two instructions cancelled out completely: drop p too }
                if (taicpu(p).oper[0]^.val = 0) then
                  begin
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    { reposition p so the caller can continue scanning }
                    if not GetLastInstruction(hp1, p) then
                      p := hp1;
                    DoSubAddOpt := True;
                  end
              end;
          end;
        else
          ;
      end;
  end;
  2563. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  2564. {$ifdef i386}
  2565. var
  2566. hp1 : tai;
  2567. {$endif i386}
  2568. begin
  2569. Result:=false;
  2570. { * change "subl $2, %esp; pushw x" to "pushl x"}
  2571. { * change "sub/add const1, reg" or "dec reg" followed by
  2572. "sub const2, reg" to one "sub ..., reg" }
  2573. if MatchOpType(taicpu(p),top_const,top_reg) then
  2574. begin
  2575. {$ifdef i386}
  2576. if (taicpu(p).oper[0]^.val = 2) and
  2577. (taicpu(p).oper[1]^.reg = NR_ESP) and
  2578. { Don't do the sub/push optimization if the sub }
  2579. { comes from setting up the stack frame (JM) }
  2580. (not(GetLastInstruction(p,hp1)) or
  2581. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  2582. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  2583. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  2584. begin
  2585. hp1 := tai(p.next);
  2586. while Assigned(hp1) and
  2587. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  2588. not RegReadByInstruction(NR_ESP,hp1) and
  2589. not RegModifiedByInstruction(NR_ESP,hp1) do
  2590. hp1 := tai(hp1.next);
  2591. if Assigned(hp1) and
  2592. MatchInstruction(hp1,A_PUSH,[S_W]) then
  2593. begin
  2594. taicpu(hp1).changeopsize(S_L);
  2595. if taicpu(hp1).oper[0]^.typ=top_reg then
  2596. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  2597. hp1 := tai(p.next);
  2598. asml.remove(p);
  2599. p.free;
  2600. p := hp1;
  2601. Result:=true;
  2602. exit;
  2603. end;
  2604. end;
  2605. {$endif i386}
  2606. if DoSubAddOpt(p) then
  2607. Result:=true;
  2608. end;
  2609. end;
function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
  { Pass-1 peephole for SHL/SAL by a small constant (1..3) on a 32/64-bit
    register.  Greedily folds following add/sub/inc/dec/lea instructions on
    the same register into a single LEA whose scale factor encodes the shift.
    On pre-Pentium2 CPUs (32-bit only) it also rewrites a lone shl $1 as
    "add reg,reg" and shl $2/$3 as a scaled lea for pipeline reasons.
    NOTE: Result is never set to true in this routine even when the list is
    changed - presumably the caller re-scans anyway; TODO confirm upstream. }
  var
    TmpBool1,TmpBool2 : Boolean;   { TmpBool1: keep scanning; TmpBool2: something was folded }
    tmpref : treference;           { the lea reference being accumulated }
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_const,top_reg) and
       (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
       (taicpu(p).oper[0]^.val <= 3) then
      { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
      begin
        { should we check the next instruction? }
        TmpBool1 := True;
        { have we found an add/sub which could be
          integrated in the lea? }
        TmpBool2 := False;
        reference_reset(tmpref,2,[]);
        TmpRef.index := taicpu(p).oper[1]^.reg;
        { shl by n is a scale factor of 2^n }
        TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
        { consume successive add/sub/inc/dec/lea on the same register, but
          only while no later instruction reads the flags the folded
          arithmetic would have set (lea does not write flags) }
        while TmpBool1 and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              ((((taicpu(hp1).opcode = A_ADD) or
                 (taicpu(hp1).opcode = A_SUB)) and
                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
               (((taicpu(hp1).opcode = A_INC) or
                 (taicpu(hp1).opcode = A_DEC)) and
                (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg)) or
               ((taicpu(hp1).opcode = A_LEA) and
                (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg))) and
              (not GetNextInstruction(hp1,hp2) or
               not instrReadsFlags(hp2)) Do
          begin
            TmpBool1 := False;
            if taicpu(hp1).opcode=A_LEA then
              begin
                { a following lea can be folded only when its components do
                  not collide and the combined scale stays encodable (<=8) }
                if (TmpRef.base = NR_NO) and
                   (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                   (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
                   (taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and
                   ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                    (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                  begin
                    TmpBool1 := True;
                    TmpBool2 := True;
                    inc(TmpRef.offset, taicpu(hp1).oper[0]^.ref^.offset);
                    if taicpu(hp1).oper[0]^.ref^.scalefactor<>0 then
                      tmpref.scalefactor:=tmpref.scalefactor*taicpu(hp1).oper[0]^.ref^.scalefactor;
                    TmpRef.base := taicpu(hp1).oper[0]^.ref^.base;
                    asml.remove(hp1);
                    hp1.free;
                  end
              end
            else if (taicpu(hp1).oper[0]^.typ = Top_Const) then
              begin
                { add/sub of a constant becomes part of the displacement }
                TmpBool1 := True;
                TmpBool2 := True;
                case taicpu(hp1).opcode of
                  A_ADD:
                    inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  A_SUB:
                    dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  else
                    internalerror(2019050536);
                end;
                asml.remove(hp1);
                hp1.free;
              end
            else
              { add of a register becomes the base (only one base slot);
                inc/dec adjust the displacement by one }
              if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                 (((taicpu(hp1).opcode = A_ADD) and
                   (TmpRef.base = NR_NO)) or
                  (taicpu(hp1).opcode = A_INC) or
                  (taicpu(hp1).opcode = A_DEC)) then
                begin
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      TmpRef.base := taicpu(hp1).oper[0]^.reg;
                    A_INC:
                      inc(TmpRef.offset);
                    A_DEC:
                      dec(TmpRef.offset);
                    else
                      internalerror(2019050535);
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end;
          end;
        { emit the combined lea if anything was folded, or - on old 32-bit
          CPUs when not optimizing for size - even for the bare shift }
        if TmpBool2
{$ifndef x86_64}
          or
          ((current_settings.optimizecputype < cpu_Pentium2) and
          (taicpu(p).oper[0]^.val <= 3) and
          not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
          then
          begin
            { a bare shl $1 is cheaper as "add reg,reg" than as a lea }
            if not(TmpBool2) and
                (taicpu(p).oper[0]^.val=1) then
              begin
                hp1:=taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              end
            else
              hp1:=taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                taicpu(p).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'ShlAddLeaSubIncDec2Lea',p);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$ifndef x86_64}
    else if (current_settings.optimizecputype < cpu_Pentium2) and
      MatchOpType(taicpu(p),top_const,top_reg) then
      begin
        { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
          but faster on a 486, and Tairable in both U and V pipes on the Pentium
          (unlike shl, which is only Tairable in the U pipe) }
        if taicpu(p).oper[0]^.val=1 then
          begin
            hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
              taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end
        { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
          "shl $3, %reg" to "lea (,%reg,8), %reg }
        else if (taicpu(p).opsize = S_L) and
          (taicpu(p).oper[0]^.val<= 3) then
          begin
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$endif x86_64}
    ;
  end;
function TX86AsmOptimizer.OptPass1SETcc(var p: tai): boolean;
  { Pass-1 peephole for SETcc:
      set(C) %reg ; test %reg,%reg (or cmp $0,%reg) ; je/jne label
    is turned into a direct conditional jump j(~C)/j(C), removing the
    test/cmp and - when the byte register dies - the setcc as well.
    NOTE: Result is only set on the path that removes p; when just the
    test/jcc are rewritten, Result stays false - presumably intentional
    since p is unchanged; TODO confirm against upstream. }
  var
    hp1,hp2,next: tai; SetC, JumpC: TAsmCond; Unconditional: Boolean;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_reg) and
      GetNextInstruction(p, hp1) and
      { accept either "test reg,reg" or "cmp $0,reg" as the zero test }
      ((MatchInstruction(hp1, A_TEST, [S_B]) and
        MatchOpType(taicpu(hp1),top_reg,top_reg) and
        (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg)) or
       (MatchInstruction(hp1, A_CMP, [S_B]) and
        MatchOpType(taicpu(hp1),top_const,top_reg) and
        (taicpu(hp1).oper[0]^.val=0))
      ) and
      (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2, A_Jcc, []) then
      { Change from:             To:
          set(C) %reg              j(~C) label
          test   %reg,%reg/cmp $0,%reg
          je     label
          set(C) %reg              j(C)  label
          test   %reg,%reg/cmp $0,%reg
          jne    label
      }
      begin
        next := tai(p.Next);
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, next);
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        JumpC := taicpu(hp2).condition;
        Unconditional := False;
        { je branches when the setcc stored 0, i.e. when C was false }
        if conditions_equal(JumpC, C_E) then
          SetC := inverse_cond(taicpu(p).condition)
        else if conditions_equal(JumpC, C_NE) then
          SetC := taicpu(p).condition
        else
          { We've got something weird here (and inefficent) }
          begin
            DebugMsg('DEBUG: Inefficient jump - check code generation', p);
            SetC := C_NONE;
            { JAE/JNB will always branch (use 'condition_in', since C_AE <> C_NB normally) }
            { after cmp $0 on an unsigned byte, "above or equal" is always true }
            if condition_in(C_AE, JumpC) then
              Unconditional := True
            else
              { Not sure what to do with this jump - drop out }
              Exit;
          end;
        { the test/cmp is redundant in every surviving case }
        asml.Remove(hp1);
        hp1.Free;
        if Unconditional then
          MakeUnconditional(taicpu(hp2))
        else
          begin
            if SetC = C_NONE then
              InternalError(2018061401);
            taicpu(hp2).SetCondition(SetC);
          end;
        { drop the setcc too when its register dies before the jump }
        if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs) then
          begin
            asml.Remove(p);
            UpdateUsedRegs(next);
            p.Free;
            Result := True;
            p := hp2;
          end;
        DebugMsg(SPeepholeOptimization + 'SETcc/TESTCmp/Jcc -> Jcc',p);
      end;
  end;
function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
{ returns true if a "continue" should be done after this optimization }
  { Pass-1 peephole for FSTP/FISTP: detects "fstp mem ; fld mem" (or
    fistp/fild) on the identical memory location.  The pair is removed
    entirely only in the extended-precision function-result case right
    before an exit - for narrower sizes the store would round the value,
    so fstp;fld is NOT equivalent to fst (see the disabled code below). }
  var
    hp1, hp2: tai;   { hp1: the reloading fld/fild; hp2: the following exit code }
  begin
    Result := false;
    if MatchOpType(taicpu(p),top_ref) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      (((taicpu(hp1).opcode = A_FLD) and
        (taicpu(p).opcode = A_FSTP)) or
       ((taicpu(p).opcode = A_FISTP) and
        (taicpu(hp1).opcode = A_FILD))) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
      begin
        { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
        if (taicpu(p).opsize=S_FX) and
          GetNextInstruction(hp1, hp2) and
          (hp2.typ = ait_instruction) and
          IsExitCode(hp2) and
          { the store must target a local temp in this frame (not the
            function result slot), so dropping it is unobservable }
          (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
          not(assigned(current_procinfo.procdef.funcretsym) and
             (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
          (taicpu(p).oper[0]^.ref^.index = NR_NO) then
          begin
            { value stays on the FPU stack; remove both store and reload }
            asml.remove(p);
            asml.remove(hp1);
            p.free;
            hp1.free;
            p := hp2;
            RemoveLastDeallocForFuncRes(p);
            Result := true;
          end
        (* can't be done because the store operation rounds
        else
          { fst can't store an extended value! }
          if (taicpu(p).opsize <> S_FX) and
             (taicpu(p).opsize <> S_IQ) then
            begin
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else taicpu(p).opcode := A_FIST;
              asml.remove(hp1);
              hp1.free;
            end
        *)
      end;
  end;
function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
  { Pass-1 peephole for FLD.  Two independent patterns:
      1) fld st(i) ; fxxxp st,st(1)      -> fxxx st(i),st
         (the pop undoes the push, so the pair collapses to the non-popping
          form; non-commutative ops must swap to their reversed mnemonic)
      2) fld/fst mem ; fld mem ; fxxxp st,st(1)
         - the second load of the SAME location becomes "fld st" or is
           folded into fadd/fmul st,st; for a DIFFERENT location the fxxxp
           is folded into "fxxx mem".
    Returns true only on the first pattern (p is replaced by hp1). }
  var
    hp1, hp2: tai;   { hp1: previous fld/fst or the fxxxp; hp2: the fxxxp in pattern 2 }
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = Ait_Instruction) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      (taicpu(hp1).oper[0]^.reg = NR_ST) and
      (taicpu(hp1).oper[1]^.reg = NR_ST1) then
      { change to
          fld      reg               fxxx reg,st
          fxxxp    st, st1 (hp1)
        Remark: non commutative operations must be reversed!
      }
      begin
        case taicpu(hp1).opcode Of
          A_FMULP,A_FADDP,
          A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
            begin
              { popping form -> non-popping form; sub/div swap to their
                reversed variants because the operand order flips }
              case taicpu(hp1).opcode Of
                A_FADDP: taicpu(hp1).opcode := A_FADD;
                A_FMULP: taicpu(hp1).opcode := A_FMUL;
                A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                else
                  internalerror(2019050534);
              end;
              taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
              taicpu(hp1).oper[1]^.reg := NR_ST;
              asml.remove(p);
              p.free;
              p := hp1;
              Result:=true;
              exit;
            end;
          else
            ;
        end;
      end
    else
      if MatchOpType(taicpu(p),top_ref) and
        GetNextInstruction(p, hp2) and
        (hp2.typ = Ait_Instruction) and
        MatchOpType(taicpu(hp2),top_reg,top_reg) and
        (taicpu(p).opsize in [S_FS, S_FL]) and
        (taicpu(hp2).oper[0]^.reg = NR_ST) and
        (taicpu(hp2).oper[1]^.reg = NR_ST1) then
        if GetLastInstruction(p, hp1) and
          MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
          MatchOpType(taicpu(hp1),top_ref) and
          RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          { same memory location was just loaded/stored: the value is
            already in st(0) }
          if ((taicpu(hp2).opcode = A_FMULP) or
              (taicpu(hp2).opcode = A_FADDP)) then
            { change to
                fld/fst   mem1  (hp1)             fld/fst   mem1
                fld       mem1  (p)               fadd/
                faddp/                            fmul     st, st
                fmulp  st, st1 (hp2) }
            begin
              asml.remove(p);
              p.free;
              p := hp1;
              if (taicpu(hp2).opcode = A_FADDP) then
                taicpu(hp2).opcode := A_FADD
              else
                taicpu(hp2).opcode := A_FMUL;
              taicpu(hp2).oper[1]^.reg := NR_ST;
            end
          else
            { change to
                fld/fst  mem1 (hp1)              fld/fst  mem1
                fld      mem1 (p)                fld      st}
            begin
              taicpu(p).changeopsize(S_FL);
              taicpu(p).loadreg(0,NR_ST);
            end
        else
          begin
            { different location: fold the popping op into a memory operand }
            case taicpu(hp2).opcode Of
              A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                { change to
                    fld/fst mem1 (hp1)      fld/fst mem1
                    fld     mem2 (p)        fxxx    mem2
                    fxxxp   st, st1 (hp2) }
                begin
                  { reversed mnemonics for the same reason as above }
                  case taicpu(hp2).opcode Of
                    A_FADDP: taicpu(p).opcode := A_FADD;
                    A_FMULP: taicpu(p).opcode := A_FMUL;
                    A_FSUBP: taicpu(p).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(p).opcode := A_FSUB;
                    A_FDIVP: taicpu(p).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(p).opcode := A_FDIV;
                    else
                      internalerror(2019050533);
                  end;
                  asml.remove(hp2);
                  hp2.free;
                end
              else
                ;
            end
          end
  end;
  2987. function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean;
  2988. var
  2989. v: TCGInt;
  2990. hp1, hp2, hp3, hp4: tai;
  2991. begin
  2992. Result:=false;
  2993. { cmp register,$8000 neg register
  2994. je target --> jo target
  2995. .... only if register is deallocated before jump.}
  2996. case Taicpu(p).opsize of
  2997. S_B: v:=$80;
  2998. S_W: v:=$8000;
  2999. S_L: v:=qword($80000000);
  3000. { actually, this will never happen: cmp with 64 bit constants is not possible }
  3001. S_Q : v:=Int64($8000000000000000);
  3002. else
  3003. internalerror(2013112905);
  3004. end;
  3005. if MatchOpType(taicpu(p),Top_const,top_reg) and
  3006. (taicpu(p).oper[0]^.val=v) and
  3007. GetNextInstruction(p, hp1) and
  3008. MatchInstruction(hp1,A_Jcc,[]) and
  3009. (Taicpu(hp1).condition in [C_E,C_NE]) then
  3010. begin
  3011. TransferUsedRegs(TmpUsedRegs);
  3012. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  3013. if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
  3014. begin
  3015. DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
  3016. Taicpu(p).opcode:=A_NEG;
  3017. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  3018. Taicpu(p).clearop(1);
  3019. Taicpu(p).ops:=1;
  3020. if Taicpu(hp1).condition=C_E then
  3021. Taicpu(hp1).condition:=C_O
  3022. else
  3023. Taicpu(hp1).condition:=C_NO;
  3024. Result:=true;
  3025. exit;
  3026. end;
  3027. end;
  3028. {
  3029. @@2: @@2:
  3030. .... ....
  3031. cmp operand1,0
  3032. jle/jbe @@1
  3033. dec operand1 --> sub operand1,1
  3034. jmp @@2 jge/jae @@2
  3035. @@1: @@1:
  3036. ... ....}
  3037. if (taicpu(p).oper[0]^.typ = top_const) and
  3038. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  3039. (taicpu(p).oper[0]^.val = 0) and
  3040. GetNextInstruction(p, hp1) and
  3041. MatchInstruction(hp1,A_Jcc,[]) and
  3042. (taicpu(hp1).condition in [C_LE,C_BE]) and
  3043. GetNextInstruction(hp1,hp2) and
  3044. MatchInstruction(hp1,A_DEC,[]) and
  3045. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  3046. GetNextInstruction(hp2, hp3) and
  3047. MatchInstruction(hp1,A_JMP,[]) and
  3048. GetNextInstruction(hp3, hp4) and
  3049. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  3050. begin
  3051. DebugMsg(SPeepholeOptimization + 'CmpJxxDecJmp2SubJcc done',p);
  3052. taicpu(hp2).Opcode := A_SUB;
  3053. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  3054. taicpu(hp2).loadConst(0,1);
  3055. taicpu(hp2).ops:=2;
  3056. taicpu(hp3).Opcode := A_Jcc;
  3057. case taicpu(hp1).condition of
  3058. C_LE: taicpu(hp3).condition := C_GE;
  3059. C_BE: taicpu(hp3).condition := C_AE;
  3060. else
  3061. internalerror(2019050903);
  3062. end;
  3063. asml.remove(p);
  3064. asml.remove(hp1);
  3065. p.free;
  3066. hp1.free;
  3067. p := hp2;
  3068. Result:=true;
  3069. exit;
  3070. end;
  3071. end;
function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
{ Pass-2 peephole optimisations for a MOV instruction at p.
  Handled patterns (each guarded by its own condition chain below):
    * MOV followed by JMP: re-run the jump optimiser on the JMP and, if it
      changed nothing, fast-forward the optimisation window past the jump;
    * mov reg1,reg2 ; movzx/movsx reg2,reg3  ->  movzx/movsx reg1,reg3
      (dropping the MOV when reg2 dies);
    * mov reg1,reg2 ; mov/zx/sx (reg2,..),reg2  ->  mov/zx/sx (reg1,..),reg2;
    * mov (ref),reg ; arith/lea ...,reg ; mov reg,(ref)
      ->  arith ...,(ref)  when reg is not used afterwards;
    * (x86_64 only) two 32-bit MOVs + 64-bit ADD/LEA + "shrq $1"
      ->  movl/addl/rcrl, computing (x + y) shr 1 in 32 bits.
  Returns True when the stream was changed and p points at the rewrite. }
  var
    hp1,hp2: tai;
{$ifdef x86_64}
    hp3: tai;
{$endif x86_64}
  begin
    Result:=false;
    if not GetNextInstruction(p, hp1) then
      Exit;
    if MatchInstruction(hp1, A_JMP, [S_NO]) then
      begin
        { Sometimes the MOVs that OptPass2JMP produces can be improved
          further, but we can't just put this jump optimisation in pass 1
          because it tends to perform worse when conditional jumps are
          nearby (e.g. when converting CMOV instructions). [Kit] }
        if OptPass2JMP(hp1) then
          { call OptPass1MOV once to potentially merge any MOVs that were created }
          Result := OptPass1MOV(p)
          { OptPass2MOV will now exit but will be called again if OptPass1MOV
            returned True and the instruction is still a MOV, thus checking
            the optimisations below }
        else
          { Since OptPass2JMP returned false, no optimisations were done to
            the jump. Additionally, a label will definitely follow the jump
            (although it may have become dead), so skip ahead as far as
            possible }
          begin
            while (p <> hp1) do
              begin
                { Nothing changed between the MOV and the JMP, so
                  don't bother with "UpdateUsedRegsAndOptimize" }
                UpdateUsedRegs(p);
                p := tai(p.Next);
              end;
            { Use "UpdateUsedRegsAndOptimize" here though, because the
              label might now be dead and can be stripped out }
            p := tai(UpdateUsedRegsAndOptimize(hp1).Next);
            { If p is a label, then Result will be False and program flow
              will move onto the next list entry in "PeepHoleOptPass2" }
            if (p = BlockEnd) or not (p.typ in [ait_align, ait_label]) then
              Result := True;
          end;
      end
    else if MatchOpType(taicpu(p),top_reg,top_reg) and
{$ifdef x86_64}
      MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
{$else x86_64}
      MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
      { mov reg1, reg2                mov reg1, reg2
        movzx/sx reg2, reg3      to   movzx/sx reg1, reg3}
      begin
        taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
        DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
        { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
          or unless supreg(reg3) = supreg(reg2)). [Kit] }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
          not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
        then
          begin
            asml.remove(p);
            p.free;
            p := hp1;
            Result:=true;
          end;
        exit;
      end
    else if MatchOpType(taicpu(p),top_reg,top_reg) and
{$ifdef x86_64}
      MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
{$else x86_64}
      MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
      MatchOpType(taicpu(hp1),top_ref,top_reg) and
      ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
       or
       (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
      ) and
      (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
      { mov reg1, reg2
        mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
      begin
        { the second instruction overwrites reg2, so reg1 can stand in for
          it inside the reference }
        if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
        if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
        DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
        asml.remove(p);
        p.free;
        p := hp1;
        Result:=true;
        exit;
      end
    else if (taicpu(p).oper[0]^.typ = top_ref) and
      (hp1.typ = ait_instruction) and
      { while the GetNextInstruction(hp1,hp2) call could be factored out,
        doing it separately in both branches allows to do the cheap checks
        with low probability earlier }
      ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[])
       ) or
       ((taicpu(hp1).opcode=A_LEA) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[]) and
        ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
          (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
         ) or
         (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
          taicpu(p).oper[1]^.reg) and
          (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
        ) and
        ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
       )
      ) and
      MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
      (taicpu(hp2).oper[1]^.typ = top_ref) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs,tai(p.next));
        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
        if (RefsEqual(taicpu(hp2).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
          not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,TmpUsedRegs))) then
          { change   mov            (ref), reg
                     add/sub/or/... reg2/$const, reg
                     mov            reg, (ref)
                     # release reg
            to       add/sub/or/... reg2/$const, (ref)    }
          begin
            case taicpu(hp1).opcode of
              A_INC,A_DEC,A_NOT,A_NEG :
                { single-operand ops operate directly on the memory operand }
                taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
              A_LEA :
                begin
                  { an LEA of the form lea x(reg[,reg2]),reg becomes an ADD
                    of the "other" component onto the memory operand }
                  taicpu(hp1).opcode:=A_ADD;
                  if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                  else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                  else
                    taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                  DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
                end
              else
                taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
            end;
            asml.remove(p);
            asml.remove(hp2);
            p.free;
            hp2.free;
            p := hp1
          end;
        Exit;
{$ifdef x86_64}
      end
    else if (taicpu(p).opsize = S_L) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      (
        MatchInstruction(hp1, A_MOV,[]) and
        (taicpu(hp1).opsize = S_L) and
        (taicpu(hp1).oper[1]^.typ = top_reg)
      ) and (
        GetNextInstruction(hp1, hp2) and
        (tai(hp2).typ=ait_instruction) and
        (taicpu(hp2).opsize = S_Q) and
        (
          (
            MatchInstruction(hp2, A_ADD,[]) and
            (taicpu(hp2).opsize = S_Q) and
            (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
            (
              (
                (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
                (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
              ) or (
                (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
              )
            )
          ) or (
            MatchInstruction(hp2, A_LEA,[]) and
            (taicpu(hp2).oper[0]^.ref^.offset = 0) and
            (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
            (
              (
                (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
                (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
              ) or (
                (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
              )
            ) and (
              (
                (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
              ) or (
                (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
              )
            )
          )
        )
      ) and (
        GetNextInstruction(hp2, hp3) and
        MatchInstruction(hp3, A_SHR,[]) and
        (taicpu(hp3).opsize = S_Q) and
        (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
        (taicpu(hp3).oper[0]^.val = 1) and
        (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
      ) then
      begin
        { Change   movl    x,    reg1d             movl    x,    reg1d
                   movl    y,    reg2d             movl    y,    reg2d
                   addq    reg2q,reg1q     or      leaq    (reg1q,reg2q),reg1q
                   shrq    $1,   reg1q             shrq    $1,   reg1q

          ( reg1d and reg2d can be switched around in the first two instructions )

          To       movl    x,    reg1d
                   addl    y,    reg1d
                   rcrl    $1,   reg1d

          This corresponds to the common expression (x + y) shr 1, where
          x and y are Cardinals (replacing "shr 1" with "div 2" produces
          smaller code, but won't account for x + y causing an overflow). [Kit]
        }
        if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
          { Change first MOV command to have the same register as the final output }
          taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
        else
          taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
        { Change second MOV command to an ADD command. This is easier than
          converting the existing command because it means we don't have to
          touch 'y', which might be a complicated reference, and also the
          fact that the third command might either be ADD or LEA. [Kit] }
        taicpu(hp1).opcode := A_ADD;
        { Delete old ADD/LEA instruction }
        asml.remove(hp2);
        hp2.free;
        { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
        taicpu(hp3).opcode := A_RCR;
        taicpu(hp3).changeopsize(S_L);
        setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
{$endif x86_64}
      end;
  end;
function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
{ Folds a preceding register copy into an IMUL:
      mov  reg1,reg2
      imul y,reg2          ->  imul y,reg1,reg2
  Only applied when the IMUL source is a constant or a full-address
  reference, the destination is a register, and either the destination
  register is not live after the IMUL or the 2- and 3-operand forms
  already name the same register.  Returns True when folded. }
  var
    hp1 : tai;
  begin
    Result:=false;
    if (taicpu(p).ops >= 2) and
       ((taicpu(p).oper[0]^.typ = top_const) or
        ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
       (taicpu(p).oper[1]^.typ = top_reg) and
       ((taicpu(p).ops = 2) or
        ((taicpu(p).oper[2]^.typ = top_reg) and
         (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
       GetLastInstruction(p,hp1) and
       MatchInstruction(hp1,A_MOV,[]) and
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
      begin
        { NOTE(review): unlike other call sites, TransferUsedRegs is not
          followed by UpdateUsedRegs(TmpUsedRegs,tai(p.next)) here —
          confirm that RegUsedAfterInstruction handles this correctly. }
        TransferUsedRegs(TmpUsedRegs);
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) or
          ((taicpu(p).ops = 3) and (taicpu(p).oper[1]^.reg=taicpu(p).oper[2]^.reg)) then
          { change
              mov reg1,reg2
              imul y,reg2 to imul y,reg1,reg2 }
          begin
            taicpu(p).ops := 3;
            taicpu(p).loadreg(2,taicpu(p).oper[1]^.reg);
            taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
            DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
            { the copy is now folded into the IMUL and can go }
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
      end;
  end;
  3355. procedure TX86AsmOptimizer.ConvertJumpToRET(const p: tai; const ret_p: tai);
  3356. var
  3357. ThisLabel: TAsmLabel;
  3358. begin
  3359. ThisLabel := tasmlabel(taicpu(p).oper[0]^.ref^.symbol);
  3360. ThisLabel.decrefs;
  3361. taicpu(p).opcode := A_RET;
  3362. taicpu(p).is_jmp := false;
  3363. taicpu(p).ops := taicpu(ret_p).ops;
  3364. case taicpu(ret_p).ops of
  3365. 0:
  3366. taicpu(p).clearop(0);
  3367. 1:
  3368. taicpu(p).loadconst(0,taicpu(ret_p).oper[0]^.val);
  3369. else
  3370. internalerror(2016041301);
  3371. end;
  3372. { If the original label is now dead, it might turn out that the label
  3373. immediately follows p. As a result, everything beyond it, which will
  3374. be just some final register configuration and a RET instruction, is
  3375. now dead code. [Kit] }
  3376. { NOTE: This is much faster than introducing a OptPass2RET routine and
  3377. running RemoveDeadCodeAfterJump for each RET instruction, because
  3378. this optimisation rarely happens and most RETs appear at the end of
  3379. routines where there is nothing that can be stripped. [Kit] }
  3380. if not ThisLabel.is_used then
  3381. RemoveDeadCodeAfterJump(p);
  3382. end;
function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
{ Pass-2 optimisation of an unconditional JMP at p whose target is a plain
  symbol (full address, no base/index register).  When the target label is
  immediately followed by a RET (optionally preceded by one MOV), the jump
  is replaced by a copy of that tail so the branch disappears.
  Returns True when the stream was changed. }
  var
    hp1, hp2 : tai;
  begin
    result:=false;
    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) then
      begin
        { hp1 becomes the first real instruction at the jump target }
        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
        if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ = ait_instruction) then
          begin
            case taicpu(hp1).opcode of
              A_RET:
                {
                  change
                         jmp .L1
                         ...
                     .L1:
                         ret
                  into
                         ret
                }
                begin
                  ConvertJumpToRET(p, hp1);
                  result:=true;
                end;
              A_MOV:
                {
                  change
                         jmp .L1
                         ...
                     .L1:
                         mov ##, ##
                         ret
                  into
                         mov ##, ##
                         ret
                }
                { This optimisation tends to increase code size if the pass 1 MOV optimisations aren't
                  re-run, so only do this particular optimisation if optimising for speed or when
                  optimisations are very in-depth. [Kit] }
                if (current_settings.optimizerswitches * [cs_opt_level3, cs_opt_size]) <> [cs_opt_size] then
                  begin
                    GetNextInstruction(hp1, hp2);
                    if not Assigned(hp2) then
                      Exit;
                    if (hp2.typ in [ait_label, ait_align]) then
                      SkipLabels(hp2,hp2);
                    if Assigned(hp2) and MatchInstruction(hp2, A_RET, [S_NO]) then
                      begin
                        { Duplicate the MOV instruction }
                        asml.InsertBefore(hp1.getcopy, p);
                        { Now change the jump into a RET instruction }
                        ConvertJumpToRET(p, hp2);
                        result:=true;
                      end;
                  end;
              else
                { Do nothing };
            end;
          end;
      end;
  end;
function CanBeCMOV(p : tai) : boolean;
{ Returns True when the instruction at p is a MOV that may safely be turned
  into a CMOVcc: word/long/quad size, and either register-to-register or a
  load from a reference that cannot fault (pure symbol, or RIP-relative GOT
  addressing on x86_64, with no index register and zero offset).  A general
  memory operand is rejected because CMOV performs the load even when the
  condition is false, so a nil pointer would still raise an exception. }
  begin
    CanBeCMOV:=assigned(p) and
      MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
      { we can't use cmov ref,reg because
        ref could be nil and cmov still throws an exception
        if ref=nil but the mov isn't done (FK)
        or ((taicpu(p).oper[0]^.typ = top_ref) and
         (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
      }
      (MatchOpType(taicpu(p),top_reg,top_reg) or
       { allow references, but only pure symbols or got rel. addressing with RIP as based,
         it is not expected that this can cause a seg. violation }
       (MatchOpType(taicpu(p),top_ref,top_reg) and
        (((taicpu(p).oper[0]^.ref^.base=NR_NO) and (taicpu(p).oper[0]^.ref^.refaddr=addr_no)){$ifdef x86_64} or
         ((taicpu(p).oper[0]^.ref^.base=NR_RIP) and (taicpu(p).oper[0]^.ref^.refaddr=addr_pic)){$endif x86_64}
        ) and
        (taicpu(p).oper[0]^.ref^.index=NR_NO) and
        (taicpu(p).oper[0]^.ref^.offset=0)
       )
      );
  end;
function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
{ Pass-2 optimisations for a conditional jump at p:
    * jb/jnb over a single INC/DEC  ->  (cmc +) adc/sbb op,0, removing the
      branch entirely;
    * jcc over a jmp to a RET       ->  inverted jcc + inline RET;
    * (CPUs with CMOV) jcc over one-to-four safe MOVs, in either the
      single-sided or the if/else (two label) layout  ->  CMOVcc sequence,
      stripping the now-dead labels and jumps.
  Returns True when the stream was changed; p is advanced past the rewrite. }
  var
    hp1,hp2,hp3,hp4,hpmov2: tai;
    carryadd_opcode : TAsmOp;
    l : Longint;
    condition : TAsmCond;
    symbol: TAsmSymbol;
  begin
    result:=false;
    symbol:=nil;
    if GetNextInstruction(p,hp1) then
      begin
        symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
        if (hp1.typ=ait_instruction) and
           GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
           (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
          { jb @@1                            cmc
            inc/dec operand           -->     adc/sbb operand,0
            @@1:

            ... and ...

            jnb @@1
            inc/dec operand           -->     adc/sbb operand,0
            @@1: }
          begin
            carryadd_opcode:=A_NONE;
            if Taicpu(p).condition in [C_NAE,C_B] then
              begin
                if Taicpu(hp1).opcode=A_INC then
                  carryadd_opcode:=A_ADC;
                if Taicpu(hp1).opcode=A_DEC then
                  carryadd_opcode:=A_SBB;
                if carryadd_opcode<>A_NONE then
                  begin
                    { the INC/DEC runs when CF=0, so invert the carry and
                      let ADC/SBB consume it; the jump itself becomes CMC }
                    Taicpu(p).clearop(0);
                    Taicpu(p).ops:=0;
                    Taicpu(p).is_jmp:=false;
                    Taicpu(p).opcode:=A_CMC;
                    Taicpu(p).condition:=C_NONE;
                    Taicpu(hp1).ops:=2;
                    Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                    Taicpu(hp1).loadconst(0,0);
                    Taicpu(hp1).opcode:=carryadd_opcode;
                    result:=true;
                    exit;
                  end;
              end;
            if Taicpu(p).condition in [C_AE,C_NB] then
              begin
                if Taicpu(hp1).opcode=A_INC then
                  carryadd_opcode:=A_ADC;
                if Taicpu(hp1).opcode=A_DEC then
                  carryadd_opcode:=A_SBB;
                if carryadd_opcode<>A_NONE then
                  begin
                    { here the INC/DEC runs exactly when CF=1, so the carry
                      can be used directly and the jump is simply dropped }
                    asml.remove(p);
                    p.free;
                    Taicpu(hp1).ops:=2;
                    Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                    Taicpu(hp1).loadconst(0,0);
                    Taicpu(hp1).opcode:=carryadd_opcode;
                    p:=hp1;
                    result:=true;
                    exit;
                  end;
              end;
          end;
        { Detect the following:
            jmp<cond>     @Lbl1
            jmp           @Lbl2
            ...
          @Lbl1:
            ret

          Change to:

            jmp<inv_cond> @Lbl2
            ret
        }
        if MatchInstruction(hp1,A_JMP,[]) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
          begin
            hp2:=getlabelwithsym(TAsmLabel(symbol));
            if Assigned(hp2) and SkipLabels(hp2,hp2) and
              MatchInstruction(hp2,A_RET,[S_NO]) then
              begin
                taicpu(p).condition := inverse_cond(taicpu(p).condition);
                { Change label address to that of the unconditional jump }
                taicpu(p).loadoper(0, taicpu(hp1).oper[0]^);
                TAsmLabel(symbol).DecRefs;
                { rewrite the unconditional jump as an inline copy of the RET }
                taicpu(hp1).opcode := A_RET;
                taicpu(hp1).is_jmp := false;
                taicpu(hp1).ops := taicpu(hp2).ops;
                DebugMsg(SPeepholeOptimization+'JccJmpRet2J!ccRet',p);
                case taicpu(hp2).ops of
                  0:
                    taicpu(hp1).clearop(0);
                  1:
                    taicpu(hp1).loadconst(0,taicpu(hp2).oper[0]^.val);
                  else
                    internalerror(2016041302);
                end;
              end;
          end;
      end;
{$ifndef i8086}
    if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
      begin
        { check for
               jCC   xxx
               <several movs>
            xxx:
        }
        l:=0;
        GetNextInstruction(p, hp1);
        while assigned(hp1) and
          CanBeCMOV(hp1) and
          { stop on labels }
          not(hp1.typ=ait_label) do
          begin
            inc(l);
            GetNextInstruction(hp1,hp1);
          end;
        if assigned(hp1) then
          begin
            if FindLabel(tasmlabel(symbol),hp1) then
              begin
                if (l<=4) and (l>0) then
                  begin
                    { single-sided layout: convert every MOV to CMOV with
                      the inverted condition, then strip jump and label }
                    condition:=inverse_cond(taicpu(p).condition);
                    GetNextInstruction(p,hp1);
                    repeat
                      if not Assigned(hp1) then
                        InternalError(2018062900);
                      taicpu(hp1).opcode:=A_CMOVcc;
                      taicpu(hp1).condition:=condition;
                      UpdateUsedRegs(hp1);
                      GetNextInstruction(hp1,hp1);
                    until not(CanBeCMOV(hp1));
                    { Remember what hp1 is in case there's multiple aligns to get rid of }
                    hp2 := hp1;
                    repeat
                      if not Assigned(hp2) then
                        InternalError(2018062910);
                      case hp2.typ of
                        ait_label:
                          { What we expected - break out of the loop (it won't be a dead label at the top of
                            a cluster because that was optimised at an earlier stage) }
                          Break;
                        ait_align:
                          { Go to the next entry until a label is found (may be multiple aligns before it) }
                          begin
                            hp2 := tai(hp2.Next);
                            Continue;
                          end;
                        else
                          begin
                            { Might be a comment or temporary allocation entry }
                            if not (hp2.typ in SkipInstr) then
                              InternalError(2018062911);
                            hp2 := tai(hp2.Next);
                            Continue;
                          end;
                      end;
                    until False;
                    { Now we can safely decrement the reference count }
                    tasmlabel(symbol).decrefs;
                    DebugMsg(SPeepholeOptimization+'JccMov2CMov',p);
                    { Remove the original jump }
                    asml.Remove(p);
                    p.Free;
                    GetNextInstruction(hp2, p); { Instruction after the label }
                    { Remove the label if this is its final reference }
                    if (tasmlabel(symbol).getrefs=0) then
                      StripLabelFast(hp1);
                    if Assigned(p) then
                      begin
                        UpdateUsedRegs(p);
                        result:=true;
                      end;
                    exit;
                  end;
              end
            else
              begin
                { check further for
                       jCC   xxx
                       <several movs 1>
                       jmp   yyy
                  xxx:
                       <several movs 2>
                  yyy:
                }
                { hp2 points to jmp yyy }
                hp2:=hp1;
                { skip hp1 to xxx (or an align right before it) }
                GetNextInstruction(hp1, hp1);
                if assigned(hp2) and
                  assigned(hp1) and
                  (l<=3) and
                  (hp2.typ=ait_instruction) and
                  (taicpu(hp2).is_jmp) and
                  (taicpu(hp2).condition=C_None) and
                  { real label and jump, no further references to the
                    label are allowed }
                  (tasmlabel(symbol).getrefs=1) and
                  FindLabel(tasmlabel(symbol),hp1) then
                  begin
                    l:=0;
                    { skip hp1 to <several moves 2> }
                    if (hp1.typ = ait_align) then
                      GetNextInstruction(hp1, hp1);
                    GetNextInstruction(hp1, hpmov2);
                    hp1 := hpmov2;
                    while assigned(hp1) and
                      CanBeCMOV(hp1) do
                      begin
                        inc(l);
                        GetNextInstruction(hp1, hp1);
                      end;
                    { hp1 points to yyy (or an align right before it) }
                    hp3 := hp1;
                    if assigned(hp1) and
                      FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                      begin
                        { two-sided layout: movs 1 get the inverted
                          condition, movs 2 the original one }
                        condition:=inverse_cond(taicpu(p).condition);
                        GetNextInstruction(p,hp1);
                        repeat
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          UpdateUsedRegs(hp1);
                          GetNextInstruction(hp1,hp1);
                        until not(assigned(hp1)) or
                          not(CanBeCMOV(hp1));
                        condition:=inverse_cond(condition);
                        hp1 := hpmov2;
                        { hp1 is now at <several movs 2> }
                        while Assigned(hp1) and CanBeCMOV(hp1) do
                          begin
                            taicpu(hp1).opcode:=A_CMOVcc;
                            taicpu(hp1).condition:=condition;
                            UpdateUsedRegs(hp1);
                            GetNextInstruction(hp1,hp1);
                          end;
                        hp1 := p;
                        { Get first instruction after label }
                        GetNextInstruction(hp3, p);
                        if assigned(p) and (hp3.typ = ait_align) then
                          GetNextInstruction(p, p);
                        { Don't dereference yet, as doing so will cause
                          GetNextInstruction to skip the label and
                          optional align marker. [Kit] }
                        GetNextInstruction(hp2, hp4);
                        DebugMsg(SPeepholeOptimization+'JccMovJmpMov2CMovCMov',hp1);
                        { remove jCC }
                        asml.remove(hp1);
                        hp1.free;
                        { Now we can safely decrement it }
                        tasmlabel(symbol).decrefs;
                        { Remove label xxx (it will have a ref of zero due to the initial check }
                        StripLabelFast(hp4);
                        { remove jmp }
                        symbol := taicpu(hp2).oper[0]^.ref^.symbol;
                        asml.remove(hp2);
                        hp2.free;
                        { As before, now we can safely decrement it }
                        tasmlabel(symbol).decrefs;
                        { Remove label yyy (and the optional alignment) if its reference falls to zero }
                        if tasmlabel(symbol).getrefs = 0 then
                          StripLabelFast(hp3);
                        if Assigned(p) then
                          begin
                            UpdateUsedRegs(p);
                            result:=true;
                          end;
                        exit;
                      end;
                  end;
              end;
          end;
      end;
{$endif i8086}
  end;
  3747. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  3748. var
  3749. hp1,hp2: tai;
  3750. begin
  3751. result:=false;
  3752. if (taicpu(p).oper[1]^.typ = top_reg) and
  3753. GetNextInstruction(p,hp1) and
  3754. (hp1.typ = ait_instruction) and
  3755. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  3756. GetNextInstruction(hp1,hp2) and
  3757. MatchInstruction(hp2,A_MOV,[]) and
  3758. (taicpu(hp2).oper[0]^.typ = top_reg) and
  3759. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  3760. {$ifdef i386}
  3761. { not all registers have byte size sub registers on i386 }
  3762. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  3763. {$endif i386}
  3764. (((taicpu(hp1).ops=2) and
  3765. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  3766. ((taicpu(hp1).ops=1) and
  3767. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  3768. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  3769. begin
  3770. { change movsX/movzX reg/ref, reg2
  3771. add/sub/or/... reg3/$const, reg2
  3772. mov reg2 reg/ref
  3773. to add/sub/or/... reg3/$const, reg/ref }
  3774. { by example:
  3775. movswl %si,%eax movswl %si,%eax p
  3776. decl %eax addl %edx,%eax hp1
  3777. movw %ax,%si movw %ax,%si hp2
  3778. ->
  3779. movswl %si,%eax movswl %si,%eax p
  3780. decw %eax addw %edx,%eax hp1
  3781. movw %ax,%si movw %ax,%si hp2
  3782. }
  3783. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  3784. {
  3785. ->
  3786. movswl %si,%eax movswl %si,%eax p
  3787. decw %si addw %dx,%si hp1
  3788. movw %ax,%si movw %ax,%si hp2
  3789. }
  3790. case taicpu(hp1).ops of
  3791. 1:
  3792. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  3793. 2:
  3794. begin
  3795. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  3796. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  3797. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3798. end;
  3799. else
  3800. internalerror(2008042701);
  3801. end;
  3802. {
  3803. ->
  3804. decw %si addw %dx,%si p
  3805. }
  3806. DebugMsg(SPeepholeOptimization + 'var3',p);
  3807. asml.remove(p);
  3808. asml.remove(hp2);
  3809. p.free;
  3810. hp2.free;
  3811. p:=hp1;
  3812. end
  3813. else if taicpu(p).opcode=A_MOVZX then
  3814. begin
  3815. { removes superfluous And's after movzx's }
  3816. if (taicpu(p).oper[1]^.typ = top_reg) and
  3817. GetNextInstruction(p, hp1) and
  3818. (tai(hp1).typ = ait_instruction) and
  3819. (taicpu(hp1).opcode = A_AND) and
  3820. (taicpu(hp1).oper[0]^.typ = top_const) and
  3821. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3822. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3823. begin
  3824. case taicpu(p).opsize Of
  3825. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  3826. if (taicpu(hp1).oper[0]^.val = $ff) then
  3827. begin
  3828. DebugMsg(SPeepholeOptimization + 'var4',p);
  3829. asml.remove(hp1);
  3830. hp1.free;
  3831. end;
  3832. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  3833. if (taicpu(hp1).oper[0]^.val = $ffff) then
  3834. begin
  3835. DebugMsg(SPeepholeOptimization + 'var5',p);
  3836. asml.remove(hp1);
  3837. hp1.free;
  3838. end;
  3839. {$ifdef x86_64}
  3840. S_LQ:
  3841. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  3842. begin
  3843. if (cs_asm_source in current_settings.globalswitches) then
  3844. asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
  3845. asml.remove(hp1);
  3846. hp1.Free;
  3847. end;
  3848. {$endif x86_64}
  3849. else
  3850. ;
  3851. end;
  3852. end;
  3853. { changes some movzx constructs to faster synonims (all examples
  3854. are given with eax/ax, but are also valid for other registers)}
  3855. if (taicpu(p).oper[1]^.typ = top_reg) then
  3856. if (taicpu(p).oper[0]^.typ = top_reg) then
  3857. case taicpu(p).opsize of
  3858. S_BW:
  3859. begin
  3860. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  3861. not(cs_opt_size in current_settings.optimizerswitches) then
  3862. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  3863. begin
  3864. taicpu(p).opcode := A_AND;
  3865. taicpu(p).changeopsize(S_W);
  3866. taicpu(p).loadConst(0,$ff);
  3867. DebugMsg(SPeepholeOptimization + 'var7',p);
  3868. end
  3869. else if GetNextInstruction(p, hp1) and
  3870. (tai(hp1).typ = ait_instruction) and
  3871. (taicpu(hp1).opcode = A_AND) and
  3872. (taicpu(hp1).oper[0]^.typ = top_const) and
  3873. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3874. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3875. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  3876. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  3877. begin
  3878. DebugMsg(SPeepholeOptimization + 'var8',p);
  3879. taicpu(p).opcode := A_MOV;
  3880. taicpu(p).changeopsize(S_W);
  3881. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  3882. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3883. end;
  3884. end;
  3885. S_BL:
  3886. begin
  3887. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  3888. not(cs_opt_size in current_settings.optimizerswitches) then
  3889. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  3890. begin
  3891. taicpu(p).opcode := A_AND;
  3892. taicpu(p).changeopsize(S_L);
  3893. taicpu(p).loadConst(0,$ff)
  3894. end
  3895. else if GetNextInstruction(p, hp1) and
  3896. (tai(hp1).typ = ait_instruction) and
  3897. (taicpu(hp1).opcode = A_AND) and
  3898. (taicpu(hp1).oper[0]^.typ = top_const) and
  3899. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3900. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3901. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  3902. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  3903. begin
  3904. DebugMsg(SPeepholeOptimization + 'var10',p);
  3905. taicpu(p).opcode := A_MOV;
  3906. taicpu(p).changeopsize(S_L);
  3907. { do not use R_SUBWHOLE
  3908. as movl %rdx,%eax
  3909. is invalid in assembler PM }
  3910. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  3911. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3912. end
  3913. end;
  3914. {$ifndef i8086}
  3915. S_WL:
  3916. begin
  3917. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  3918. not(cs_opt_size in current_settings.optimizerswitches) then
  3919. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  3920. begin
  3921. DebugMsg(SPeepholeOptimization + 'var11',p);
  3922. taicpu(p).opcode := A_AND;
  3923. taicpu(p).changeopsize(S_L);
  3924. taicpu(p).loadConst(0,$ffff);
  3925. end
  3926. else if GetNextInstruction(p, hp1) and
  3927. (tai(hp1).typ = ait_instruction) and
  3928. (taicpu(hp1).opcode = A_AND) and
  3929. (taicpu(hp1).oper[0]^.typ = top_const) and
  3930. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3931. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3932. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  3933. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  3934. begin
  3935. DebugMsg(SPeepholeOptimization + 'var12',p);
  3936. taicpu(p).opcode := A_MOV;
  3937. taicpu(p).changeopsize(S_L);
  3938. { do not use R_SUBWHOLE
  3939. as movl %rdx,%eax
  3940. is invalid in assembler PM }
  3941. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  3942. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  3943. end;
  3944. end;
  3945. {$endif i8086}
  3946. else
  3947. ;
  3948. end
  3949. else if (taicpu(p).oper[0]^.typ = top_ref) then
  3950. begin
  3951. if GetNextInstruction(p, hp1) and
  3952. (tai(hp1).typ = ait_instruction) and
  3953. (taicpu(hp1).opcode = A_AND) and
  3954. MatchOpType(taicpu(hp1),top_const,top_reg) and
  3955. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3956. begin
  3957. //taicpu(p).opcode := A_MOV;
  3958. case taicpu(p).opsize Of
  3959. S_BL:
  3960. begin
  3961. DebugMsg(SPeepholeOptimization + 'var13',p);
  3962. taicpu(hp1).changeopsize(S_L);
  3963. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3964. end;
  3965. S_WL:
  3966. begin
  3967. DebugMsg(SPeepholeOptimization + 'var14',p);
  3968. taicpu(hp1).changeopsize(S_L);
  3969. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  3970. end;
  3971. S_BW:
  3972. begin
  3973. DebugMsg(SPeepholeOptimization + 'var15',p);
  3974. taicpu(hp1).changeopsize(S_W);
  3975. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3976. end;
  3977. {$ifdef x86_64}
  3978. S_BQ:
  3979. begin
  3980. DebugMsg(SPeepholeOptimization + 'var16',p);
  3981. taicpu(hp1).changeopsize(S_Q);
  3982. taicpu(hp1).loadConst(
  3983. 0, taicpu(hp1).oper[0]^.val and $ff);
  3984. end;
  3985. S_WQ:
  3986. begin
  3987. DebugMsg(SPeepholeOptimization + 'var17',p);
  3988. taicpu(hp1).changeopsize(S_Q);
  3989. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  3990. end;
  3991. S_LQ:
  3992. begin
  3993. DebugMsg(SPeepholeOptimization + 'var18',p);
  3994. taicpu(hp1).changeopsize(S_Q);
  3995. taicpu(hp1).loadConst(
  3996. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  3997. end;
  3998. {$endif x86_64}
  3999. else
  4000. Internalerror(2017050704)
  4001. end;
  4002. end;
  4003. end;
  4004. end;
  4005. end;
{ Pass-1 peephole optimisation for AND instructions.
  Inspects the instruction following p and:
    * merges two consecutive ANDs on the same register into one,
    * removes a MOVZX/MOVSX made redundant by the preceding mask,
    * removes an AND whose mask is fully shifted out by a following SHL,
    * turns AND before a conditional jump into TEST when the register dies,
  and finally normalises lone full-width AND masks to the reg,reg form. }
function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  var
    hp1 : tai;             { the instruction following p }
    MaskLength : Cardinal; { see NOTE(review) at its computation below }
  begin
    Result:=false;
    if GetNextInstruction(p, hp1) then
      begin
        if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1,A_AND,[]) and
          MatchOpType(taicpu(hp1),top_const,top_reg) and
          (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
          { the second register must contain the first one, so compare their subreg types }
          (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
          { NOTE(review): presumably rejects combined masks that would not fit a
            sign-extended 32-bit immediate — confirm the intent of this abs() check }
          (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
          { change
              and const1, reg
              and const2, reg
            to
              and (const1 and const2), reg
          }
          begin
            taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
            DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
            asml.remove(p);
            p.Free;
            p:=hp1;
            Result:=true;
            exit;
          end
        else if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1,A_MOVZX,[]) and
          (taicpu(hp1).oper[0]^.typ = top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
          (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
          { the AND and MOVZX sizes must correspond }
          (((taicpu(p).opsize=S_W) and
            (taicpu(hp1).opsize=S_BW)) or
           ((taicpu(p).opsize=S_L) and
            (taicpu(hp1).opsize in [S_WL,S_BL]))
           {$ifdef x86_64}
           or
           ((taicpu(p).opsize=S_Q) and
            (taicpu(hp1).opsize in [S_BQ,S_WQ]))
           {$endif x86_64}
          ) then
          begin
            { the MOVZX is redundant when the preceding AND already cleared
              every bit outside the zero-extended source width }
            if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
               (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
            then
              begin
                { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
                  32-bit register to a 64-bit register, or even a version called MOVZXD, so
                  code that tests for the presence of AND 0xffffffff followed by MOVZX is
                  wasted, and is indictive of a compiler bug if it were triggered. [Kit]
                  NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
                }
                DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
                asml.remove(hp1);
                hp1.free;
                { NOTE(review): Result is left False although hp1 was removed —
                  confirm whether Result:=true was intended here }
                Exit;
              end;
          end
        else if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1,A_SHL,[]) and
          MatchOpType(taicpu(hp1),top_const,top_reg) and
          (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
          begin
            {$ifopt R+}
            {$define RANGE_WAS_ON}
            {$R-}
            {$endif}
            { get length of potential and mask }
            { NOTE(review): this computes the number of LEADING ZERO bits of the
              constant (64 - BsrQWord(val) - 1), not the mask width BsrQWord(val)+1.
              Combined with the mask test below, only val=$FFFFFFFF can satisfy
              both conditions — verify whether BsrQWord(val)+1 was intended }
            MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
            { really a mask? }
            {$ifdef RANGE_WAS_ON}
            {$R+}
            {$endif}
            if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
              { unmasked part shifted out? }
              ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
              begin
                DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
                { take care of the register (de)allocs following p }
                UpdateUsedRegs(tai(p.next));
                asml.remove(p);
                p.free;
                p:=hp1;
                Result:=true;
                exit;
              end;
          end
        else if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
          (taicpu(hp1).oper[0]^.typ = top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
          (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
          { the AND and MOVSX sizes must correspond }
          (((taicpu(p).opsize=S_W) and
            (taicpu(hp1).opsize=S_BW)) or
           ((taicpu(p).opsize=S_L) and
            (taicpu(hp1).opsize in [S_WL,S_BL]))
           {$ifdef x86_64}
           or
           ((taicpu(p).opsize=S_Q) and
            (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
           {$endif x86_64}
          ) then
          begin
            { the MOVSX is redundant when the AND mask clears the source's
              sign bit: sign extension is then a no-op }
            if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
               (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
               {$ifdef x86_64}
               or
               (((taicpu(hp1).opsize)=S_LQ) and
                ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
               )
               {$endif x86_64}
            then
              begin
                DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
                asml.remove(hp1);
                hp1.free;
                { NOTE(review): Result is left False here as well — confirm }
                Exit;
              end;
          end
        else if (taicpu(p).oper[1]^.typ = top_reg) and
          (hp1.typ = ait_instruction) and
          (taicpu(hp1).is_jmp) and
          (taicpu(hp1).opcode<>A_JMP) and
          not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
          begin
            { change
                and x, reg
                jxx
              to
                test x, reg
                jxx
              if reg is deallocated before the
              jump, but only if it's a conditional jump (PFV)
            }
            taicpu(p).opcode := A_TEST;
            Exit;
          end;
      end;
    { Lone AND tests }
    if MatchOpType(taicpu(p),top_const,top_reg) then
      begin
        {
          - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
          - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
          - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
        }
        if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
           ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
           ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
          begin
            { the reg,reg form has the same flag effects but avoids the
              immediate, giving a shorter encoding }
            taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg)
          end;
      end;
  end;
  4170. function TX86AsmOptimizer.OptPass2Lea(var p : tai) : Boolean;
  4171. begin
  4172. Result:=false;
  4173. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  4174. MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  4175. (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
  4176. begin
  4177. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
  4178. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
  4179. taicpu(p).opcode:=A_ADD;
  4180. DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
  4181. result:=true;
  4182. end
  4183. else if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  4184. MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  4185. (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
  4186. begin
  4187. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
  4188. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
  4189. taicpu(p).opcode:=A_ADD;
  4190. DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
  4191. result:=true;
  4192. end;
  4193. end;
{ Post-peephole optimisation for LEA: replaces the tail-call sequence
    lea x(<sp>),<sp> ; ... ; call f ; lea -x(<sp>),<sp> ; ret
  by a plain "jmp f". Only done at -O4 since it destroys back traces. }
function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;

  { Advances hp1 over instructions that do not touch the stack pointer;
    returns False when the instruction stream runs out first. }
  function SkipSimpleInstructions(var hp1 : tai) : Boolean;
    begin
      { we can skip all instructions not messing with the stack pointer }
      while assigned(hp1) and {MatchInstruction(taicpu(hp1),[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
        A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
        A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
        A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
        ({(taicpu(hp1).ops=0) or }
         ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
           (MatchOpType(taicpu(hp1),top_ref,top_reg))
          ) and }
          not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
         )
        ) do
        GetNextInstruction(hp1,hp1);
      Result:=assigned(hp1);
    end;

  var
    hp1, hp2, hp3: tai; { candidate call, compensating lea, and ret }
  begin
    Result:=false;
    { replace
        leal(q) x(<stackpointer>),<stackpointer>
        call procname
        leal(q) -x(<stackpointer>),<stackpointer>
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      MatchOpType(taicpu(p),top_ref,top_reg) and
      { p must be a pure sp-relative adjustment with no symbol/segment parts }
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      { the -8 or -24 are not required, but bail out early if possible,
        higher values are unlikely }
      ((taicpu(p).oper[0]^.ref^.offset=-8) or
       (taicpu(p).oper[0]^.ref^.offset=-24)) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(p, hp1) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_ref,top_reg) and
      { the second lea must undo exactly the adjustment made by p }
      (taicpu(hp2).oper[0]^.ref^.offset=-taicpu(p).oper[0]^.ref^.offset) and
      (taicpu(hp2).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
      (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      MatchInstruction(hp3,A_RET,[S_NO]) and
      (taicpu(hp3).ops=0) then
      begin
        { turn the call into the tail jump, then drop the stack
          adjustments and the now-unreachable ret }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        RemoveCurrentP(p);
        AsmL.Remove(hp2);
        hp2.free;
        AsmL.Remove(hp3);
        hp3.free;
        Result:=true;
      end;
  end;
  4269. function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
  4270. var
  4271. Value, RegName: string;
  4272. begin
  4273. Result:=false;
  4274. if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
  4275. begin
  4276. case taicpu(p).oper[0]^.val of
  4277. 0:
  4278. { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
  4279. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  4280. begin
  4281. { change "mov $0,%reg" into "xor %reg,%reg" }
  4282. taicpu(p).opcode := A_XOR;
  4283. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  4284. Result := True;
  4285. end;
  4286. $1..$FFFFFFFF:
  4287. begin
  4288. { Code size reduction by J. Gareth "Kit" Moreton }
  4289. { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
  4290. case taicpu(p).opsize of
  4291. S_Q:
  4292. begin
  4293. RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
  4294. Value := debug_tostr(taicpu(p).oper[0]^.val);
  4295. { The actual optimization }
  4296. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  4297. taicpu(p).changeopsize(S_L);
  4298. DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
  4299. Result := True;
  4300. end;
  4301. else
  4302. { Do nothing };
  4303. end;
  4304. end;
  4305. -1:
  4306. { Don't make this optimisation if the CPU flags are required, since OR scrambles them }
  4307. if (cs_opt_size in current_settings.optimizerswitches) and
  4308. (taicpu(p).opsize <> S_B) and
  4309. not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  4310. begin
  4311. { change "mov $-1,%reg" into "or $-1,%reg" }
  4312. { NOTES:
  4313. - No size saving is made when changing a Word-sized assignment unless the register is AX (smaller encoding)
  4314. - This operation creates a false dependency on the register, so only do it when optimising for size
  4315. - It is possible to set memory operands using this method, but this creates an even greater false dependency, so don't do this at all
  4316. }
  4317. taicpu(p).opcode := A_OR;
  4318. Result := True;
  4319. end;
  4320. end;
  4321. end;
  4322. end;
  4323. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  4324. begin
  4325. Result:=false;
  4326. { change "cmp $0, %reg" to "test %reg, %reg" }
  4327. if MatchOpType(taicpu(p),top_const,top_reg) and
  4328. (taicpu(p).oper[0]^.val = 0) then
  4329. begin
  4330. taicpu(p).opcode := A_TEST;
  4331. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  4332. Result:=true;
  4333. end;
  4334. end;
{ Post-peephole optimisation for TEST and OR.
  Removes a flag-setting test/or that directly follows an arithmetic or
  logical instruction which already set the zero flag for the same operand,
  and normalises "test $-1,%reg" into the shorter "test %reg,%reg". }
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean; { True for the "test $-1,%y" spelling of the test }
    hp1,hp2 : tai;          { instruction before p and instruction after p }
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y | test $-1, %y (x)
      j(n)z _Label
      as the first instruction already adjusts the ZF
      %y operand may also be a reference }
    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
      GetLastInstruction(p, hp1) and
      (tai(hp1).typ = ait_instruction) and
      GetNextInstruction(p,hp2) and
      { the consumer of the flags must be a conditional instruction }
      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
               ((taicpu(hp1).opcode <> A_ADD) and
                (taicpu(hp1).opcode <> A_SUB))) then
              begin
                { drop the redundant test/or instruction }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for }
              { shifts by a (nonzero) constant }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                { drop the redundant test/or instruction }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            { single-operand instructions: the operand compared is oper[0] }
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                case taicpu(hp1).opcode of
                  A_DEC, A_INC:
                    { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                    begin
                      case taicpu(hp1).opcode Of
                        A_DEC: taicpu(hp1).opcode := A_SUB;
                        A_INC: taicpu(hp1).opcode := A_ADD;
                        else
                          ;
                      end;
                      taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                      taicpu(hp1).loadConst(0,1);
                      taicpu(hp1).ops:=2;
                    end;
                  else
                    ;
                end;
                { drop the redundant test/or instruction }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end
        else
          { the preceding instruction does not set usable flags: }
          { change "test $-1,%reg" into "test %reg,%reg" }
          if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
            taicpu(p).loadoper(0,taicpu(p).oper[1]^);
      end { case }
    { no usable preceding instruction/consumer pair: }
    { change "test $-1,%reg" into "test %reg,%reg" }
    else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  end;
{ Post-peephole optimisation for CALL:
    * on pre-Pentium2 32-bit targets, "call f; jmp l" -> "push l; jmp f",
    * at optimisation level 4, "call f; ret" -> "jmp f" (a tail call). }
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1 : tai;    { the instruction following the call }
    {$ifndef x86_64}
    hp2 : taicpu; { the push inserted before the rewritten call }
    {$endif x86_64}
  begin
    Result:=false;
    {$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
      not(cs_create_pic in current_settings.moduleswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_JMP,[S_NO]) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { push the jump target as a fake return address, then jump into
          the callee: "call f; jmp l" becomes "push l; jmp f" }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end
    else
    {$endif x86_64}
    { replace
        call procname
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_RET,[S_NO]) and
      (taicpu(hp1).ops=0) then
      begin
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'CallRet2Jmp done',p);
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end;
  end;
  4478. {$ifdef x86_64}
  4479. function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
  4480. var
  4481. PreMessage: string;
  4482. begin
  4483. Result := False;
  4484. { Code size reduction by J. Gareth "Kit" Moreton }
  4485. { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
  4486. if (taicpu(p).opsize in [S_BQ, S_WQ]) and
  4487. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
  4488. then
  4489. begin
  4490. { Has 64-bit register name and opcode suffix }
  4491. PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
  4492. { The actual optimization }
  4493. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  4494. if taicpu(p).opsize = S_BQ then
  4495. taicpu(p).changeopsize(S_BL)
  4496. else
  4497. taicpu(p).changeopsize(S_WL);
  4498. DebugMsg(SPeepholeOptimization + PreMessage +
  4499. debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
  4500. end;
  4501. end;
  4502. function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  4503. var
  4504. PreMessage, RegName: string;
  4505. begin
  4506. { Code size reduction by J. Gareth "Kit" Moreton }
  4507. { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
  4508. as this removes the REX prefix }
  4509. Result := False;
  4510. if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  4511. Exit;
  4512. if taicpu(p).oper[0]^.typ <> top_reg then
  4513. { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
  4514. InternalError(2018011500);
  4515. case taicpu(p).opsize of
  4516. S_Q:
  4517. begin
  4518. if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
  4519. begin
  4520. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
  4521. PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';
  4522. { The actual optimization }
  4523. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  4524. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  4525. taicpu(p).changeopsize(S_L);
  4526. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
  4527. DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
  4528. end;
  4529. end;
  4530. else
  4531. ;
  4532. end;
  4533. end;
  4534. {$endif}
  4535. procedure TX86AsmOptimizer.OptReferences;
  4536. var
  4537. p: tai;
  4538. i: Integer;
  4539. begin
  4540. p := BlockStart;
  4541. while (p <> BlockEnd) Do
  4542. begin
  4543. if p.typ=ait_instruction then
  4544. begin
  4545. for i:=0 to taicpu(p).ops-1 do
  4546. if taicpu(p).oper[i]^.typ=top_ref then
  4547. optimize_ref(taicpu(p).oper[i]^.ref^,false);
  4548. end;
  4549. p:=tai(p.next);
  4550. end;
  4551. end;
  4552. end.