{
    Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe

    This unit contains the peephole optimizer.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { Flags identifying individual optimisations that are expensive to test
    for; the pre-opt pass sets them so later passes only check an
    optimisation when it was flagged as potentially applicable. }
  TOptsToCheck = (
    aoc_MovAnd2Mov_3
  );

  { x86-specific peephole optimizer; extends the generic TAsmOptimizer with
    instruction-level pattern matching and rewriting for the x86 family. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { some optimizations are very expensive to check, so the
      pre opt pass can be used to set some flags, depending on the found
      instructions if it is worth to check a certain optimization }
    OptsToCheck : set of TOptsToCheck;
    { Register-tracking predicates used by the generic optimizer framework. }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    {
      In comparison with GetNextInstructionUsingReg, GetNextInstructionUsingRegTrackingUse tracks
      the use of a register by allocs/dealloc, so it can ignore calls.

      In the following example, GetNextInstructionUsingReg will return the second movq,
      GetNextInstructionUsingRegTrackingUse won't.

      movq %rdi,%rax
      # Register rdi released
      # Register rdi allocated
      movq %rax,%rdi

      While in this example:

      movq %rdi,%rax
      call proc
      movq %rdi,%rax

      GetNextInstructionUsingRegTrackingUse will return the second instruction while GetNextInstructionUsingReg
      won't.
    }
    function GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean;
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
  protected
    class function IsMOVZXAcceptable: Boolean; static; inline;

    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;

    { Replaces all references to AOldReg in a memory reference to ANewReg }
    class function ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean; static;

    { Replaces all references to AOldReg in an operand to ANewReg }
    class function ReplaceRegisterInOper(const p: taicpu; const OperIdx: Integer; const AOldReg, ANewReg: TRegister): Boolean; static;

    { Replaces all references to AOldReg in an instruction to ANewReg,
      except where the register is being written }
    function ReplaceRegisterInInstruction(const p: taicpu; const AOldReg, ANewReg: TRegister): Boolean;

    { Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents),
      or writes to a global symbol }
    class function IsRefSafe(const ref: PReference): Boolean; static; inline;

    { Returns true if the given MOV instruction can be safely converted to CMOV }
    class function CanBeCMOV(p : tai) : boolean; static;

    function DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;

    { Emits a debug message when DEBUG_AOPTCPU is defined. }
    procedure DebugMsg(const s : string; p : tai);inline;

    class function IsExitCode(p : tai) : boolean; static;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean; static;
    procedure RemoveLastDeallocForFuncRes(p : tai);

    function DoSubAddOpt(var p : tai) : Boolean;

    { Pre-peephole pass handlers, dispatched by opcode. }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    function PrePeepholeOptIMUL(var p : tai) : boolean;

    { Pass-1 peephole handlers, dispatched by opcode. }
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1_V_MOVAP(var p : tai) : boolean;
    function OptPass1VOP(var p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(var p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass1Sub(var p : tai) : boolean;
    function OptPass1SHLSAL(var p : tai) : boolean;
    function OptPass1SETcc(var p : tai) : boolean;
    function OptPass1FSTP(var p : tai) : boolean;
    function OptPass1FLD(var p : tai) : boolean;
    function OptPass1Cmp(var p : tai) : boolean;

    { Pass-2 peephole handlers, dispatched by opcode. }
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    function OptPass2Lea(var p: tai): Boolean;
    function OptPass2SUB(var p: tai): Boolean;

    { Post-peephole handlers, dispatched by opcode. }
    function PostPeepholeOptMov(var p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    function PostPeepholeOptMovzx(var p : tai) : Boolean;
    function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
    function PostPeepholeOptMOVSX(var p : tai) : boolean;
    function PostPeepholeOptCmp(var p : tai) : Boolean;
    function PostPeepholeOptTestOr(var p : tai) : Boolean;
    function PostPeepholeOptCall(var p : tai) : Boolean;
    function PostPeepholeOptLea(var p : tai) : Boolean;

    procedure ConvertJumpToRET(const p: tai; const ret_p: tai);

    { Processor dependent reference optimisation }
    class procedure OptimizeRefs(var p: taicpu); static;
  end;
  { Returns true if instr is an instruction with the given opcode (or one of
    the given opcodes) and its operand size is in opsize; an empty opsize set
    accepts any size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { Returns true if oper is a register/constant operand equal to reg/a, or if
    the two operands are of the same kind and compare equal. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { Returns true if the two (non-volatile) references are component-wise equal. }
  function RefsEqual(const r1, r2: treference): boolean;

  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

  { returns true, if ref is a reference using only the registers passed as base and index
    and having an offset }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  134. implementation
  135. uses
  136. cutils,verbose,
  137. globals,
  138. cpuinfo,
  139. procinfo,
  140. aasmbase,
  141. aoptbase,aoptutils,
  142. symconst,symsym,
  143. cgx86,
  144. itcpugas;
  145. {$ifdef DEBUG_AOPTCPU}
  146. const
  147. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  148. {$else DEBUG_AOPTCPU}
  149. { Empty strings help the optimizer to remove string concatenations that won't
  150. ever appear to the user on release builds. [Kit] }
  151. const
  152. SPeepholeOptimization = '';
  153. {$endif DEBUG_AOPTCPU}
  { Returns true if instr is an instruction carrying opcode op whose operand
    size is accepted by opsize (an empty set accepts any size). }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      if (instr.typ = ait_instruction) and (taicpu(instr).opcode = op) then
        result := (opsize = []) or (taicpu(instr).opsize in opsize)
      else
        result := false;
    end;
  { Returns true if instr is an instruction carrying either op1 or op2 and
    its operand size is accepted by opsize (an empty set accepts any size). }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and (taicpu(instr).opcode <> op2) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction carrying one of op1/op2/op3 and
    its operand size is accepted by opsize (an empty set accepts any size). }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  180. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  181. const opsize : topsizes) : boolean;
  182. var
  183. op : TAsmOp;
  184. begin
  185. result:=false;
  186. for op in ops do
  187. begin
  188. if (instr.typ = ait_instruction) and
  189. (taicpu(instr).opcode = op) and
  190. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  191. begin
  192. result:=true;
  193. exit;
  194. end;
  195. end;
  196. end;
  197. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  198. begin
  199. result := (oper.typ = top_reg) and (oper.reg = reg);
  200. end;
  201. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  202. begin
  203. result := (oper.typ = top_const) and (oper.val = a);
  204. end;
  205. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  206. begin
  207. result := oper1.typ = oper2.typ;
  208. if result then
  209. case oper1.typ of
  210. top_const:
  211. Result:=oper1.val = oper2.val;
  212. top_reg:
  213. Result:=oper1.reg = oper2.reg;
  214. top_ref:
  215. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  216. else
  217. internalerror(2013102801);
  218. end
  219. end;
  220. function RefsEqual(const r1, r2: treference): boolean;
  221. begin
  222. RefsEqual :=
  223. (r1.offset = r2.offset) and
  224. (r1.segment = r2.segment) and (r1.base = r2.base) and
  225. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  226. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  227. (r1.relsymbol = r2.relsymbol) and
  228. (r1.volatility=[]) and
  229. (r2.volatility=[]);
  230. end;
  231. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  232. begin
  233. Result:=(ref.offset=0) and
  234. (ref.scalefactor in [0,1]) and
  235. (ref.segment=NR_NO) and
  236. (ref.symbol=nil) and
  237. (ref.relsymbol=nil) and
  238. ((base=NR_INVALID) or
  239. (ref.base=base)) and
  240. ((index=NR_INVALID) or
  241. (ref.index=index)) and
  242. (ref.volatility=[]);
  243. end;
  244. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  245. begin
  246. Result:=(ref.scalefactor in [0,1]) and
  247. (ref.segment=NR_NO) and
  248. (ref.symbol=nil) and
  249. (ref.relsymbol=nil) and
  250. ((base=NR_INVALID) or
  251. (ref.base=base)) and
  252. ((index=NR_INVALID) or
  253. (ref.index=index)) and
  254. (ref.volatility=[]);
  255. end;
  256. function InstrReadsFlags(p: tai): boolean;
  257. begin
  258. InstrReadsFlags := true;
  259. case p.typ of
  260. ait_instruction:
  261. if InsProp[taicpu(p).opcode].Ch*
  262. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  263. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  264. Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
  265. exit;
  266. ait_label:
  267. exit;
  268. else
  269. ;
  270. end;
  271. InstrReadsFlags := false;
  272. end;
function TX86AsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  begin
    { Advances Next from Current until one of these stops the scan: the end
      of the list, a non-instruction tai, an instruction mentioning reg, or
      a call/jump.  Below -O3 the loop terminates after a single step.
      Note: Result only reflects whether GetNextInstruction succeeded on
      the final step, not which of the conditions ended the loop — callers
      must re-test Next themselves. }
    Next:=Current;
    repeat
      Result:=GetNextInstruction(Next,Next);
    until not (Result) or
      not(cs_opt_level3 in current_settings.optimizerswitches) or
      (Next.typ<>ait_instruction) or
      RegInInstruction(reg,Next) or
      is_calljmp(taicpu(Next).opcode);
  end;
function TX86AsmOptimizer.GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean;
  begin
    { Walks forward from Current looking for an instruction that uses reg,
      and — unlike GetNextInstructionUsingReg — returns True only when such
      an instruction was actually found in Next.  Below -O3 it degrades to
      a single GetNextInstruction step (Result then mirrors that call). }
    if not(cs_opt_level3 in current_settings.optimizerswitches) then
      begin
        Result:=GetNextInstruction(Current,Next);
        exit;
      end;
    Next:=tai(Current.Next);
    Result:=false;
    while assigned(Next) do
      begin
        { Abort (Result=false) at anything the scan cannot safely look
          through: jumps other than CALL, register allocation markers for
          reg's super-register, and labels that cannot be skipped. }
        if ((Next.typ=ait_instruction) and is_calljmp(taicpu(Next).opcode) and not(taicpu(Next).opcode=A_CALL)) or
          ((Next.typ=ait_regalloc) and (getsupreg(tai_regalloc(Next).reg)=getsupreg(reg))) or
          ((Next.typ=ait_label) and not(labelCanBeSkipped(Tai_Label(Next)))) then
          exit
        { Found a genuine use of reg (CALLs are excluded here as well). }
        else if (Next.typ=ait_instruction) and RegInInstruction(reg,Next) and not(taicpu(Next).opcode=A_CALL) then
          begin
            Result:=true;
            exit;
          end;
        Next:=tai(Next.Next);
      end;
  end;
function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  begin
    { Thin adapter: the generic optimizer's "loads from register" query is
      answered by the x86-specific read analysis below. }
    Result:=RegReadByInstruction(reg,hp);
  end;
function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  var
    p: taicpu;
    opcount: longint;
  begin
    { Returns True when instruction hp reads (any part of) register reg:
      through an explicit operand, through a register used in a memory
      reference, or implicitly via the instruction table's change
      information (insprop). }
    RegReadByInstruction := false;
    if hp.typ <> ait_instruction then
      exit;
    p := taicpu(hp);
    case p.opcode of
      A_CALL:
        { A call is assumed to read everything: the callee's register use
          is unknown at this point. }
        regreadbyinstruction := true;
      A_IMUL:
        { IMUL's implicit reads depend on its operand count. }
        case p.ops of
          1:
            { Single-operand form also reads the accumulator; AH (R_SUBH)
              is only involved when the size is above S_B. }
            regReadByInstruction := RegInOp(reg,p.oper[0]^) or
               (
                ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
               );
          2,3:
            { Two/three-operand forms read only the explicit sources. }
            regReadByInstruction :=
              reginop(reg,p.oper[0]^) or
              reginop(reg,p.oper[1]^);
          else
            InternalError(2019112801);
        end;
      A_MUL:
        begin
          { MUL always reads the accumulator in addition to its operand
            (same AH special case as single-operand IMUL). }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
             (
              ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
              ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
             );
        end;
      A_IDIV,A_DIV:
        begin
          { Division reads the dividend from EAX — plus EDX for non-byte
            sizes — in addition to the explicit divisor operand. }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
             (
               (getregtype(reg)=R_INTREGISTER) and
               (
                 (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
               )
             );
        end;
      else
        begin
          { LEA only performs address arithmetic; a segment register named
            as reg is not actually read by it. }
          if (p.opcode=A_LEA) and is_segment_reg(reg) then
            begin
              RegReadByInstruction := false;
              exit;
            end;
          { Registers appearing inside any memory-reference operand are
            read for the address calculation regardless of the operand's
            data direction. }
          for opcount := 0 to p.ops-1 do
            if (p.oper[opCount]^.typ = top_ref) and
               RegInRef(reg,p.oper[opcount]^.ref^) then
              begin
                RegReadByInstruction := true;
                exit
              end;
          { special handling for SSE MOVSD }
          if (p.opcode=A_MOVSD) and (p.ops>0) then
            begin
              { ops=0 would be the MOVSD string instruction; the SSE form
                always has exactly two operands. }
              if p.ops<>2 then
                internalerror(2017042702);
              { The destination is only read in the reg,reg form (partial
                register update); a store to memory reads the source only. }
              regReadByInstruction := reginop(reg,p.oper[0]^) or
                (
                 (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
                );
              exit;
            end;
          with insprop[p.opcode] do
            begin
              { Implicit integer-register reads recorded in the instruction
                table's change information. }
              if getregtype(reg)=R_INTREGISTER then
                begin
                  case getsupreg(reg) of
                    RS_EAX:
                      if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ECX:
                      if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDX:
                      if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBX:
                      if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESP:
                      if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBP:
                      if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESI:
                      if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDI:
                      if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                  end;
                end;
              { Flags register: resolve which individual flag bits are read. }
              if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                begin
                  { For conditional instructions, only the flags actually
                    tested by p.condition count as read — unless the query
                    is about the whole flags register (R_SUBW/D/Q). }
                  if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                    begin
                      case p.condition of
                        C_A,C_NBE,       { CF=0 and ZF=0  }
                        C_BE,C_NA:       { CF=1 or ZF=1   }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                        C_AE,C_NB,C_NC,  { CF=0           }
                        C_B,C_NAE,C_C:   { CF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
                        C_NE,C_NZ,       { ZF=0           }
                        C_E,C_Z:         { ZF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
                        C_G,C_NLE,       { ZF=0 and SF=OF }
                        C_LE,C_NG:       { ZF=1 or SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_GE,C_NL,       { SF=OF          }
                        C_L,C_NGE:       { SF<>OF         }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_NO,            { OF=0           }
                        C_O:             { OF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                        C_NP,C_PO,       { PF=0           }
                        C_P,C_PE:        { PF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
                        C_NS,            { SF=0           }
                        C_S:             { SF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
                        else
                          internalerror(2017042701);
                      end;
                      if RegReadByInstruction then
                        exit;
                    end;
                  { Otherwise consult the per-flag read channels. }
                  case getsubreg(reg) of
                    R_SUBW,R_SUBD,R_SUBQ:
                      RegReadByInstruction :=
                        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                         Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                    R_SUBFLAGCARRY:
                      RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGPARITY:
                      RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGAUXILIARY:
                      RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGZERO:
                      RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGSIGN:
                      RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGOVERFLOW:
                      RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGINTERRUPT:
                      RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGDIRECTION:
                      RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    else
                      internalerror(2017042601);
                  end;
                  exit;
                end;
              { Instructions flagged Ch_NoReadIfEqualRegs (per the channel
                name) do not count as reading when both register operands
                are the same register. }
              if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
                 (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
                 (p.oper[0]^.reg=p.oper[1]^.reg) then
                exit;
              { Finally, test the explicit operands against the table's
                read/modify direction channels. }
              if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
            end;
        end;
    end;
  end;
function TX86AsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  begin
    { Returns True when instruction p1 uses Reg in any way (read, write or
      modify).  Implicit uses are resolved through the instruction table's
      change information; explicit operand uses are handled by the
      inherited generic implementation at the end. }
    result:=false;
    if p1.typ<>ait_instruction then
      exit;
    { Ch_All marks instructions assumed to touch everything. }
    if (Ch_All in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    if (getregtype(reg)=R_INTREGISTER) and
      { change information for xmm movsd are not correct }
      ((taicpu(p1).opcode<>A_MOVSD) or (taicpu(p1).ops=0)) then
      begin
        case getsupreg(reg) of
          { RS_EAX = RS_RAX on x86-64 }
          RS_EAX:
            result:=([Ch_REAX,Ch_RRAX,Ch_WEAX,Ch_WRAX,Ch_RWEAX,Ch_RWRAX,Ch_MEAX,Ch_MRAX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ECX:
            result:=([Ch_RECX,Ch_RRCX,Ch_WECX,Ch_WRCX,Ch_RWECX,Ch_RWRCX,Ch_MECX,Ch_MRCX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDX:
            result:=([Ch_REDX,Ch_RRDX,Ch_WEDX,Ch_WRDX,Ch_RWEDX,Ch_RWRDX,Ch_MEDX,Ch_MRDX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBX:
            result:=([Ch_REBX,Ch_RRBX,Ch_WEBX,Ch_WRBX,Ch_RWEBX,Ch_RWRBX,Ch_MEBX,Ch_MRBX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESP:
            result:=([Ch_RESP,Ch_RRSP,Ch_WESP,Ch_WRSP,Ch_RWESP,Ch_RWRSP,Ch_MESP,Ch_MRSP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBP:
            result:=([Ch_REBP,Ch_RRBP,Ch_WEBP,Ch_WRBP,Ch_RWEBP,Ch_RWRBP,Ch_MEBP,Ch_MRBP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESI:
            { Ch_RMemEDI: string operations read memory at ESI/EDI. }
            result:=([Ch_RESI,Ch_RRSI,Ch_WESI,Ch_WRSI,Ch_RWESI,Ch_RWRSI,Ch_MESI,Ch_MRSI,Ch_RMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDI:
            result:=([Ch_REDI,Ch_RRDI,Ch_WEDI,Ch_WRDI,Ch_RWEDI,Ch_RWRDI,Ch_MEDI,Ch_MRDI,Ch_WMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      begin
        { Whole-flags channels cover every individual flag bit. }
        if ([Ch_RFlags,Ch_WFlags,Ch_RWFlags,Ch_RFLAGScc]*insprop[taicpu(p1).opcode].Ch)<>[] then
          exit(true);
        { Otherwise check the channel set of the specific flag asked for. }
        case getsubreg(reg) of
          R_SUBFLAGCARRY:
            Result:=([Ch_RCarryFlag,Ch_RWCarryFlag,Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGPARITY:
            Result:=([Ch_RParityFlag,Ch_RWParityFlag,Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGAUXILIARY:
            Result:=([Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGZERO:
            Result:=([Ch_RZeroFlag,Ch_RWZeroFlag,Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGSIGN:
            Result:=([Ch_RSignFlag,Ch_RWSignFlag,Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGOVERFLOW:
            Result:=([Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGINTERRUPT:
            Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGDIRECTION:
            Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if (getregtype(reg)=R_FPUREGISTER) and (Ch_FPU in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    { Fall back to the generic operand-based scan. }
    Result:=inherited RegInInstruction(Reg, p1);
  end;
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  begin
    { Returns True when instruction p1 may change the value of Reg (or of
      the requested flag bit, when Reg addresses part of the flags
      register).  Uses the instruction table's write/modify channels plus
      special cases for instructions the table cannot describe. }
    Result := False;
    if p1.typ <> ait_instruction then
      exit;
    with insprop[taicpu(p1).opcode] do
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        begin
          case getsubreg(reg) of
            { Whole flags register: any flag-writing channel counts. }
            R_SUBW,R_SUBD,R_SUBQ:
              Result :=
                [Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
                 Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                 Ch_W0DirFlag,Ch_W1DirFlag,Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGCARRY:
              Result:=[Ch_WCarryFlag,Ch_RWCarryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_WParityFlag,Ch_RWParityFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_WAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_WZeroFlag,Ch_RWZeroFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_WSignFlag,Ch_RWSignFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_WOverflowFlag,Ch_RWOverflowFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            else
              internalerror(2017042602);
          end;
          exit;
        end;
    case taicpu(p1).opcode of
      A_CALL:
        { We could potentially set Result to False if the register in
          question is non-volatile for the subroutine's calling convention,
          but this would require detecting the calling convention in use and
          also assuming that the routine doesn't contain malformed assembly
          language, for example... so it could only be done under -O4 as it
          would be considered a side-effect. [Kit] }
        Result := True;
      A_MOVSD:
        { special handling for SSE MOVSD; ops=0 is the string instruction,
          which is left to the table-driven analysis below.  NOTE(review):
          unlike A_VMOVSS/A_VMOVSD below this branch does not exit, so a
          False result here still falls through to the insprop checks —
          confirm that is intended. }
        if (taicpu(p1).ops>0) then
          begin
            if taicpu(p1).ops<>2 then
              internalerror(2017042703);
            Result := (taicpu(p1).oper[1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[1]^);
          end;
      { VMOVSS and VMOVSD has two and three operand flavours, this cannot modelled by x86ins.dat
        so fix it here (FK)
      }
      A_VMOVSS,
      A_VMOVSD:
        begin
          Result := (taicpu(p1).ops=3) and (taicpu(p1).oper[2]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[2]^);
          exit;
        end;
      A_IMUL:
        { Only the last operand is the destination. }
        Result := (taicpu(p1).oper[taicpu(p1).ops-1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[taicpu(p1).ops-1]^);
      else
        ;
    end;
    if Result then
      exit;
    with insprop[taicpu(p1).opcode] do
      begin
        { Implicit integer-register writes from the instruction table. }
        if getregtype(reg)=R_INTREGISTER then
          begin
            case getsupreg(reg) of
              RS_EAX:
                if [Ch_WEAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ECX:
                if [Ch_WECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDX:
                if [Ch_WEDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBX:
                if [Ch_WEBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESP:
                if [Ch_WESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBP:
                if [Ch_WEBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESI:
                if [Ch_WESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDI:
                if [Ch_WEDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
            end;
          end;
        { Explicit operands written or modified according to the table. }
        if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
          begin
            Result := true;
            exit
          end;
      end;
  end;
{$ifdef DEBUG_AOPTCPU}
    { Debug build: insert the message as an assembler comment before p so
      it shows up in the generated assembler listing. }
    procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;

    { Formatting helpers used to compose peephole debug messages. }
    function debug_tostr(i: tcgint): string; inline;
      begin
        Result := tostr(i);
      end;

    { GAS/AT&T-style register name, e.g. '%eax'. }
    function debug_regname(r: TRegister): string; inline;
      begin
        Result := '%' + std_regname(r);
      end;

    { Debug output function - creates a string representation of an operator }
    function debug_operstr(oper: TOper): string;
      begin
        case oper.typ of
          top_const:
            Result := '$' + debug_tostr(oper.val);
          top_reg:
            Result := debug_regname(oper.reg);
          top_ref:
            { AT&T-style memory operand: offset(base,index,scale) with the
              zero offset and absent components elided. }
            begin
              if oper.ref^.offset <> 0 then
                Result := debug_tostr(oper.ref^.offset) + '('
              else
                Result := '(';
              if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
                begin
                  Result := Result + debug_regname(oper.ref^.base);
                  if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
                    Result := Result + ',' + debug_regname(oper.ref^.index);
                end
              else
                if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
                  Result := Result + debug_regname(oper.ref^.index);
              if (oper.ref^.scalefactor > 1) then
                Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
              else
                Result := Result + ')';
            end;
          else
            Result := '[UNKNOWN]';
        end;
      end;

    function debug_op2str(opcode: tasmop): string; inline;
      begin
        Result := std_op2str[opcode];
      end;

    function debug_opsize2str(opsize: topsize): string; inline;
      begin
        Result := gas_opsize2str[opsize];
      end;

{$else DEBUG_AOPTCPU}
    { Release build: all debug helpers are inlined no-ops returning empty
      strings so the logging call sites optimize away (see the empty
      SPeepholeOptimization constant above). }
    procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
      begin
      end;

    function debug_tostr(i: tcgint): string; inline;
      begin
        Result := '';
      end;

    function debug_regname(r: TRegister): string; inline;
      begin
        Result := '';
      end;

    function debug_operstr(oper: TOper): string; inline;
      begin
        Result := '';
      end;

    function debug_op2str(opcode: tasmop): string; inline;
      begin
        Result := '';
      end;

    function debug_opsize2str(opsize: topsize): string; inline;
      begin
        Result := '';
      end;
{$endif DEBUG_AOPTCPU}
{ Returns True when generating MOVZX is considered acceptable on the
  current target: always on x86-64; on 32/16-bit targets it requires a
  386-class CPU (i8086 builds) and either size optimization or a CPU
  model on which MOVZX is cheap. }
class function TX86AsmOptimizer.IsMOVZXAcceptable: Boolean; inline;
  begin
{$ifdef x86_64}
    { Always fine on x86-64 }
    Result := True;
{$else x86_64}
    Result :=
{$ifdef i8086}
      (current_settings.cputype >= cpu_386) and
{$endif i8086}
      (
        { Always accept if optimising for size }
        (cs_opt_size in current_settings.optimizerswitches) or
        { From the Pentium II onwards, MOVZX only takes 1 cycle. [Kit] }
        (current_settings.optimizecputype >= cpu_Pentium2)
      );
{$endif x86_64}
  end;
  833. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  834. begin
  835. if not SuperRegistersEqual(reg1,reg2) then
  836. exit(false);
  837. if getregtype(reg1)<>R_INTREGISTER then
  838. exit(true); {because SuperRegisterEqual is true}
  839. case getsubreg(reg1) of
  840. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  841. higher, it preserves the high bits, so the new value depends on
  842. reg2's previous value. In other words, it is equivalent to doing:
  843. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  844. R_SUBL:
  845. exit(getsubreg(reg2)=R_SUBL);
  846. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  847. higher, it actually does a:
  848. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  849. R_SUBH:
  850. exit(getsubreg(reg2)=R_SUBH);
  851. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  852. bits of reg2:
  853. reg2 := (reg2 and $ffff0000) or word(reg1); }
  854. R_SUBW:
  855. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  856. { a write to R_SUBD always overwrites every other subregister,
  857. because it clears the high 32 bits of R_SUBQ on x86_64 }
  858. R_SUBD,
  859. R_SUBQ:
  860. exit(true);
  861. else
  862. internalerror(2017042801);
  863. end;
  864. end;
  865. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  866. begin
  867. if not SuperRegistersEqual(reg1,reg2) then
  868. exit(false);
  869. if getregtype(reg1)<>R_INTREGISTER then
  870. exit(true); {because SuperRegisterEqual is true}
  871. case getsubreg(reg1) of
  872. R_SUBL:
  873. exit(getsubreg(reg2)<>R_SUBH);
  874. R_SUBH:
  875. exit(getsubreg(reg2)<>R_SUBL);
  876. R_SUBW,
  877. R_SUBD,
  878. R_SUBQ:
  879. exit(true);
  880. else
  881. internalerror(2017042802);
  882. end;
  883. end;
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;
  begin
    { NOTE(review): none of the transformation branches below set
      result:=true even though they rewrite instructions — confirm whether
      callers depend on the return value of the pre-peephole pass. }
    result:=false;
    { changes the code sequence
        shr/sar const1, x
        shl     const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    if GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_SHL,[]) and
      (taicpu(p).oper[0]^.typ = top_const) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
      OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        { The first two cases trade one shift for an AND, which is larger
          code, hence they are skipped when optimizing for size. }
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 > const2 }
            { Keep a right shift by the difference, and turn the SHL into
              an AND that clears the const2 low bits. }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 < const2 }
            { Keep a left shift by the difference, and turn the first shift
              into an AND that clears the const1 low bits. }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 = const2 }
            { The pair collapses to a single AND masking out the low
              const1 bits; the SHL is removed entirely. }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            asml.remove(hp1);
            hp1.free;
          end;
      end;
  end;
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;
    BaseValue : TCGInt;
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
      MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
      (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        if (taicpu(p).ops = 2) then
          { remove "imul $1, reg" }
          begin
            { NOTE(review): hp1 is assigned here but not used afterwards in
              this branch — looks like a leftover; confirm. }
            hp1 := tai(p.Next);
            DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
            RemoveCurrentP(p);
            result:=true;
          end
        else
          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
          begin
            hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
            p.free;
            p := hp1;
          end
      else if ((taicpu(p).ops <= 2) or
        (taicpu(p).oper[2]^.typ = Top_Reg)) and
        not(cs_opt_size in current_settings.optimizerswitches) and
        { Skip when followed by Jcc on overflow: LEA/SHL do not set OF. }
        (not(GetNextInstruction(p, hp1)) or
          not((tai(hp1).typ = ait_instruction) and
            ((taicpu(hp1).opcode=A_Jcc) and
              (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
              lea (reg1,reg1,Y), reg2
              shl ZZ,reg2
            imul XX, reg1 to
              lea (reg1,reg1,YY), reg1
              shl ZZ,reg2
            This optimziation makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.
            This optimziation can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          { Exactly two set bits at most three positions apart means the
            constant factors as (2^ShiftValue)*BaseValue with BaseValue in
            [3,5,9], which LEA can scale directly. }
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              { LEA scale encodes BaseValue-1 (2, 4 or 8). }
              TmpRef.ScaleFactor := BaseValue-1;
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              RemoveCurrentP(p);
              { Apply the power-of-two part of the factor, if any. }
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
  { Returns True if instruction hp writes a completely new value into reg,
    i.e. the value written does not depend on reg's previous contents.
    Only a limited set of opcodes is recognised; anything else
    conservatively yields False.
    For the flags register (NR_DEFAULTFLAGS) the opcode's change set from
    insprop is consulted instead of the operand list. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
        (hp.typ <> ait_instruction) then
        begin
          Result := false;
          exit;
        end;
      p := taicpu(hp);
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        with insprop[p.opcode] do
          begin
            case getsubreg(reg) of
              { the whole flags register only counts as newly written if
                every individual arithmetic flag is newly written }
              R_SUBW,R_SUBD,R_SUBQ:
                Result:=
                  RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                  RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                  RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                  RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                  RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                  RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
              { an individual flag is newly written if the opcode clears it
                (W0), sets it (W1), writes it (W), leaves it undefined (WU)
                or writes all flags (WFlags) }
              R_SUBFLAGCARRY:
                Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGPARITY:
                Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGAUXILIARY:
                Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGZERO:
                Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGSIGN:
                Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGOVERFLOW:
                Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGINTERRUPT:
                Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGDIRECTION:
                Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
              else
                begin
                  writeln(getsubreg(reg));
                  internalerror(2017050501);
                end;
            end;
            exit;
          end;
      Result :=
        { plain loads: destination register fully overwritten and the source
          operand must not itself depend on reg }
        (((p.opcode = A_MOV) or
          (p.opcode = A_MOVZX) or
          (p.opcode = A_MOVSX) or
          (p.opcode = A_LEA) or
          (p.opcode = A_VMOVSS) or
          (p.opcode = A_VMOVSD) or
          (p.opcode = A_VMOVAPD) or
          (p.opcode = A_VMOVAPS) or
          (p.opcode = A_VMOVQ) or
          (p.opcode = A_MOVSS) or
          (p.opcode = A_MOVSD) or
          (p.opcode = A_MOVQ) or
          (p.opcode = A_MOVAPD) or
          (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
          (p.opcode = A_LDS) or
          (p.opcode = A_LES) or
{$endif not x86_64}
          (p.opcode = A_LFS) or
          (p.opcode = A_LGS) or
          (p.opcode = A_LSS)) and
         (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
         (p.oper[1]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
         ((p.oper[0]^.typ = top_const) or
          ((p.oper[0]^.typ = top_reg) and
           not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ = top_ref) and
           not RegInRef(reg,p.oper[0]^.ref^)))) or
        { POP fully overwrites its destination register }
        ((p.opcode = A_POP) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
        { three-operand IMUL writes to its third operand only }
        ((p.opcode = A_IMUL) and
         (p.ops=3) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
         (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
          ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
        { one-operand MUL/IMUL implicitly writes AX (byte) or (E/R)DX:(E/R)AX;
          reg counts as rewritten if it is the high half of the result and is
          not read as part of the implicit source }
        ((((p.opcode = A_IMUL) or
           (p.opcode = A_MUL)) and
          (p.ops=1)) and
         (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
         (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
          or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
         )) or
        { sign-extension instructions overwrite the implicit high register }
        ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
        { CBW writes AX but reads AL, so reg must not overlap AL }
        ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
        { far-pointer loads also write the respective segment register }
{$ifndef x86_64}
        ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
        ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        { instructions with implicit destination registers }
{$ifndef x86_64}
        ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
        ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
        ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
        ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
        ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
        ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        (((p.opcode = A_FSTSW) or
          (p.opcode = A_FNSTSW)) and
         (p.oper[0]^.typ=top_reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        { "xor/sub/sbb reg,reg" zeroes (or makes flag-defined) the register,
          so the result does not depend on its previous value }
        (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
         (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
         (p.oper[0]^.reg=p.oper[1]^.reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
    end;
  { Returns True if p is (the start of) function exit code: a RET, or a
    recognised frame-teardown sequence (LEAVE, stack-pointer restore via
    LEA, or frame-pointer restore via MOV/LEA plus POP) followed by a RET. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      hp2,hp3 : tai;
    begin
      { some x86-64 issue a NOP before the real exit code }
      if MatchInstruction(p,A_NOP,[]) then
        GetNextInstruction(p,p);
      result:=assigned(p) and (p.typ=ait_instruction) and
        ((taicpu(p).opcode = A_RET) or
         { "leave; ret" }
         ((taicpu(p).opcode=A_LEAVE) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_RET,[S_NO])
         ) or
         { "lea x(%esp),%esp; ret" (stack frame released via LEA) }
         (((taicpu(p).opcode=A_LEA) and
           MatchOpType(taicpu(p),top_ref,top_reg) and
           (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
           (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
          ) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_RET,[S_NO])
         ) or
         { "mov %framepointer,%esp" or "lea x(%framepointer),%esp",
           followed by "pop %framepointer; ret" }
         ((((taicpu(p).opcode=A_MOV) and
            MatchOpType(taicpu(p),top_reg,top_reg) and
            (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
            (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
           ((taicpu(p).opcode=A_LEA) and
            MatchOpType(taicpu(p),top_ref,top_reg) and
            (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
            (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
           )
          ) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
          MatchOpType(taicpu(hp2),top_reg) and
          (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
          GetNextInstruction(hp2,hp3) and
          MatchInstruction(hp3,A_RET,[S_NO])
         )
        );
    end;
  { Returns True if hp1 is an arithmetic operation whose destination is reg
    and whose result can be folded into a preceding load of reg:
    either a two-operand op (ADD/SUB/OR/XOR/AND/SHL/SHR/SAR) with reg as
    destination and a source that does not read reg, or a one-operand op
    (INC/DEC/NEG/NOT) acting directly on reg. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { destination must be reg itself; the source may be an immediate
            or any register other than reg }
          if (hp1.oper[1]^.typ = top_reg) and
             (hp1.oper[1]^.reg = reg) then
            Result :=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
        A_INC,A_DEC,A_NEG,A_NOT:
          { single-operand forms: the operand must be reg }
          Result :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        else
          ;
      end;
    end;
  { Removes the last deallocation marker of the function-result register(s)
    located before p, so the result register remains allocated through the
    exit code.  Which registers are affected depends on the return type
    (EAX always, plus EDX for 8-byte ordinal results). }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        hp2: tai;
      begin
        { walk backwards from p looking for a dealloc of supreg }
        hp2 := p;
        repeat
          hp2 := tai(hp2.previous);
          if assigned(hp2) and
            (hp2.typ = ait_regalloc) and
            (tai_regalloc(hp2).ratype=ra_dealloc) and
            (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
            (getsupreg(tai_regalloc(hp2).reg) = supreg) then
            begin
              { drop the dealloc marker: the register must stay live }
              asml.remove(hp2);
              hp2.free;
              break;
            end;
        { stop once the register is actually used by an instruction; a
          dealloc before that point belongs to an earlier live range }
        until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
        else
          ;
      end;
    end;
  { Pass-1 peephole optimisations for (V)MOVAPS/(V)MOVAPD:
    removes no-op moves, merges chained aligned moves, folds an aligned
    move into a following scalar move, and eliminates move pairs around
    FMA or scalar arithmetic instructions when the intermediate register
    dies.  Returns True if the instruction stream was changed and p was
    replaced. }
  function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
    var
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
              exit;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  TransferUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  { reg2 must die after the second move }
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 1',p);
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                      exit;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 2',p);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                      exit;
                    end
                end
              else if ((MatchInstruction(p,[A_MOVAPS,A_VMOVAPS],[S_NO]) and
                 MatchInstruction(hp1,[A_MOVSS,A_VMOVSS],[S_NO])) or
                 ((MatchInstruction(p,[A_MOVAPD,A_VMOVAPD],[S_NO]) and
                 MatchInstruction(hp1,[A_MOVSD,A_VMOVSD],[S_NO])))
                ) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmovs* reg2,<op>
                    dealloc reg2
                    =>
                    vmovs* reg1,reg3 }
                  TransferUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVS*2(V)MOVS* 1',p);
                      { turn the aligned move into the scalar move directly }
                      taicpu(p).opcode:=taicpu(hp1).opcode;
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                      exit;
                    end
                end;
            end;
          if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
            begin
              { vmova* reg1,reg2
                vfma*  ...,...,reg2
                vmova* reg2,reg1
                =>
                vfma* ...,...,reg1   (when reg2 dies afterwards) }
              if MatchInstruction(hp1,[A_VFMADDPD,
                                       A_VFMADD132PD,
                                       A_VFMADD132PS,
                                       A_VFMADD132SD,
                                       A_VFMADD132SS,
                                       A_VFMADD213PD,
                                       A_VFMADD213PS,
                                       A_VFMADD213SD,
                                       A_VFMADD213SS,
                                       A_VFMADD231PD,
                                       A_VFMADD231PS,
                                       A_VFMADD231SD,
                                       A_VFMADD231SS,
                                       A_VFMADDSUB132PD,
                                       A_VFMADDSUB132PS,
                                       A_VFMADDSUB213PD,
                                       A_VFMADDSUB213PS,
                                       A_VFMADDSUB231PD,
                                       A_VFMADDSUB231PS,
                                       A_VFMSUB132PD,
                                       A_VFMSUB132PS,
                                       A_VFMSUB132SD,
                                       A_VFMSUB132SS,
                                       A_VFMSUB213PD,
                                       A_VFMSUB213PS,
                                       A_VFMSUB213SD,
                                       A_VFMSUB213SS,
                                       A_VFMSUB231PD,
                                       A_VFMSUB231PS,
                                       A_VFMSUB231SD,
                                       A_VFMSUB231SS,
                                       A_VFMSUBADD132PD,
                                       A_VFMSUBADD132PS,
                                       A_VFMSUBADD213PD,
                                       A_VFMSUBADD213PS,
                                       A_VFMSUBADD231PD,
                                       A_VFMSUBADD231PS,
                                       A_VFNMADD132PD,
                                       A_VFNMADD132PS,
                                       A_VFNMADD132SD,
                                       A_VFNMADD132SS,
                                       A_VFNMADD213PD,
                                       A_VFNMADD213PS,
                                       A_VFNMADD213SD,
                                       A_VFNMADD213SS,
                                       A_VFNMADD231PD,
                                       A_VFNMADD231PS,
                                       A_VFNMADD231SD,
                                       A_VFNMADD231SS,
                                       A_VFNMSUB132PD,
                                       A_VFNMSUB132PS,
                                       A_VFNMSUB132SD,
                                       A_VFNMSUB132SS,
                                       A_VFNMSUB213PD,
                                       A_VFNMSUB213PS,
                                       A_VFNMSUB213SD,
                                       A_VFNMSUB213SS,
                                       A_VFNMSUB231PD,
                                       A_VFNMSUB231PS,
                                       A_VFNMSUB231SD,
                                       A_VFNMSUB231SS],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,[A_VMOVAPD,A_VMOVAPS,A_MOVAPD,A_MOVAPS],[S_NO]) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  TransferUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      { retarget the FMA to reg1 and drop both moves }
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                end
              else if (hp1.typ = ait_instruction) and
                GetNextInstruction(hp1, hp2) and
                MatchInstruction(hp2,taicpu(p).opcode,[]) and
                OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
                MatchOpType(taicpu(hp2),top_reg,top_reg) and
                MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                (((taicpu(p).opcode=A_MOVAPS) and
                  ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
                   (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
                 ((taicpu(p).opcode=A_MOVAPD) and
                  ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
                   (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
                ) then
                { change
                    movapX reg,reg2
                    addsX/subsX/... reg3, reg2
                    movapX reg2,reg
                  to
                    addsX/subsX/... reg3,reg
                }
                begin
                  TransferUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
                        debug_op2str(taicpu(p).opcode)+' '+
                        debug_op2str(taicpu(hp1).opcode)+' '+
                        debug_op2str(taicpu(hp2).opcode)+') done',p);
                      { we cannot eliminate the first move if
                        the operations uses the same register for source and dest }
                      if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                        begin
                          asml.remove(p);
                          p.Free;
                        end;
                      taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                      asml.remove(hp2);
                      hp2.Free;
                      p:=hp1;
                      result:=true;
                    end;
                end;
            end;
        end;
    end;
  { Folds a VMOVAPS/VMOVAPD that copies the result of a three-operand AVX
    instruction into another register, when the intermediate register dies:
    the VOp's destination is retargeted and the move is removed.
    Returns True if the move was eliminated. }
  function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { replace
          V<Op>X   %mreg1,%mreg2,%mreg3
          VMovX    %mreg3,%mreg4
          dealloc  %mreg3
        by
          V<Op>X   %mreg1,%mreg2,%mreg4
        ?
      }
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          TransferUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          { %mreg3 must not be live after the move }
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
        end;
    end;
  { Replaces all references to AOldReg in a memory reference to ANewReg.
    Returns True if at least one of the base or index registers was
    replaced.  Only exact register matches are considered. }
  class function TX86AsmOptimizer.ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean;
    begin
      { Note: the previously declared locals (OldSupReg, OldSubReg,
        MemSubReg) were never used and only produced compiler hints,
        so they have been removed. }
      Result := False;
      { For safety reasons, only check for exact register matches }
      { Check base register }
      if (ref.base = AOldReg) then
        begin
          ref.base := ANewReg;
          Result := True;
        end;
      { Check index register }
      if (ref.index = AOldReg) then
        begin
          ref.index := ANewReg;
          Result := True;
        end;
    end;
  { Replaces all references to AOldReg in an operand to ANewReg.
    For register operands, also matches smaller sub-registers of the same
    super-register (integer registers only); the replacement keeps the
    operand's own sub-register size.  For memory operands, base/index
    registers are replaced via ReplaceRegisterInRef.
    Returns True if the operand was changed.
    Fixed: removed the unused local MemSubReg and a stray double
    semicolon after the register assignment. }
  class function TX86AsmOptimizer.ReplaceRegisterInOper(const p: taicpu; const OperIdx: Integer; const AOldReg, ANewReg: TRegister): Boolean;
    var
      OldSupReg, NewSupReg: TSuperRegister;
      OldSubReg, NewSubReg: TSubRegister;
      OldRegType: TRegisterType;
      ThisOper: POper;
    begin
      ThisOper := p.oper[OperIdx]; { Faster to access overall }
      Result := False;
      if (AOldReg = NR_NO) or (ANewReg = NR_NO) then
        InternalError(2020011801);
      OldSupReg := getsupreg(AOldReg);
      OldSubReg := getsubreg(AOldReg);
      OldRegType := getregtype(AOldReg);
      NewSupReg := getsupreg(ANewReg);
      NewSubReg := getsubreg(ANewReg);
      { old and new register must be of the same type and size }
      if OldRegType <> getregtype(ANewReg) then
        InternalError(2020011802);
      if OldSubReg <> NewSubReg then
        InternalError(2020011803);
      case ThisOper^.typ of
        top_reg:
          if (
            (ThisOper^.reg = AOldReg) or
            (
              { also replace smaller sub-registers of the same integer
                super-register, preserving the operand's own size }
              (OldRegType = R_INTREGISTER) and
              (getsupreg(ThisOper^.reg) = OldSupReg) and
              (getregtype(ThisOper^.reg) = R_INTREGISTER) and
              (
                (getsubreg(ThisOper^.reg) <= OldSubReg)
{$ifndef x86_64}
                and (
                  { Under i386 and i8086, ESI, EDI, EBP and ESP
                    don't have an 8-bit representation }
                  (getsubreg(ThisOper^.reg) >= R_SUBW) or
                  not (NewSupReg in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
                )
{$endif x86_64}
              )
            )
          ) then
            begin
              ThisOper^.reg := newreg(getregtype(ANewReg), NewSupReg, getsubreg(p.oper[OperIdx]^.reg));
              Result := True;
            end;
        top_ref:
          if ReplaceRegisterInRef(ThisOper^.ref^, AOldReg, ANewReg) then
            Result := True;
        else
          ;
      end;
    end;
  { Replaces all references to AOldReg in an instruction to ANewReg, but
    only in operands the instruction actually reads (per the ReadFlag
    change set).  Returns True if at least one operand was changed.
    Fixed: guard the NR_CL comparison with a top_reg type check — oper is
    a variant record, so reading the reg field of a non-register operand
    is meaningless (a constant shift count could otherwise spuriously
    compare equal to CL's encoding). }
  function TX86AsmOptimizer.ReplaceRegisterInInstruction(const p: taicpu; const AOldReg, ANewReg: TRegister): Boolean;
    const
      ReadFlag: array[0..3] of TInsChange = (Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Rop4);
    var
      OperIdx: Integer;
    begin
      Result := False;
      for OperIdx := 0 to p.ops - 1 do
        if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) and
          { The shift and rotate instructions can only use CL }
          not (
            (OperIdx = 0) and
            (p.oper[0]^.typ = top_reg) and
            { This second condition just helps to avoid unnecessarily
              calling MatchInstruction for 10 different opcodes }
            (p.oper[0]^.reg = NR_CL) and
            MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], [])
          ) then
          Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
    end;
  { Returns True if dereferencing ref is known not to fault: the reference
    must have no index register and a base that is either the stack
    pointer, the current frame pointer, or (on x86-64) a RIP-relative
    PIC address. }
  class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean; inline;
    begin
      { an index register rules out safety immediately }
      if ref^.index <> NR_NO then
        Result := False
      else
        Result :=
          (ref^.base = NR_STACK_POINTER_REG) or
          (ref^.base = current_procinfo.framepointer)
{$ifdef x86_64}
          or (
            (ref^.base = NR_RIP) and
            (ref^.refaddr in [addr_pic, addr_pic_no_got])
          )
{$endif x86_64}
          ;
    end;
  { Given a preceding "mov ReplaceReg,CurrentReg" (p_mov), tries to rewrite
    reads of CurrentReg in hp to ReplaceReg so the dependency on the MOV's
    destination is shortened (reducing pipeline stalls, and possibly making
    the MOV dead).  Returns True if hp was changed.
    Instructions with restricted implicit operands are skipped, and IMUL is
    handled specially because its operand roles differ per operand count. }
  function TX86AsmOptimizer.DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
    var
      CurrentReg, ReplaceReg: TRegister;
      SubReg: TSubRegister;
    begin
      Result := False;
      ReplaceReg := taicpu(p_mov).oper[0]^.reg;
      CurrentReg := taicpu(p_mov).oper[1]^.reg;
      case hp.opcode of
        A_FSTSW, A_FNSTSW,
        A_IN, A_INS, A_OUT, A_OUTS,
        A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS:
          { These routines have explicit operands, but they are restricted in
            what they can be (e.g. IN and OUT can only read from AL, AX or
            EAX. }
          Exit;
        A_IMUL:
          begin
            { The 1-operand version writes to implicit registers
              The 2-operand version reads from the first operator, and reads
              from and writes to the second (equivalent to Ch_ROp1, ChRWOp2).
              the 3-operand version reads from a register that it doesn't write to
            }
            case hp.ops of
              1:
                { only replace when CurrentReg is not one of IMUL's implicit
                  result registers (AX for byte ops, EAX/EDX otherwise) }
                if (
                  (
                    (hp.opsize = S_B) and (getsupreg(CurrentReg) <> RS_EAX)
                  ) or
                  not (getsupreg(CurrentReg) in [RS_EAX, RS_EDX])
                ) and ReplaceRegisterInOper(hp, 0, CurrentReg, ReplaceReg) then
                  begin
                    Result := True;
                    DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 1)', hp);
                    AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                  end;
              2:
                { Only modify the first parameter }
                if ReplaceRegisterInOper(hp, 0, CurrentReg, ReplaceReg) then
                  begin
                    Result := True;
                    DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 2)', hp);
                    AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                  end;
              3:
                { Only modify the second parameter }
                if ReplaceRegisterInOper(hp, 1, CurrentReg, ReplaceReg) then
                  begin
                    Result := True;
                    DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 3)', hp);
                    AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                  end;
              else
                InternalError(2020012901);
            end;
          end;
        else
          { generic case: replace in every operand the instruction reads }
          if (hp.ops > 0) and
            ReplaceRegisterInInstruction(hp, CurrentReg, ReplaceReg) then
            begin
              Result := True;
              DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovXXX2MovXXX)', hp);
              { keep ReplaceReg allocated across the extended live range }
              AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
            end;
      end;
    end;
  1679. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1680. var
  1681. hp1, hp2, hp4: tai;
  1682. GetNextInstruction_p, TempRegUsed: Boolean;
  1683. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  1684. NewSize: topsize;
  1685. CurrentReg: TRegister;
  1686. begin
  1687. Result:=false;
  1688. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1689. { remove mov reg1,reg1? }
  1690. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  1691. then
  1692. begin
  1693. DebugMsg(SPeepholeOptimization + 'Mov2Nop 1 done',p);
  1694. { take care of the register (de)allocs following p }
  1695. UpdateUsedRegs(tai(p.next));
  1696. asml.remove(p);
  1697. p.free;
  1698. p:=hp1;
  1699. Result:=true;
  1700. exit;
  1701. end;
  1702. { All the next optimisations require a next instruction }
  1703. if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
  1704. Exit;
  1705. { Look for:
  1706. mov %reg1,%reg2
  1707. ??? %reg2,r/m
  1708. Change to:
  1709. mov %reg1,%reg2
  1710. ??? %reg1,r/m
  1711. }
  1712. if MatchOpType(taicpu(p), top_reg, top_reg) then
  1713. begin
  1714. CurrentReg := taicpu(p).oper[1]^.reg;
  1715. if RegReadByInstruction(CurrentReg, hp1) and
  1716. DeepMOVOpt(taicpu(p), taicpu(hp1)) then
  1717. begin
  1718. TransferUsedRegs(TmpUsedRegs);
  1719. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1720. if not RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs) and
  1721. { Just in case something didn't get modified (e.g. an
  1722. implicit register) }
  1723. not RegReadByInstruction(CurrentReg, hp1) then
  1724. begin
  1725. { We can remove the original MOV }
  1726. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
  1727. Asml.Remove(p);
  1728. p.Free;
  1729. p := hp1;
  1730. { TmpUsedRegs contains the results of "UpdateUsedRegs(tai(p.Next))" already,
  1731. so just restore it to UsedRegs instead of calculating it again }
  1732. RestoreUsedRegs(TmpUsedRegs);
  1733. Result := True;
  1734. Exit;
  1735. end;
  1736. { If we know a MOV instruction has become a null operation, we might as well
  1737. get rid of it now to save time. }
  1738. if (taicpu(hp1).opcode = A_MOV) and
  1739. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1740. SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
  1741. { Just being a register is enough to confirm it's a null operation }
  1742. (taicpu(hp1).oper[0]^.typ = top_reg) then
  1743. begin
  1744. Result := True;
  1745. { Speed-up to reduce a pipeline stall... if we had something like...
  1746. movl %eax,%edx
  1747. movw %dx,%ax
  1748. ... the second instruction would change to movw %ax,%ax, but
  1749. given that it is now %ax that's active rather than %eax,
  1750. penalties might occur due to a partial register write, so instead,
  1751. change it to a MOVZX instruction when optimising for speed.
  1752. }
  1753. if not (cs_opt_size in current_settings.optimizerswitches) and
  1754. IsMOVZXAcceptable and
  1755. (taicpu(hp1).opsize < taicpu(p).opsize)
  1756. {$ifdef x86_64}
  1757. { operations already implicitly set the upper 64 bits to zero }
  1758. and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
  1759. {$endif x86_64}
  1760. then
  1761. begin
  1762. CurrentReg := taicpu(hp1).oper[1]^.reg;
  1763. DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
  1764. case taicpu(p).opsize of
  1765. S_W:
  1766. if taicpu(hp1).opsize = S_B then
  1767. taicpu(hp1).opsize := S_BL
  1768. else
  1769. InternalError(2020012911);
  1770. S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
  1771. case taicpu(hp1).opsize of
  1772. S_B:
  1773. taicpu(hp1).opsize := S_BL;
  1774. S_W:
  1775. taicpu(hp1).opsize := S_WL;
  1776. else
  1777. InternalError(2020012912);
  1778. end;
  1779. else
  1780. InternalError(2020012910);
  1781. end;
  1782. taicpu(hp1).opcode := A_MOVZX;
  1783. taicpu(hp1).oper[1]^.reg := newreg(getregtype(CurrentReg), getsupreg(CurrentReg), R_SUBD)
  1784. end
  1785. else
  1786. begin
  1787. GetNextInstruction_p := GetNextInstruction(hp1, hp2);
  1788. DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
  1789. asml.remove(hp1);
  1790. hp1.free;
  1791. { The instruction after what was hp1 is now the immediate next instruction,
  1792. so we can continue to make optimisations if it's present }
  1793. if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
  1794. Exit;
  1795. hp1 := hp2;
  1796. end;
  1797. end;
  1798. end;
  1799. end;
  1800. { Depending on the DeepMOVOpt above, it may turn out that hp1 completely
  1801. overwrites the original destination register. e.g.
  1802. movl ###,%reg2d
  1803. movslq ###,%reg2q (### doesn't have to be the same as the first one)
  1804. In this case, we can remove the MOV (Go to "Mov2Nop 5" below)
  1805. }
  1806. if (taicpu(p).oper[1]^.typ = top_reg) and
  1807. MatchInstruction(hp1, [A_LEA, A_MOV, A_MOVSX, A_MOVZX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}], []) and
  1808. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1809. Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[1]^.reg) then
  1810. begin
  1811. if RegInOp(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[0]^) then
  1812. begin
  1813. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1814. case taicpu(p).oper[0]^.typ of
  1815. top_const:
  1816. { We have something like:
  1817. movb $x, %regb
  1818. movzbl %regb,%regd
  1819. Change to:
  1820. movl $x, %regd
  1821. }
  1822. begin
  1823. case taicpu(hp1).opsize of
  1824. S_BW:
  1825. begin
  1826. if (taicpu(hp1).opcode = A_MOVSX) and
  1827. (taicpu(p).oper[0]^.val > $7F) then
  1828. taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $100; { Convert to signed }
  1829. setsubreg(taicpu(p).oper[1]^.reg, R_SUBW);
  1830. taicpu(p).opsize := S_W;
  1831. end;
  1832. S_BL:
  1833. begin
  1834. if (taicpu(hp1).opcode = A_MOVSX) and
  1835. (taicpu(p).oper[0]^.val > $7F) then
  1836. taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $100; { Convert to signed }
  1837. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  1838. taicpu(p).opsize := S_L;
  1839. end;
  1840. S_WL:
  1841. begin
  1842. if (taicpu(hp1).opcode = A_MOVSX) and
  1843. (taicpu(p).oper[0]^.val > $7FFF) then
  1844. taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $10000; { Convert to signed }
  1845. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  1846. taicpu(p).opsize := S_L;
  1847. end;
  1848. {$ifdef x86_64}
  1849. S_BQ:
  1850. begin
  1851. if (taicpu(hp1).opcode = A_MOVSX) and
  1852. (taicpu(p).oper[0]^.val > $7F) then
  1853. taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $100; { Convert to signed }
  1854. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  1855. taicpu(p).opsize := S_Q;
  1856. end;
  1857. S_WQ:
  1858. begin
  1859. if (taicpu(hp1).opcode = A_MOVSX) and
  1860. (taicpu(p).oper[0]^.val > $7FFF) then
  1861. taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $10000; { Convert to signed }
  1862. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  1863. taicpu(p).opsize := S_Q;
  1864. end;
  1865. S_LQ:
  1866. begin
  1867. if (taicpu(hp1).opcode = A_MOVSXD) and { Note it's MOVSXD, not MOVSX }
  1868. (taicpu(p).oper[0]^.val > $7FFFFFFF) then
  1869. taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $100000000; { Convert to signed }
  1870. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  1871. taicpu(p).opsize := S_Q;
  1872. end;
  1873. {$endif x86_64}
  1874. else
  1875. { If hp1 was a MOV instruction, it should have been
  1876. optimised already }
  1877. InternalError(2020021001);
  1878. end;
  1879. DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 2 done',p);
  1880. asml.Remove(hp1);
  1881. hp1.Free;
  1882. Result := True;
  1883. Exit;
  1884. end;
  1885. top_ref:
  1886. { We have something like:
  1887. movb mem, %regb
  1888. movzbl %regb,%regd
  1889. Change to:
  1890. movzbl mem, %regd
  1891. }
  1892. if (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) and (IsMOVZXAcceptable or (taicpu(hp1).opcode<>A_MOVZX)) then
  1893. begin
  1894. DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 1 done',p);
  1895. taicpu(hp1).loadref(0,taicpu(p).oper[0]^.ref^);
  1896. RemoveCurrentP(p);
  1897. Result:=True;
  1898. Exit;
  1899. end;
  1900. else
  1901. if (taicpu(hp1).opcode <> A_MOV) and (taicpu(hp1).opcode <> A_LEA) then
  1902. { Just to make a saving, since there are no more optimisations with MOVZX and MOVSX/D }
  1903. Exit;
  1904. end;
  1905. end
  1906. { The RegInOp check makes sure that movl r/m,%reg1l; movzbl (%reg1l),%reg1l"
  1907. and "movl r/m,%reg1; leal $1(%reg1,%reg2),%reg1" etc. are not incorrectly
  1908. optimised }
  1909. else
  1910. begin
  1911. DebugMsg(SPeepholeOptimization + 'Mov2Nop 5 done',p);
  1912. { take care of the register (de)allocs following p }
  1913. UpdateUsedRegs(tai(p.next));
  1914. asml.remove(p);
  1915. p.free;
  1916. p:=hp1;
  1917. Result := True;
  1918. Exit;
  1919. end;
  1920. end;
  1921. if (taicpu(hp1).opcode = A_AND) and
  1922. (taicpu(p).oper[1]^.typ = top_reg) and
  1923. MatchOpType(taicpu(hp1),top_const,top_reg) then
  1924. begin
  1925. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1926. begin
  1927. case taicpu(p).opsize of
  1928. S_L:
  1929. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1930. begin
  1931. { Optimize out:
  1932. mov x, %reg
  1933. and ffffffffh, %reg
  1934. }
  1935. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  1936. asml.remove(hp1);
  1937. hp1.free;
  1938. Result:=true;
  1939. exit;
  1940. end;
  1941. S_Q: { TODO: Confirm if this is even possible }
  1942. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1943. begin
  1944. { Optimize out:
  1945. mov x, %reg
  1946. and ffffffffffffffffh, %reg
  1947. }
  1948. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  1949. asml.remove(hp1);
  1950. hp1.free;
  1951. Result:=true;
  1952. exit;
  1953. end;
  1954. else
  1955. ;
  1956. end;
  1957. end
  1958. else if IsMOVZXAcceptable and
  1959. (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  1960. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  1961. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  1962. then
  1963. begin
  1964. InputVal := debug_operstr(taicpu(p).oper[0]^);
  1965. MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
  1966. case taicpu(p).opsize of
  1967. S_B:
  1968. if (taicpu(hp1).oper[0]^.val = $ff) then
  1969. begin
  1970. { Convert:
  1971. movb x, %regl movb x, %regl
  1972. andw ffh, %regw andl ffh, %regd
  1973. To:
  1974. movzbw x, %regd movzbl x, %regd
  1975. (Identical registers, just different sizes)
  1976. }
  1977. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  1978. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  1979. case taicpu(hp1).opsize of
  1980. S_W: NewSize := S_BW;
  1981. S_L: NewSize := S_BL;
  1982. {$ifdef x86_64}
  1983. S_Q: NewSize := S_BQ;
  1984. {$endif x86_64}
  1985. else
  1986. InternalError(2018011510);
  1987. end;
  1988. end
  1989. else
  1990. NewSize := S_NO;
  1991. S_W:
  1992. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1993. begin
  1994. { Convert:
  1995. movw x, %regw
  1996. andl ffffh, %regd
  1997. To:
  1998. movzwl x, %regd
  1999. (Identical registers, just different sizes)
  2000. }
  2001. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  2002. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  2003. case taicpu(hp1).opsize of
  2004. S_L: NewSize := S_WL;
  2005. {$ifdef x86_64}
  2006. S_Q: NewSize := S_WQ;
  2007. {$endif x86_64}
  2008. else
  2009. InternalError(2018011511);
  2010. end;
  2011. end
  2012. else
  2013. NewSize := S_NO;
  2014. else
  2015. NewSize := S_NO;
  2016. end;
  2017. if NewSize <> S_NO then
  2018. begin
  2019. PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
  2020. { The actual optimization }
  2021. taicpu(p).opcode := A_MOVZX;
  2022. taicpu(p).changeopsize(NewSize);
  2023. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  2024. { Safeguard if "and" is followed by a conditional command }
  2025. TransferUsedRegs(TmpUsedRegs);
  2026. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  2027. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
  2028. begin
  2029. { At this point, the "and" command is effectively equivalent to
  2030. "test %reg,%reg". This will be handled separately by the
  2031. Peephole Optimizer. [Kit] }
  2032. DebugMsg(SPeepholeOptimization + PreMessage +
  2033. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  2034. end
  2035. else
  2036. begin
  2037. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
  2038. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  2039. asml.Remove(hp1);
  2040. hp1.Free;
  2041. end;
  2042. Result := True;
  2043. Exit;
  2044. end;
  2045. end;
  2046. end;
  2047. { Next instruction is also a MOV ? }
  2048. if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  2049. begin
  2050. if (taicpu(p).oper[1]^.typ = top_reg) and
  2051. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  2052. begin
  2053. CurrentReg := taicpu(p).oper[1]^.reg;
  2054. TransferUsedRegs(TmpUsedRegs);
  2055. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2056. { we have
  2057. mov x, %treg
  2058. mov %treg, y
  2059. }
  2060. if not(RegInOp(CurrentReg, taicpu(hp1).oper[1]^)) then
  2061. if not(RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs)) then
  2062. { we've got
  2063. mov x, %treg
  2064. mov %treg, y
  2065. with %treg is not used after }
  2066. case taicpu(p).oper[0]^.typ Of
  2067. { top_reg is covered by DeepMOVOpt }
  2068. top_const:
  2069. begin
  2070. { change
  2071. mov const, %treg
  2072. mov %treg, y
  2073. to
  2074. mov const, y
  2075. }
  2076. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  2077. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  2078. begin
  2079. if taicpu(hp1).oper[1]^.typ=top_reg then
  2080. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  2081. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  2082. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  2083. asml.remove(hp1);
  2084. hp1.free;
  2085. Result:=true;
  2086. Exit;
  2087. end;
  2088. end;
  2089. top_ref:
  2090. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  2091. begin
  2092. { change
  2093. mov mem, %treg
  2094. mov %treg, %reg
  2095. to
  2096. mov mem, %reg"
  2097. }
  2098. taicpu(p).loadreg(1, taicpu(hp1).oper[1]^.reg);
  2099. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  2100. asml.remove(hp1);
  2101. hp1.free;
  2102. Result:=true;
  2103. Exit;
  2104. end;
  2105. else
  2106. ;
  2107. end
  2108. else
  2109. { %treg is used afterwards, but all eventualities
  2110. other than the first MOV instruction being a constant
  2111. are covered by DeepMOVOpt, so only check for that }
  2112. if (taicpu(p).oper[0]^.typ = top_const) and
  2113. (
  2114. { For MOV operations, a size saving is only made if the register/const is byte-sized }
  2115. not (cs_opt_size in current_settings.optimizerswitches) or
  2116. (taicpu(hp1).opsize = S_B)
  2117. ) and
  2118. (
  2119. (taicpu(hp1).oper[1]^.typ = top_reg) or
  2120. ((taicpu(p).oper[0]^.val >= low(longint)) and (taicpu(p).oper[0]^.val <= high(longint)))
  2121. ) then
  2122. begin
  2123. DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 6b)',hp1);
  2124. taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
  2125. end;
  2126. end;
  2127. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  2128. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  2129. { mov reg1, mem1 or mov mem1, reg1
  2130. mov mem2, reg2 mov reg2, mem2}
  2131. begin
  2132. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  2133. { mov reg1, mem1 or mov mem1, reg1
  2134. mov mem2, reg1 mov reg2, mem1}
  2135. begin
  2136. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  2137. { Removes the second statement from
  2138. mov reg1, mem1/reg2
  2139. mov mem1/reg2, reg1 }
  2140. begin
  2141. if taicpu(p).oper[0]^.typ=top_reg then
  2142. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  2143. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  2144. asml.remove(hp1);
  2145. hp1.free;
  2146. Result:=true;
  2147. exit;
  2148. end
  2149. else
  2150. begin
  2151. TransferUsedRegs(TmpUsedRegs);
  2152. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2153. if (taicpu(p).oper[1]^.typ = top_ref) and
  2154. { mov reg1, mem1
  2155. mov mem2, reg1 }
  2156. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  2157. GetNextInstruction(hp1, hp2) and
  2158. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  2159. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  2160. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  2161. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  2162. { change to
  2163. mov reg1, mem1 mov reg1, mem1
  2164. mov mem2, reg1 cmp reg1, mem2
  2165. cmp mem1, reg1
  2166. }
  2167. begin
  2168. asml.remove(hp2);
  2169. hp2.free;
  2170. taicpu(hp1).opcode := A_CMP;
  2171. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  2172. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  2173. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  2174. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  2175. end;
  2176. end;
  2177. end
  2178. else if (taicpu(p).oper[1]^.typ=top_ref) and
  2179. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  2180. begin
  2181. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  2182. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  2183. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  2184. end
  2185. else
  2186. begin
  2187. TransferUsedRegs(TmpUsedRegs);
  2188. if GetNextInstruction(hp1, hp2) and
  2189. MatchOpType(taicpu(p),top_ref,top_reg) and
  2190. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  2191. (taicpu(hp1).oper[1]^.typ = top_ref) and
  2192. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  2193. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  2194. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  2195. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  2196. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  2197. { mov mem1, %reg1
  2198. mov %reg1, mem2
  2199. mov mem2, reg2
  2200. to:
  2201. mov mem1, reg2
  2202. mov reg2, mem2}
  2203. begin
  2204. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  2205. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  2206. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  2207. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2208. asml.remove(hp2);
  2209. hp2.free;
  2210. end
  2211. {$ifdef i386}
  2212. { this is enabled for i386 only, as the rules to create the reg sets below
  2213. are too complicated for x86-64, so this makes this code too error prone
  2214. on x86-64
  2215. }
  2216. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  2217. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  2218. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  2219. { mov mem1, reg1 mov mem1, reg1
  2220. mov reg1, mem2 mov reg1, mem2
  2221. mov mem2, reg2 mov mem2, reg1
  2222. to: to:
  2223. mov mem1, reg1 mov mem1, reg1
  2224. mov mem1, reg2 mov reg1, mem2
  2225. mov reg1, mem2
  2226. or (if mem1 depends on reg1
  2227. and/or if mem2 depends on reg2)
  2228. to:
  2229. mov mem1, reg1
  2230. mov reg1, mem2
  2231. mov reg1, reg2
  2232. }
  2233. begin
  2234. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  2235. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  2236. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  2237. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  2238. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  2239. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  2240. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  2241. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  2242. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  2243. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  2244. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  2245. end
  2246. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  2247. begin
  2248. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  2249. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  2250. end
  2251. else
  2252. begin
  2253. asml.remove(hp2);
  2254. hp2.free;
  2255. end
  2256. {$endif i386}
  2257. ;
  2258. end;
  2259. end;
  2260. (* { movl [mem1],reg1
  2261. movl [mem1],reg2
  2262. to
  2263. movl [mem1],reg1
  2264. movl reg1,reg2
  2265. }
  2266. else if (taicpu(p).oper[0]^.typ = top_ref) and
  2267. (taicpu(p).oper[1]^.typ = top_reg) and
  2268. (taicpu(hp1).oper[0]^.typ = top_ref) and
  2269. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2270. (taicpu(p).opsize = taicpu(hp1).opsize) and
  2271. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  2272. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  2273. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  2274. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  2275. else*)
  2276. { movl const1,[mem1]
  2277. movl [mem1],reg1
  2278. to
  2279. movl const1,reg1
  2280. movl reg1,[mem1]
  2281. }
  2282. if MatchOpType(Taicpu(p),top_const,top_ref) and
  2283. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  2284. (taicpu(p).opsize = taicpu(hp1).opsize) and
  2285. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  2286. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  2287. begin
  2288. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  2289. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  2290. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  2291. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  2292. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  2293. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  2294. Result:=true;
  2295. exit;
  2296. end;
  2297. { mov x,reg1; mov y,reg1 -> mov y,reg1 is handled by the Mov2Nop 5 optimisation }
  2298. end;
  2299. { search further than the next instruction for a mov }
  2300. if
  2301. { check as much as possible before the expensive GetNextInstructionUsingReg call }
  2302. (taicpu(p).oper[1]^.typ = top_reg) and
  2303. (taicpu(p).oper[0]^.typ in [top_reg,top_const]) and
  2304. not RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp1) and
  2305. { we work with hp2 here, so hp1 can be still used later on when
  2306. checking for GetNextInstruction_p }
  2307. { GetNextInstructionUsingReg only searches one instruction ahead unless -O3 is specified }
  2308. GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[1]^.reg) and
  2309. MatchInstruction(hp2,A_MOV,[]) and
  2310. MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  2311. ((taicpu(p).oper[0]^.typ=top_const) or
  2312. ((taicpu(p).oper[0]^.typ=top_reg) and
  2313. not(RegUsedBetween(taicpu(p).oper[0]^.reg, p, hp2))
  2314. )
  2315. ) then
  2316. begin
  2317. { we have
  2318. mov x, %treg
  2319. mov %treg, y
  2320. }
  2321. TransferUsedRegs(TmpUsedRegs);
  2322. TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
  2323. { We don't need to call UpdateUsedRegs for every instruction between
  2324. p and hp2 because the register we're concerned about will not
  2325. become deallocated (otherwise GetNextInstructionUsingReg would
  2326. have stopped at an earlier instruction). [Kit] }
  2327. TempRegUsed :=
  2328. RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs) or
  2329. RegReadByInstruction(taicpu(p).oper[1]^.reg, hp1);
  2330. case taicpu(p).oper[0]^.typ Of
  2331. top_reg:
  2332. begin
  2333. { change
  2334. mov %reg, %treg
  2335. mov %treg, y
  2336. to
  2337. mov %reg, y
  2338. }
  2339. CurrentReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
  2340. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  2341. if taicpu(hp2).oper[1]^.reg = CurrentReg then
  2342. begin
  2343. { %reg = y - remove hp2 completely (doing it here instead of relying on
  2344. the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
  2345. if TempRegUsed then
  2346. begin
  2347. DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
  2348. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  2349. asml.remove(hp2);
  2350. hp2.Free;
  2351. end
  2352. else
  2353. begin
  2354. asml.remove(hp2);
  2355. hp2.Free;
  2356. { We can remove the original MOV too }
  2357. DebugMsg(SPeepholeOptimization + 'MovMov2NopNop 6b done',p);
  2358. { take care of the register (de)allocs following p }
  2359. UpdateUsedRegs(tai(p.next));
  2360. asml.remove(p);
  2361. p.free;
  2362. p:=hp1;
  2363. Result:=true;
  2364. Exit;
  2365. end;
  2366. end
  2367. else
  2368. begin
  2369. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  2370. taicpu(hp2).loadReg(0, CurrentReg);
  2371. if TempRegUsed then
  2372. begin
  2373. { Don't remove the first instruction if the temporary register is in use }
  2374. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(CurrentReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
  2375. { No need to set Result to True. If there's another instruction later on
  2376. that can be optimised, it will be detected when the main Pass 1 loop
  2377. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  2378. end
  2379. else
  2380. begin
  2381. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
  2382. { take care of the register (de)allocs following p }
  2383. UpdateUsedRegs(tai(p.next));
  2384. asml.remove(p);
  2385. p.free;
  2386. p:=hp1;
  2387. Result:=true;
  2388. Exit;
  2389. end;
  2390. end;
  2391. end;
  2392. top_const:
  2393. if not (cs_opt_size in current_settings.optimizerswitches) or (taicpu(hp2).opsize = S_B) then
  2394. begin
  2395. { change
  2396. mov const, %treg
  2397. mov %treg, y
  2398. to
  2399. mov const, y
  2400. }
  2401. if (taicpu(hp2).oper[1]^.typ=top_reg) or
  2402. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  2403. begin
  2404. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  2405. taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
  2406. if TempRegUsed then
  2407. begin
  2408. { Don't remove the first instruction if the temporary register is in use }
  2409. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 7a)',hp2);
  2410. { No need to set Result to True. If there's another instruction later on
  2411. that can be optimised, it will be detected when the main Pass 1 loop
  2412. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  2413. end
  2414. else
  2415. begin
  2416. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 7 done',p);
  2417. { take care of the register (de)allocs following p }
  2418. UpdateUsedRegs(tai(p.next));
  2419. asml.remove(p);
  2420. p.free;
  2421. p:=hp1;
  2422. Result:=true;
  2423. Exit;
  2424. end;
  2425. end;
  2426. end;
  2427. else
  2428. Internalerror(2019103001);
  2429. end;
  2430. end;
  2431. if (aoc_MovAnd2Mov_3 in OptsToCheck) and
  2432. (taicpu(p).oper[1]^.typ = top_reg) and
  2433. (taicpu(p).opsize = S_L) and
  2434. GetNextInstructionUsingRegTrackingUse(p,hp2,taicpu(p).oper[1]^.reg) and
  2435. (taicpu(hp2).opcode = A_AND) and
  2436. (MatchOpType(taicpu(hp2),top_const,top_reg) or
  2437. (MatchOpType(taicpu(hp2),top_reg,top_reg) and
  2438. MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^))
  2439. ) then
  2440. begin
  2441. if SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp2).oper[1]^.reg) then
  2442. begin
  2443. if ((taicpu(hp2).oper[0]^.typ=top_const) and (taicpu(hp2).oper[0]^.val = $ffffffff)) or
  2444. ((taicpu(hp2).oper[0]^.typ=top_reg) and (taicpu(hp2).opsize=S_L)) then
  2445. begin
  2446. { Optimize out:
  2447. mov x, %reg
  2448. and ffffffffh, %reg
  2449. }
  2450. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 3 done',p);
  2451. asml.remove(hp2);
  2452. hp2.free;
  2453. Result:=true;
  2454. exit;
  2455. end;
  2456. end;
  2457. end;
  2458. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  2459. x >= RetOffset) as it doesn't do anything (it writes either to a
  2460. parameter or to the temporary storage room for the function
  2461. result)
  2462. }
  2463. if IsExitCode(hp1) and
  2464. (taicpu(p).oper[1]^.typ = top_ref) and
  2465. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  2466. (
  2467. (
  2468. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  2469. not (
  2470. assigned(current_procinfo.procdef.funcretsym) and
  2471. (taicpu(p).oper[1]^.ref^.offset <= tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)
  2472. )
  2473. ) or
  2474. { Also discard writes to the stack that are below the base pointer,
  2475. as this is temporary storage rather than a function result on the
  2476. stack, say. }
  2477. (
  2478. (taicpu(p).oper[1]^.ref^.base = NR_STACK_POINTER_REG) and
  2479. (taicpu(p).oper[1]^.ref^.offset < current_procinfo.final_localsize)
  2480. )
  2481. ) then
  2482. begin
  2483. asml.remove(p);
  2484. p.free;
  2485. p:=hp1;
  2486. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  2487. RemoveLastDeallocForFuncRes(p);
  2488. Result:=true;
  2489. exit;
  2490. end;
  2491. if MatchOpType(taicpu(p),top_reg,top_ref) and
  2492. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  2493. (taicpu(hp1).oper[1]^.typ = top_ref) and
  2494. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  2495. begin
  2496. { change
  2497. mov reg1, mem1
  2498. test/cmp x, mem1
  2499. to
  2500. mov reg1, mem1
  2501. test/cmp x, reg1
  2502. }
  2503. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  2504. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  2505. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  2506. exit;
  2507. end;
  2508. if (taicpu(p).oper[1]^.typ = top_reg) and
  2509. (hp1.typ = ait_instruction) and
  2510. GetNextInstruction(hp1, hp2) and
  2511. MatchInstruction(hp2,A_MOV,[]) and
  2512. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  2513. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  2514. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  2515. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  2516. ) then
  2517. begin
  2518. if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  2519. (taicpu(hp2).oper[0]^.typ=top_reg) then
  2520. { change movsX/movzX reg/ref, reg2
  2521. add/sub/or/... reg3/$const, reg2
  2522. mov reg2 reg/ref
  2523. dealloc reg2
  2524. to
  2525. add/sub/or/... reg3/$const, reg/ref }
  2526. begin
  2527. TransferUsedRegs(TmpUsedRegs);
  2528. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2529. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2530. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  2531. begin
  2532. { by example:
  2533. movswl %si,%eax movswl %si,%eax p
  2534. decl %eax addl %edx,%eax hp1
  2535. movw %ax,%si movw %ax,%si hp2
  2536. ->
  2537. movswl %si,%eax movswl %si,%eax p
  2538. decw %eax addw %edx,%eax hp1
  2539. movw %ax,%si movw %ax,%si hp2
  2540. }
  2541. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  2542. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  2543. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  2544. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  2545. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2546. {
  2547. ->
  2548. movswl %si,%eax movswl %si,%eax p
  2549. decw %si addw %dx,%si hp1
  2550. movw %ax,%si movw %ax,%si hp2
  2551. }
  2552. case taicpu(hp1).ops of
  2553. 1:
  2554. begin
  2555. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  2556. if taicpu(hp1).oper[0]^.typ=top_reg then
  2557. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2558. end;
  2559. 2:
  2560. begin
  2561. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  2562. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  2563. (taicpu(hp1).opcode<>A_SHL) and
  2564. (taicpu(hp1).opcode<>A_SHR) and
  2565. (taicpu(hp1).opcode<>A_SAR) then
  2566. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2567. end;
  2568. else
  2569. internalerror(2008042701);
  2570. end;
  2571. {
  2572. ->
  2573. decw %si addw %dx,%si p
  2574. }
  2575. asml.remove(hp2);
  2576. hp2.Free;
  2577. RemoveCurrentP(p);
  2578. Result:=True;
  2579. Exit;
  2580. end;
  2581. end;
  2582. if MatchOpType(taicpu(hp2),top_reg,top_reg) and
  2583. not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
  2584. ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
  2585. { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
  2586. ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
  2587. )
  2588. {$ifdef i386}
  2589. { byte registers of esi, edi, ebp, esp are not available on i386 }
  2590. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  2591. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  2592. {$endif i386}
  2593. then
  2594. { change movsX/movzX reg/ref, reg2
  2595. add/sub/or/... regX/$const, reg2
  2596. mov reg2, reg3
  2597. dealloc reg2
  2598. to
  2599. movsX/movzX reg/ref, reg3
  2600. add/sub/or/... reg3/$const, reg3
  2601. }
  2602. begin
  2603. TransferUsedRegs(TmpUsedRegs);
  2604. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2605. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2606. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  2607. begin
  2608. { by example:
  2609. movswl %si,%eax movswl %si,%eax p
  2610. decl %eax addl %edx,%eax hp1
  2611. movw %ax,%si movw %ax,%si hp2
  2612. ->
  2613. movswl %si,%eax movswl %si,%eax p
  2614. decw %eax addw %edx,%eax hp1
  2615. movw %ax,%si movw %ax,%si hp2
  2616. }
  2617. DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
  2618. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  2619. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  2620. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  2621. { limit size of constants as well to avoid assembler errors, but
  2622. check opsize to avoid overflow when left shifting the 1 }
  2623. if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=63) then
  2624. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl topsize2memsize[taicpu(hp2).opsize])-1);
  2625. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2626. taicpu(p).changeopsize(taicpu(hp2).opsize);
  2627. if taicpu(p).oper[0]^.typ=top_reg then
  2628. setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2629. taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
  2630. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  2631. {
  2632. ->
  2633. movswl %si,%eax movswl %si,%eax p
  2634. decw %si addw %dx,%si hp1
  2635. movw %ax,%si movw %ax,%si hp2
  2636. }
  2637. case taicpu(hp1).ops of
  2638. 1:
  2639. begin
  2640. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  2641. if taicpu(hp1).oper[0]^.typ=top_reg then
  2642. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2643. end;
  2644. 2:
  2645. begin
  2646. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  2647. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  2648. (taicpu(hp1).opcode<>A_SHL) and
  2649. (taicpu(hp1).opcode<>A_SHR) and
  2650. (taicpu(hp1).opcode<>A_SAR) then
  2651. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2652. end;
  2653. else
  2654. internalerror(2018111801);
  2655. end;
  2656. {
  2657. ->
  2658. decw %si addw %dx,%si p
  2659. }
  2660. asml.remove(hp2);
  2661. hp2.Free;
  2662. end;
  2663. end;
  2664. end;
  2665. if MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  2666. GetNextInstruction(hp1, hp2) and
  2667. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  2668. MatchOperand(Taicpu(p).oper[0]^,0) and
  2669. (Taicpu(p).oper[1]^.typ = top_reg) and
  2670. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  2671. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  2672. { mov reg1,0
  2673. bts reg1,operand1 --> mov reg1,operand2
  2674. or reg1,operand2 bts reg1,operand1}
  2675. begin
  2676. Taicpu(hp2).opcode:=A_MOV;
  2677. asml.remove(hp1);
  2678. insertllitem(hp2,hp2.next,hp1);
  2679. asml.remove(p);
  2680. p.free;
  2681. p:=hp1;
  2682. Result:=true;
  2683. exit;
  2684. end;
  2685. if MatchInstruction(hp1,A_LEA,[S_L]) and
  2686. MatchOpType(Taicpu(p),top_ref,top_reg) and
  2687. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  2688. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  2689. ) or
  2690. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  2691. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  2692. )
  2693. ) then
  2694. { mov reg1,ref
  2695. lea reg2,[reg1,reg2]
  2696. to
  2697. add reg2,ref}
  2698. begin
  2699. TransferUsedRegs(TmpUsedRegs);
  2700. { reg1 may not be used afterwards }
  2701. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  2702. begin
  2703. Taicpu(hp1).opcode:=A_ADD;
  2704. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  2705. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  2706. asml.remove(p);
  2707. p.free;
  2708. p:=hp1;
  2709. result:=true;
  2710. exit;
  2711. end;
  2712. end;
  2713. end;
{ Pass-1 optimisation for the extended move instructions (movsX/movzX style
  "MovXX" opcodes): when two consecutive identical-opcode, identical-size
  moves simply copy a value out and straight back again, the second move is
  always redundant and the first one too if its destination register dies.
  Returns True when the instruction list was changed. }
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  var
    hp1 : tai; { the instruction immediately following p }
  begin
    Result:=false;
    { only the two-operand form is considered }
    if taicpu(p).ops <> 2 then
      exit;
    { the follower must be the same opcode with the same operand size and
      also carry exactly two operands }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
          (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
          { operand kinds are crossed, i.e. one of:
            movXX reg1, mem1     or     movXX mem1, reg1
            movXX mem2, reg2            movXX reg2, mem2 }
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { the second destination equals the first source:
                movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg1            movXX reg2, mem1 }
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                      movXX reg1, mem1/reg2
                      movXX mem1/reg2, reg1
                    }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      { keep reg1 marked allocated across the pair so the
                        register allocator does not reuse it in between }
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { Removes the second statement from
                      movXX mem1/reg1, reg2
                      movXX reg2, mem1/reg1
                    }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      { reg2 dies here, so the first move is dead as well:
                        drop p and advance it past the (soon removed) hp1 }
                      begin
                        asml.remove(p);
                        p.free;
                        { NOTE(review): the result of GetNextInstruction is not
                          checked; if hp1 is the last instruction, p may be left
                          pointing at hp1 (freed below) - confirm this cannot
                          occur here }
                        GetNextInstruction(hp1,p);
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                      end
                    else
                      DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                    { in either case the second move is removed }
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
{ Pass-1 peephole for two-operand SSE arithmetic (Op in [ADD,MUL,...]):
  folds a following register-swapping VMOVAPD/VMOVAPS back into the
  operation when the intermediate register dies, i.e.

      <Op>X  %mreg1,%mreg2
      MovX   %mreg2,%mreg1
      dealloc %mreg2
  becomes
      <Op>X  %mreg2,%mreg1

  Returns True and frees the MOV instruction when the rewrite was done. }
function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    result:=false;
    { replace
        <Op>X    %mreg1,%mreg2         // Op in [ADD,MUL]
        MovX     %mreg2,%mreg1
        dealloc  %mreg2
      by
        <Op>X    %mreg2,%mreg1
      ?
    }
    if GetNextInstruction(p,hp1) and
      { we mix single and double operations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
      { the MOV must copy p's destination back into p's source }
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
      (taicpu(p).oper[0]^.typ=top_reg) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { only safe when the old destination register is dead after the MOV }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            { retarget the operation and drop the now-redundant MOV }
            taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
  end;
{ Pass-1 peephole for LEA. Applies, in order:
    - strips useless segment prefixes from the reference;
    - Lea2Mov / Lea2Nop: "lea (%reg1),%reg2" -> "mov %reg1,%reg2", or removal
      when source and destination coincide with offset 0;
    - Lea2Inc/Lea2Dec/Lea2Sub/Lea2Add: "lea off(%reg),%reg" rewritten as
      plain arithmetic on the same register;
    - LeaMov2Lea: folds a following "mov %reg1,%reg2" into the LEA's
      destination when %reg1 dies;
    - LeaLea2Lea: merges two chained LEAs into one;
    - LeaOp2Op: substitutes the LEA's reference directly into a memory
      operand of the next instruction when the LEA result register dies.
  Returns True when p was replaced or removed. }
function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  var
    hp1, hp2, hp3: tai;
    l : ASizeInt;
    ref: Integer;           { index (0 or 1) of the memory operand of hp1 that uses reg1, -1 if none }
    saveref: treference;    { backup so the reference can be restored if the fold is rejected }
  begin
    Result:=false;
    { removes seg register prefixes from LEA operations, as they
      don't do anything }
    taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
    { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
    if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
       (taicpu(p).oper[0]^.ref^.index = NR_NO) and
       { do not mess with leas acessing the stack pointer }
       (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
       (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
      begin
        if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
           (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
              taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous,p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        { base = destination and offset 0: the LEA is a no-op }
        else if (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
            RemoveCurrentP(p);
            Result:=true;
            exit;
          end
        { continue to use lea to adjust the stack pointer,
          it is the recommended way, but only if not optimizing for size }
        else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
                (cs_opt_size in current_settings.optimizerswitches) then
          with taicpu(p).oper[0]^.ref^ do
            if (base = taicpu(p).oper[1]^.reg) then
              begin
                l:=offset;
                if (l=1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
                  end
                else if (l=-1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
                  end
                else
                  begin
                    { -l would overflow for the most negative 32-bit value, so keep ADD there }
                    if (l<0) and (l<>-2147483648) then
                      begin
                        taicpu(p).opcode:=A_SUB;
                        taicpu(p).loadConst(0,-l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                      end
                    else
                      begin
                        taicpu(p).opcode:=A_ADD;
                        taicpu(p).loadConst(0,l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                      end;
                  end;
                Result:=true;
                exit;
              end;
      end;
    { LeaMov2Lea: lea x,%reg1; mov %reg1,%reg2 -> lea x,%reg2 when %reg1 dies }
    if GetNextInstruction(p,hp1) and
       MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
       MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
       MatchOpType(Taicpu(hp1),top_reg,top_reg) and
       (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
    { changes
        lea offset1(regX), reg1
        lea offset2(reg1), reg1
      to
        lea offset1+offset2(regX), reg1 }
    { for now, we do not mess with the stack pointer, thought it might be usefull to remove
      unneeded lea sequences on the stack pointer, it needs to be tested in detail }
    if (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
       MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
       MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
       (taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg) and
       (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
       (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
       (taicpu(p).oper[0]^.ref^.symbol=nil) and
       { either p has no index/scale at all, or p's index can take over hp1's
         index slot (hp1 must then be unscaled and p's base empty) }
       (((taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
         (taicpu(p).oper[0]^.ref^.index=NR_NO) and
         (taicpu(p).oper[0]^.ref^.index=taicpu(hp1).oper[0]^.ref^.index) and
         (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp1).oper[0]^.ref^.scalefactor)
        ) or
        ((taicpu(hp1).oper[0]^.ref^.scalefactor in [0,1]) and
         (taicpu(p).oper[0]^.ref^.base=NR_NO) and
         not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index,p,hp1)))
       ) and
       not(RegUsedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1)) and
       (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp1).oper[0]^.ref^.relsymbol) and
       (taicpu(p).oper[0]^.ref^.segment=taicpu(hp1).oper[0]^.ref^.segment) and
       (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp1).oper[0]^.ref^.symbol) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaLea2Lea done',p);
        inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
        taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
        if taicpu(p).oper[0]^.ref^.index<>NR_NO then
          begin
            { hp1's old base (= reg1) is gone; keep hp1's index as the new base
              and move p's scaled index into hp1 }
            taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
            taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
            taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
          end;
        RemoveCurrentP(p);
        result:=true;
        exit;
      end;
    { changes
        lea <ref1>, reg1
        <op> ...,<ref. with reg1>,...
      to
        <op> ...,<ref1>,... }
    if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
       (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
       GetNextInstruction(p,hp1) and
       (hp1.typ=ait_instruction) and
       not(MatchInstruction(hp1,A_LEA,[])) then
      begin
        { find a reference which uses reg1 }
        if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
          ref:=0
        else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
          ref:=1
        else
          ref:=-1;
        if (ref<>-1) and
           { reg1 must be either the base or the index }
           ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
          begin
            { reg1 can be removed from the reference }
            saveref:=taicpu(hp1).oper[ref]^.ref^;
            if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
            else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
            else
              Internalerror(2019111201);
            { check if the can insert all data of the lea into the second instruction }
            if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
               ((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
               ((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
               ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
               ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
               ((taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
               (taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
{$ifdef x86_64}
               { the combined displacement must stay a signed 32-bit value,
                 and RIP-relative addressing tolerates no other components }
               and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
               and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
                    ((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
                   )
{$endif x86_64}
               then
              begin
                { reg1 might not used by the second instruction after it is remove from the reference }
                if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    { reg1 is not updated so it might not be used afterwards }
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
                        if taicpu(p).oper[0]^.ref^.base<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
                        if taicpu(p).oper[0]^.ref^.index<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
                        if taicpu(p).oper[0]^.ref^.symbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
                        if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
                        if not(taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) then
                          taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
                        inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
                        RemoveCurrentP(p);
                        result:=true;
                        exit;
                      end
                  end;
              end;
            { recover }
            taicpu(hp1).oper[ref]^.ref^:=saveref;
          end;
      end;
  end;
{ Helper for OptPass1Sub: merges the instruction *preceding* the current
  "sub const,reg" (p) into p when it is a DEC/SUB/ADD of the same size on
  the same register:
    dec reg; sub c,reg        -> sub c+1,reg
    sub c1,reg; sub c2,reg    -> sub c1+c2,reg
    add c1,reg; sub c2,reg    -> sub c2-c1,reg (removed entirely when zero)
  Returns True ONLY when p itself was removed (the ADD case cancelling to
  zero), in which case p points to the instruction before the removed pair
  (or to its successor if there is no predecessor) so the caller rescans. }
function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  var
    hp1 : tai;
  begin
    DoSubAddOpt := False;
    if GetLastInstruction(p, hp1) and
       (hp1.typ = ait_instruction) and
       (taicpu(hp1).opsize = taicpu(p).opsize) then
      case taicpu(hp1).opcode Of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
            begin
              { dec reg + sub c,reg -> sub c+1,reg }
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB:
          if MatchOpType(taicpu(hp1),top_const,top_reg) and
             MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
            begin
              { sub c1,reg + sub c2,reg -> sub c1+c2,reg }
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
            end;
        A_ADD:
          begin
            if MatchOpType(taicpu(hp1),top_const,top_reg) and
               MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
              begin
                { add c1,reg + sub c2,reg -> sub c2-c1,reg }
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                asml.remove(hp1);
                hp1.free;
                { if the constants cancelled out, drop p as well }
                if (taicpu(p).oper[0]^.val = 0) then
                  begin
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    { reposition p on the previous instruction so the caller
                      can continue scanning; fall back to the successor }
                    if not GetLastInstruction(hp1, p) then
                      p := hp1;
                    DoSubAddOpt := True;
                  end
              end;
          end;
        else
          ;
      end;
  end;
  3061. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  3062. {$ifdef i386}
  3063. var
  3064. hp1 : tai;
  3065. {$endif i386}
  3066. begin
  3067. Result:=false;
  3068. { * change "subl $2, %esp; pushw x" to "pushl x"}
  3069. { * change "sub/add const1, reg" or "dec reg" followed by
  3070. "sub const2, reg" to one "sub ..., reg" }
  3071. if MatchOpType(taicpu(p),top_const,top_reg) then
  3072. begin
  3073. {$ifdef i386}
  3074. if (taicpu(p).oper[0]^.val = 2) and
  3075. (taicpu(p).oper[1]^.reg = NR_ESP) and
  3076. { Don't do the sub/push optimization if the sub }
  3077. { comes from setting up the stack frame (JM) }
  3078. (not(GetLastInstruction(p,hp1)) or
  3079. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  3080. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  3081. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  3082. begin
  3083. hp1 := tai(p.next);
  3084. while Assigned(hp1) and
  3085. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  3086. not RegReadByInstruction(NR_ESP,hp1) and
  3087. not RegModifiedByInstruction(NR_ESP,hp1) do
  3088. hp1 := tai(hp1.next);
  3089. if Assigned(hp1) and
  3090. MatchInstruction(hp1,A_PUSH,[S_W]) then
  3091. begin
  3092. taicpu(hp1).changeopsize(S_L);
  3093. if taicpu(hp1).oper[0]^.typ=top_reg then
  3094. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  3095. hp1 := tai(p.next);
  3096. asml.remove(p);
  3097. p.free;
  3098. p := hp1;
  3099. Result:=true;
  3100. exit;
  3101. end;
  3102. end;
  3103. {$endif i386}
  3104. if DoSubAddOpt(p) then
  3105. Result:=true;
  3106. end;
  3107. end;
{ Pass-1 peephole for SHL/SAL by a small constant (<= 3) on a 32/64-bit
  register: greedily absorbs following ADD/SUB/INC/DEC/LEA instructions on
  the same register into a single LEA with the shifted register as a scaled
  index ("ShlAddLeaSubIncDec2Lea").  On pre-Pentium-2 i386 targets it also
  rewrites a lone "shl $1" as "add reg,reg" and "shl $2/$3" as a scaled LEA,
  which pair better on those pipelines.
  Returns False in all paths (p may still be replaced in-place). }
function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
  var
    TmpBool1,TmpBool2 : Boolean;
    tmpref : treference;
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_const,top_reg) and
       (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
       (taicpu(p).oper[0]^.val <= 3) then
      { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
      begin
        { should we check the next instruction? }
        TmpBool1 := True;
        { have we found an add/sub which could be
          integrated in the lea? }
        TmpBool2 := False;
        reference_reset(tmpref,2,[]);
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
        { consume as many foldable follow-up instructions as possible; stop
          as soon as one is not foldable or a later instruction reads flags
          (the replacement LEA does not set them) }
        while TmpBool1 and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              ((((taicpu(hp1).opcode = A_ADD) or
                 (taicpu(hp1).opcode = A_SUB)) and
                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
               (((taicpu(hp1).opcode = A_INC) or
                 (taicpu(hp1).opcode = A_DEC)) and
                (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg)) or
               ((taicpu(hp1).opcode = A_LEA) and
                (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg))) and
              (not GetNextInstruction(hp1,hp2) or
               not instrReadsFlags(hp2)) Do
          begin
            TmpBool1 := False;
            if taicpu(hp1).opcode=A_LEA then
              begin
                { only merge a LEA when we still have a free base slot and
                  the combined scale stays encodable (<= 8) }
                if (TmpRef.base = NR_NO) and
                   (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                   (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
                   (taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and
                   ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                    (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                  begin
                    TmpBool1 := True;
                    TmpBool2 := True;
                    inc(TmpRef.offset, taicpu(hp1).oper[0]^.ref^.offset);
                    if taicpu(hp1).oper[0]^.ref^.scalefactor<>0 then
                      tmpref.scalefactor:=tmpref.scalefactor*taicpu(hp1).oper[0]^.ref^.scalefactor;
                    TmpRef.base := taicpu(hp1).oper[0]^.ref^.base;
                    asml.remove(hp1);
                    hp1.free;
                  end
              end
            else if (taicpu(hp1).oper[0]^.typ = Top_Const) then
              begin
                { add/sub const -> fold into the displacement }
                TmpBool1 := True;
                TmpBool2 := True;
                case taicpu(hp1).opcode of
                  A_ADD:
                    inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  A_SUB:
                    dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  else
                    internalerror(2019050536);
                end;
                asml.remove(hp1);
                hp1.free;
              end
            else
              { add reg (while the base slot is free), inc or dec }
              if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                 (((taicpu(hp1).opcode = A_ADD) and
                   (TmpRef.base = NR_NO)) or
                  (taicpu(hp1).opcode = A_INC) or
                  (taicpu(hp1).opcode = A_DEC)) then
                begin
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      TmpRef.base := taicpu(hp1).oper[0]^.reg;
                    A_INC:
                      inc(TmpRef.offset);
                    A_DEC:
                      dec(TmpRef.offset);
                    else
                      internalerror(2019050535);
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end;
          end;
        if TmpBool2
{$ifndef x86_64}
           or
           { on older CPUs the LEA form is preferable even without folding,
             unless optimizing for size }
           ((current_settings.optimizecputype < cpu_Pentium2) and
            (taicpu(p).oper[0]^.val <= 3) and
            not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
          then
          begin
            { nothing folded and shift count 1: "add reg,reg" is shorter }
            if not(TmpBool2) and
               (taicpu(p).oper[0]^.val=1) then
              begin
                hp1:=taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              end
            else
              hp1:=taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                taicpu(p).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'ShlAddLeaSubIncDec2Lea',p);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$ifndef x86_64}
    else if (current_settings.optimizecputype < cpu_Pentium2) and
            MatchOpType(taicpu(p),top_const,top_reg) then
      begin
        { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
          but faster on a 486, and Tairable in both U and V pipes on the Pentium
          (unlike shl, which is only Tairable in the U pipe) }
        if taicpu(p).oper[0]^.val=1 then
          begin
            hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
              taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end
        { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
          "shl $3, %reg" to "lea (,%reg,8), %reg }
        else if (taicpu(p).opsize = S_L) and
                (taicpu(p).oper[0]^.val<= 3) then
          begin
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$endif x86_64}
    ;
  end;
{ Pass-1 peephole for SETcc: when a SETcc result is immediately re-tested
  ("test %reg,%reg" or "cmp $0,%reg") and branched on, the branch condition
  is rewritten in terms of the original SETcc condition and the test is
  removed; the SETcc itself is removed too when its register dies.
  Returns True only when p (the SETcc) was removed. }
function TX86AsmOptimizer.OptPass1SETcc(var p: tai): boolean;
  var
    hp1,hp2,next: tai; SetC, JumpC: TAsmCond; Unconditional: Boolean;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_reg) and
       GetNextInstruction(p, hp1) and
       ((MatchInstruction(hp1, A_TEST, [S_B]) and
         MatchOpType(taicpu(hp1),top_reg,top_reg) and
         (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg)) or
        (MatchInstruction(hp1, A_CMP, [S_B]) and
         MatchOpType(taicpu(hp1),top_const,top_reg) and
         (taicpu(hp1).oper[0]^.val=0))
       ) and
       (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
       GetNextInstruction(hp1, hp2) and
       MatchInstruction(hp2, A_Jcc, []) then
      { Change from:             To:
          set(C) %reg             j(~C) label
          test %reg,%reg/cmp $0,%reg
          je   label

          set(C) %reg             j(C)  label
          test %reg,%reg/cmp $0,%reg
          jne  label
      }
      begin
        next := tai(p.Next);
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, next);
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        JumpC := taicpu(hp2).condition;
        Unconditional := False;
        { je  -> branch when the SETcc condition was false -> invert it;
          jne -> branch when it was true -> keep it }
        if conditions_equal(JumpC, C_E) then
          SetC := inverse_cond(taicpu(p).condition)
        else if conditions_equal(JumpC, C_NE) then
          SetC := taicpu(p).condition
        else
          { We've got something weird here (and inefficent) }
          begin
            DebugMsg('DEBUG: Inefficient jump - check code generation', p);
            SetC := C_NONE;
            { JAE/JNB will always branch (use 'condition_in', since C_AE <> C_NB normally) }
            if condition_in(C_AE, JumpC) then
              Unconditional := True
            else
              { Not sure what to do with this jump - drop out }
              Exit;
          end;
        { the test/cmp is now redundant }
        asml.Remove(hp1);
        hp1.Free;
        if Unconditional then
          MakeUnconditional(taicpu(hp2))
        else
          begin
            if SetC = C_NONE then
              InternalError(2018061401);
            taicpu(hp2).SetCondition(SetC);
          end;
        { drop the SETcc itself when its register is dead after the jump }
        if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs) then
          begin
            asml.Remove(p);
            UpdateUsedRegs(next);
            p.Free;
            Result := True;
            p := hp2;
          end;
        DebugMsg(SPeepholeOptimization + 'SETcc/TESTCmp/Jcc -> Jcc',p);
      end;
  end;
{ Pass-1 peephole for FSTP/FISTP: when an fstp/fistp to a memory location is
  immediately followed by an fld/fild of the same size from the same
  location, and that pair sits directly before the function's exit code and
  targets a frame-pointer-relative temp above the function result, both
  instructions are removed (the value is already on the FPU stack).
  Rewriting the pair as a single fst is explicitly NOT done for non-extended
  sizes because the store would round - see the disabled code below. }
function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
  { returns true if a "continue" should be done after this optimization }
  var
    hp1, hp2: tai;
  begin
    Result := false;
    if MatchOpType(taicpu(p),top_ref) and
       GetNextInstruction(p, hp1) and
       (hp1.typ = ait_instruction) and
       (((taicpu(hp1).opcode = A_FLD) and
         (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and
         (taicpu(hp1).opcode = A_FILD))) and
       MatchOpType(taicpu(hp1),top_ref) and
       (taicpu(hp1).opsize = taicpu(p).opsize) and
       RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
      begin
        { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
        if (taicpu(p).opsize=S_FX) and
           GetNextInstruction(hp1, hp2) and
           (hp2.typ = ait_instruction) and
           IsExitCode(hp2) and
           (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
           { do not touch a store into the function result temp }
           not(assigned(current_procinfo.procdef.funcretsym) and
               (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
           (taicpu(p).oper[0]^.ref^.index = NR_NO) then
          begin
            asml.remove(p);
            asml.remove(hp1);
            p.free;
            hp1.free;
            p := hp2;
            RemoveLastDeallocForFuncRes(p);
            Result := true;
          end
        (* can't be done because the store operation rounds
        else
          { fst can't store an extended value! }
          if (taicpu(p).opsize <> S_FX) and
             (taicpu(p).opsize <> S_IQ) then
            begin
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else taicpu(p).opcode := A_FIST;
              asml.remove(hp1);
              hp1.free;
            end
        *)
      end;
  end;
{ Pass-1 peephole for FLD: removes a redundant FPU stack push by folding the
  load into the following fxxxp st,st1 operation.  Two shapes are handled:
    - "fld reg; fxxxp st,st1"   -> "fxxx reg,st" (non-commutative ops are
      reversed: FSUBP becomes FSUBR etc., because the operand order flips);
    - "fld/fst mem1; fld mem1/mem2; fxxxp st,st1" -> the middle load is
      merged into the operation (or turned into "fld st" when the operation
      cannot be folded).
  Returns True only for the register-operand shape. }
function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
  var
    hp1, hp2: tai;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg) and
       GetNextInstruction(p, hp1) and
       (hp1.typ = Ait_Instruction) and
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       (taicpu(hp1).oper[0]^.reg = NR_ST) and
       (taicpu(hp1).oper[1]^.reg = NR_ST1) then
      { change to
          fld      reg               fxxx reg,st
          fxxxp    st, st1 (hp1)
        Remark: non commutative operations must be reversed!
      }
      begin
        case taicpu(hp1).opcode Of
          A_FMULP,A_FADDP,
          A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
            begin
              { drop the 'pop' form; SUB/DIV swap to their reversed variants
                because the implicit operand order changes }
              case taicpu(hp1).opcode Of
                A_FADDP: taicpu(hp1).opcode := A_FADD;
                A_FMULP: taicpu(hp1).opcode := A_FMUL;
                A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                else
                  internalerror(2019050534);
              end;
              taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
              taicpu(hp1).oper[1]^.reg := NR_ST;
              asml.remove(p);
              p.free;
              p := hp1;
              Result:=true;
              exit;
            end;
          else
            ;
        end;
      end
    else
      if MatchOpType(taicpu(p),top_ref) and
         GetNextInstruction(p, hp2) and
         (hp2.typ = Ait_Instruction) and
         MatchOpType(taicpu(hp2),top_reg,top_reg) and
         (taicpu(p).opsize in [S_FS, S_FL]) and
         (taicpu(hp2).oper[0]^.reg = NR_ST) and
         (taicpu(hp2).oper[1]^.reg = NR_ST1) then
        if GetLastInstruction(p, hp1) and
           MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
           MatchOpType(taicpu(hp1),top_ref) and
           RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          if ((taicpu(hp2).opcode = A_FMULP) or
              (taicpu(hp2).opcode = A_FADDP)) then
            { change to
                fld/fst   mem1  (hp1)       fld/fst   mem1
                fld       mem1  (p)         fadd/
                faddp/                      fmul     st, st
                fmulp  st, st1 (hp2) }
            begin
              asml.remove(p);
              p.free;
              p := hp1;
              if (taicpu(hp2).opcode = A_FADDP) then
                taicpu(hp2).opcode := A_FADD
              else
                taicpu(hp2).opcode := A_FMUL;
              taicpu(hp2).oper[1]^.reg := NR_ST;
            end
          else
            { change to
                fld/fst  mem1 (hp1)   fld/fst  mem1
                fld      mem1 (p)     fld      st}
            begin
              taicpu(p).changeopsize(S_FL);
              taicpu(p).loadreg(0,NR_ST);
            end
        else
          begin
            case taicpu(hp2).opcode Of
              A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                { change to
                    fld/fst  mem1 (hp1)      fld/fst  mem1
                    fld      mem2 (p)        fxxx     mem2
                    fxxxp    st, st1 (hp2) }
                begin
                  { as above: the reversed variants compensate for the
                    flipped operand order of the non-commutative ops }
                  case taicpu(hp2).opcode Of
                    A_FADDP: taicpu(p).opcode := A_FADD;
                    A_FMULP: taicpu(p).opcode := A_FMUL;
                    A_FSUBP: taicpu(p).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(p).opcode := A_FSUB;
                    A_FDIVP: taicpu(p).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(p).opcode := A_FDIV;
                    else
                      internalerror(2019050533);
                  end;
                  asml.remove(hp2);
                  hp2.free;
                end
              else
                ;
            end
          end
  end;
{ Pass-1 peephole for CMP with a constant first operand:
    - "cmp $0,%reg" is converted to "test %reg,%reg"; every directly
      following Jcc/SETcc is first re-evaluated because only ZF/SF are
      meaningful afterwards (unsigned-below/overflow conditions can never
      hold, above/above-or-equal degenerate to NE / always);
    - "cmp $1,r/m; jl" becomes "cmp $0,r/m; jle" (and then TEST when the
      operand is a register);
    - "cmp $MIN_SIGNED,%reg; je/jne" becomes "neg %reg; jo/jno" when the
      register is dead afterwards.
  Returns True when p itself was rewritten. }
function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean;
  var
    v: TCGInt;
    hp1, hp2: tai;
  begin
    Result:=false;
    if taicpu(p).oper[0]^.typ = top_const then
      begin
        { Though GetNextInstruction can be factored out, it is an expensive
          call, so delay calling it until we have first checked cheaper
          conditions that are independent of it. }
        if (taicpu(p).oper[0]^.val = 0) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           GetNextInstruction(p, hp1) and
           MatchInstruction(hp1,A_Jcc,A_SETcc,[]) then
          begin
            hp2 := p;
            { When dealing with "cmp $0,%reg", only ZF and SF contain
              anything meaningful once it's converted to "test %reg,%reg";
              additionally, some jumps will always (or never) branch, so
              evaluate every jump immediately following the
              comparison, optimising the conditions if possible.
              Similarly with SETcc... those that are always set to 0 or 1
              are changed to MOV instructions }
            while GetNextInstruction(hp2, hp1) and
                  MatchInstruction(hp1,A_Jcc,A_SETcc,[]) do
              begin
                case taicpu(hp1).condition of
                  C_B, C_C, C_NAE, C_O:
                    { For B/NAE:
                        Will never branch since an unsigned integer can never be below zero
                      For C/O:
                        Result cannot overflow because 0 is being subtracted
                    }
                    begin
                      if taicpu(hp1).opcode = A_Jcc then
                        begin
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (jump removed)', hp1);
                          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
                          AsmL.Remove(hp1);
                          hp1.Free;
                          { Since hp1 was deleted, hp2 must not be updated }
                          Continue;
                        end
                      else
                        begin
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (set -> mov 0)', hp1);
                          { Convert "set(c) %reg" instruction to "movb 0,%reg" }
                          taicpu(hp1).opcode := A_MOV;
                          taicpu(hp1).condition := C_None;
                          taicpu(hp1).opsize := S_B;
                          taicpu(hp1).allocate_oper(2);
                          taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(hp1).loadconst(0, 0);
                        end;
                    end;
                  C_BE, C_NA:
                    begin
                      { Will only branch if equal to zero }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition BE/NA --> E', hp1);
                      taicpu(hp1).condition := C_E;
                    end;
                  C_A, C_NBE:
                    begin
                      { Will only branch if not equal to zero }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition A/NBE --> NE', hp1);
                      taicpu(hp1).condition := C_NE;
                    end;
                  C_AE, C_NB, C_NC, C_NO:
                    begin
                      { Will always branch }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition AE/NB/NC/NO --> Always', hp1);
                      if taicpu(hp1).opcode = A_Jcc then
                        begin
                          MakeUnconditional(taicpu(hp1));
                          { Any jumps/set that follow will now be dead code }
                          RemoveDeadCodeAfterJump(taicpu(hp1));
                          Break;
                        end
                      else
                        begin
                          { Convert "set(c) %reg" instruction to "movb 1,%reg" }
                          taicpu(hp1).opcode := A_MOV;
                          taicpu(hp1).condition := C_None;
                          taicpu(hp1).opsize := S_B;
                          taicpu(hp1).allocate_oper(2);
                          taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(hp1).loadconst(0, 1);
                        end;
                    end;
                  C_None:
                    InternalError(2020012201);
                  C_P, C_PE, C_NP, C_PO:
                    { We can't handle parity checks and they should never be generated
                      after a general-purpose CMP (it's used in some floating-point
                      comparisons that don't use CMP) }
                    InternalError(2020012202);
                  else
                    { Zero/Equality, Sign, their complements and all of the
                      signed comparisons do not need to be converted };
                end;
                hp2 := hp1;
              end;
            { Convert the instruction to a TEST }
            taicpu(p).opcode := A_TEST;
            taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
            Result := True;
            Exit;
          end
        else if (taicpu(p).oper[0]^.val = 1) and
                GetNextInstruction(p, hp1) and
                MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
                (taicpu(hp1).condition in [C_L, C_NGE]) then
          begin
            { Convert;       To:
                cmp $1,r/m     cmp $0,r/m
                jl  @lbl       jle @lbl
            }
            DebugMsg(SPeepholeOptimization + 'Cmp1Jl2Cmp0Jle', p);
            taicpu(p).oper[0]^.val := 0;
            taicpu(hp1).condition := C_LE;
            { If the instruction is now "cmp $0,%reg", convert it to a
              TEST (and effectively do the work of the "cmp $0,%reg" in
              the block above)
              If it's a reference, we can get away with not setting
              Result to True because he haven't evaluated the jump
              in this pass yet.
            }
            if (taicpu(p).oper[1]^.typ = top_reg) then
              begin
                taicpu(p).opcode := A_TEST;
                taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                Result := True;
              end;
            Exit;
          end
        else if (taicpu(p).oper[1]^.typ = top_reg) then
          begin
            { cmp register,$8000                neg register
              je target                 -->     jo target

              .... only if register is deallocated before jump.}
            case Taicpu(p).opsize of
              S_B: v:=$80;
              S_W: v:=$8000;
              S_L: v:=qword($80000000);
              { S_Q will never happen: cmp with 64 bit constants is not possible }
              S_Q:
                Exit;
              else
                internalerror(2013112905);
            end;
            if (taicpu(p).oper[0]^.val=v) and
               GetNextInstruction(p, hp1) and
               MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
               (Taicpu(hp1).condition in [C_E,C_NE]) then
              begin
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs,tai(p.next));
                if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
                    { NEG of the minimum signed value is the only case that
                      sets OF, so je/jne map exactly to jo/jno }
                    Taicpu(p).opcode:=A_NEG;
                    Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
                    Taicpu(p).clearop(1);
                    Taicpu(p).ops:=1;
                    if Taicpu(hp1).condition=C_E then
                      Taicpu(hp1).condition:=C_O
                    else
                      Taicpu(hp1).condition:=C_NO;
                    Result:=true;
                    exit;
                  end;
              end;
          end;
      end;
  end;
{ Pass-2 peephole optimisations applied when p is a MOV instruction.
  Each "else if" branch below recognises one short instruction pattern
  beginning at p and rewrites it in place.  Result is set to True when
  the instruction stream was changed in a way that warrants re-examining
  p (or its replacement) in another optimiser pass. }
function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;

  { Decides whether replacing a mov/mov/mov triple by a single XCHG is
    profitable on the current target (used by the MovMovMov2XChg branch). }
  function IsXCHGAcceptable: Boolean; inline;
    begin
      { Always accept if optimising for size }
      Result := (cs_opt_size in current_settings.optimizerswitches) or
        (
{$ifdef x86_64}
          { XCHG takes 3 cycles on AMD Athlon64 }
          (current_settings.optimizecputype >= cpu_core_i)
{$else x86_64}
          { From the Pentium M onwards, XCHG only has a latency of 2 rather
            than 3, so it becomes a saving compared to three MOVs with two of
            them able to execute simultaneously. [Kit] }
          (current_settings.optimizecputype >= cpu_PentiumM)
{$endif x86_64}
        );
    end;

  var
    NewRef: TReference;  { scratch reference built by the Mov{Add,Sub}2Lea branch }
    hp1,hp2,hp3: tai;    { instructions following p }
{$ifndef x86_64}
    hp4: tai;
    OperIdx: Integer;    { operand index when patching registers in hp4 }
{$endif x86_64}
  begin
    Result:=false;
    { Every optimisation here needs at least one following instruction }
    if not GetNextInstruction(p, hp1) then
      Exit;
    if MatchInstruction(hp1, A_JMP, [S_NO]) then
      begin
        { Sometimes the MOVs that OptPass2JMP produces can be improved
          further, but we can't just put this jump optimisation in pass 1
          because it tends to perform worse when conditional jumps are
          nearby (e.g. when converting CMOV instructions). [Kit] }
        if OptPass2JMP(hp1) then
          { call OptPass1MOV once to potentially merge any MOVs that were created }
          Result := OptPass1MOV(p)
          { OptPass2MOV will now exit but will be called again if OptPass1MOV
            returned True and the instruction is still a MOV, thus checking
            the optimisations below }
          { If OptPass2JMP returned False, no optimisations were done to
            the jump and there are no further optimisations that can be done
            to the MOV instruction on this pass }
      end
    else if MatchOpType(taicpu(p),top_reg,top_reg) and
      (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
      MatchInstruction(hp1,A_ADD,A_SUB,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp1),top_const,top_reg) and
      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
      { be lazy, checking separately for sub would be slightly better }
      (abs(taicpu(hp1).oper[0]^.val)<=$7fffffff) then
      begin
        { Change:
            movl/q %reg1,%reg2      movl/q %reg1,%reg2
            addl/q $x,%reg2         subl/q $x,%reg2
          To:
            leal/q x(%reg1),%reg2   leal/q -x(%reg1),%reg2

          Valid only because LEA, unlike ADD/SUB, does not write the flags;
          hence the check below that no later instruction reads them. }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
        if not GetNextInstruction(hp1, hp2) or
          (
            { The FLAGS register isn't always tracked properly, so do not
              perform this optimisation if a conditional statement follows }
            not RegReadByInstruction(NR_DEFAULTFLAGS, hp2) and
            not RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp2, TmpUsedRegs)
          ) then
          begin
            reference_reset(NewRef, 1, []);
            NewRef.base := taicpu(p).oper[0]^.reg;
            NewRef.scalefactor := 1;
            if taicpu(hp1).opcode = A_ADD then
              begin
                DebugMsg(SPeepholeOptimization + 'MovAdd2Lea', p);
                NewRef.offset := taicpu(hp1).oper[0]^.val;
              end
            else
              begin
                DebugMsg(SPeepholeOptimization + 'MovSub2Lea', p);
                NewRef.offset := -taicpu(hp1).oper[0]^.val;
              end;
            taicpu(p).opcode := A_LEA;
            taicpu(p).loadref(0, NewRef);
            Asml.Remove(hp1);
            hp1.Free;
            Result := True;
            Exit;
          end;
      end
    else if MatchOpType(taicpu(p),top_reg,top_reg) and
{$ifdef x86_64}
      MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
{$else x86_64}
      MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
      { mov reg1, reg2                mov reg1, reg2
        movzx/sx reg2, reg3     to    movzx/sx reg1, reg3 }
      begin
        { Re-source the extension from reg1 to break the dependency on reg2 }
        taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
        DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
        { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
          or unless supreg(reg3) = supreg(reg2)). [Kit] }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
          not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
        then
          begin
            asml.remove(p);
            p.free;
            p := hp1;
            Result:=true;
          end;
        exit;
      end
    else if MatchOpType(taicpu(p),top_reg,top_reg) and
      IsXCHGAcceptable and
      { XCHG doesn't support 8-byte registers }
      (taicpu(p).opsize <> S_B) and
      MatchInstruction(hp1, A_MOV, []) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2, A_MOV, []) and
      { Don't need to call MatchOpType for hp2 because the operand matches below cover for it }
      MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^.reg) and
      MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) then
      begin
        { mov %reg1,%reg2
          mov %reg3,%reg1     ->    xchg %reg3,%reg1
          mov %reg2,%reg3
          (%reg2 not used afterwards)

          Note that xchg takes 3 cycles to execute, and generally mov's take
          only one cycle apiece, but the first two mov's can be executed in
          parallel, only taking 2 cycles overall.  Older processors should
          therefore only optimise for size. [Kit] }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
        if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs) then
          begin
            DebugMsg(SPeepholeOptimization + 'MovMovMov2XChg', p);
            AllocRegBetween(taicpu(hp2).oper[1]^.reg, p, hp1, UsedRegs);
            taicpu(hp1).opcode := A_XCHG;
            asml.Remove(p);
            asml.Remove(hp2);
            p.Free;
            hp2.Free;
            p := hp1;
            Result := True;
            Exit;
          end;
      end
    else if MatchOpType(taicpu(p),top_reg,top_reg) and
      MatchInstruction(hp1, A_SAR, []) then
      begin
        if MatchOperand(taicpu(hp1).oper[0]^, 31) then
          begin
            { the use of %edx also covers the opsize being S_L }
            if MatchOperand(taicpu(hp1).oper[1]^, NR_EDX) then
              begin
                { Note it has to be specifically "movl %eax,%edx", and those specific sub-registers }
                if (taicpu(p).oper[0]^.reg = NR_EAX) and
                  (taicpu(p).oper[1]^.reg = NR_EDX) then
                  begin
                    { Change:
                        movl %eax,%edx
                        sarl $31,%edx
                      To:
                        cltd
                    }
                    DebugMsg(SPeepholeOptimization + 'MovSar2Cltd', p);
                    Asml.Remove(hp1);
                    hp1.Free;
                    taicpu(p).opcode := A_CDQ;
                    taicpu(p).opsize := S_NO;
                    taicpu(p).clearop(1);
                    taicpu(p).clearop(0);
                    taicpu(p).ops:=0;
                    Result := True;
                  end
                else if (cs_opt_size in current_settings.optimizerswitches) and
                  (taicpu(p).oper[0]^.reg = NR_EDX) and
                  (taicpu(p).oper[1]^.reg = NR_EAX) then
                  begin
                    { Change:
                        movl %edx,%eax
                        sarl $31,%edx
                      To:
                        movl %edx,%eax
                        cltd

                      Note that this creates a dependency between the two instructions,
                      so only perform if optimising for size. }
                    DebugMsg(SPeepholeOptimization + 'MovSar2MovCltd', p);
                    taicpu(hp1).opcode := A_CDQ;
                    taicpu(hp1).opsize := S_NO;
                    taicpu(hp1).clearop(1);
                    taicpu(hp1).clearop(0);
                    taicpu(hp1).ops:=0;
                  end;
{$ifndef x86_64}
              end
            { Don't bother if CMOV is supported, because a more optimal
              sequence would have been generated for the Abs() intrinsic }
            else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
              { the use of %eax also covers the opsize being S_L }
              MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) and
              (taicpu(p).oper[0]^.reg = NR_EAX) and
              (taicpu(p).oper[1]^.reg = NR_EDX) and
              GetNextInstruction(hp1, hp2) and
              MatchInstruction(hp2, A_XOR, [S_L]) and
              MatchOperand(taicpu(hp2).oper[0]^, NR_EAX) and
              MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) and
              GetNextInstruction(hp2, hp3) and
              MatchInstruction(hp3, A_SUB, [S_L]) and
              MatchOperand(taicpu(hp3).oper[0]^, NR_EAX) and
              MatchOperand(taicpu(hp3).oper[1]^, NR_EDX) then
              begin
                { Change:
                    movl %eax,%edx
                    sarl $31,%eax
                    xorl %eax,%edx
                    subl %eax,%edx
                    (Instruction that uses %edx)
                    (%eax deallocated)
                    (%edx deallocated)
                  To:
                    cltd
                    xorl %edx,%eax  <-- Note the registers have swapped
                    subl %edx,%eax
                    (Instruction that uses %eax)  <-- %eax rather than %edx
                }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
                UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
                UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
                if not RegUsedAfterInstruction(NR_EAX, hp3, TmpUsedRegs) then
                  begin
                    if GetNextInstruction(hp3, hp4) and
                      not RegModifiedByInstruction(NR_EDX, hp4) and
                      not RegUsedAfterInstruction(NR_EDX, hp4, TmpUsedRegs) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'abs() intrinsic optimisation', p);
                        taicpu(p).opcode := A_CDQ;
                        taicpu(p).clearop(1);
                        taicpu(p).clearop(0);
                        taicpu(p).ops:=0;
                        AsmL.Remove(hp1);
                        hp1.Free;
                        taicpu(hp2).loadreg(0, NR_EDX);
                        taicpu(hp2).loadreg(1, NR_EAX);
                        taicpu(hp3).loadreg(0, NR_EDX);
                        taicpu(hp3).loadreg(1, NR_EAX);
                        AllocRegBetween(NR_EAX, hp3, hp4, TmpUsedRegs);
                        { Convert references in the following instruction (hp4) from %edx to %eax }
                        for OperIdx := 0 to taicpu(hp4).ops - 1 do
                          with taicpu(hp4).oper[OperIdx]^ do
                            case typ of
                              top_reg:
                                if reg = NR_EDX then
                                  reg := NR_EAX;
                              top_ref:
                                begin
                                  if ref^.base = NR_EDX then
                                    ref^.base := NR_EAX;
                                  if ref^.index = NR_EDX then
                                    ref^.index := NR_EAX;
                                end;
                              else
                                ;
                            end;
                      end;
                  end;
{$else x86_64}
              end;
          end
        else if MatchOperand(taicpu(hp1).oper[0]^, 63) and
          { the use of %rdx also covers the opsize being S_Q }
          MatchOperand(taicpu(hp1).oper[1]^, NR_RDX) then
          begin
            { Note it has to be specifically "movq %rax,%rdx", and those specific sub-registers }
            if (taicpu(p).oper[0]^.reg = NR_RAX) and
              (taicpu(p).oper[1]^.reg = NR_RDX) then
              begin
                { Change:
                    movq %rax,%rdx
                    sarq $63,%rdx
                  To:
                    cqto
                }
                DebugMsg(SPeepholeOptimization + 'MovSar2Cqto', p);
                Asml.Remove(hp1);
                hp1.Free;
                taicpu(p).opcode := A_CQO;
                taicpu(p).opsize := S_NO;
                taicpu(p).clearop(1);
                taicpu(p).clearop(0);
                taicpu(p).ops:=0;
                Result := True;
              end
            else if (cs_opt_size in current_settings.optimizerswitches) and
              (taicpu(p).oper[0]^.reg = NR_RDX) and
              (taicpu(p).oper[1]^.reg = NR_RAX) then
              begin
                { Change:
                    movq %rdx,%rax
                    sarq $63,%rdx
                  To:
                    movq %rdx,%rax
                    cqto

                  Note that this creates a dependency between the two instructions,
                  so only perform if optimising for size. }
                DebugMsg(SPeepholeOptimization + 'MovSar2MovCqto', p);
                taicpu(hp1).opcode := A_CQO;
                taicpu(hp1).opsize := S_NO;
                taicpu(hp1).clearop(1);
                taicpu(hp1).clearop(0);
                taicpu(hp1).ops:=0;
{$endif x86_64}
              end;
          end;
      end
    else if MatchInstruction(hp1, A_MOV, []) and
      (taicpu(hp1).oper[1]^.typ = top_reg) then
      { Though "GetNextInstruction" could be factored out, along with
        the instructions that depend on hp2, it is an expensive call that
        should be delayed for as long as possible, hence we do cheaper
        checks first that are likely to be False. [Kit] }
      begin
        if MatchOperand(taicpu(p).oper[1]^, NR_EDX) and
          (
            (
              (taicpu(hp1).oper[1]^.reg = NR_EAX) and
              (
                MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
                MatchOperand(taicpu(hp1).oper[0]^, NR_EDX)
              )
            ) or
            (
              (taicpu(hp1).oper[1]^.reg = NR_EDX) and
              (
                MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
                MatchOperand(taicpu(hp1).oper[0]^, NR_EAX)
              )
            )
          ) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2, A_SAR, []) and
          MatchOperand(taicpu(hp2).oper[0]^, 31) then
          begin
            if MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) then
              begin
                { Change:
                    movl r/m,%edx         movl r/m,%eax         movl r/m,%edx         movl r/m,%eax
                    movl %edx,%eax   or   movl %eax,%edx   or   movl r/m,%eax   or    movl r/m,%edx
                    sarl $31,%edx         sarl $31,%edx         sarl $31,%edx         sarl $31,%edx
                  To:
                    movl r/m,%eax   <- Note the change in register
                    cltd
                }
                DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCltd', p);
                AllocRegBetween(NR_EAX, p, hp1, UsedRegs);
                taicpu(p).loadreg(1, NR_EAX);
                taicpu(hp1).opcode := A_CDQ;
                taicpu(hp1).clearop(1);
                taicpu(hp1).clearop(0);
                taicpu(hp1).ops:=0;
                AsmL.Remove(hp2);
                hp2.Free;
(*
{$ifdef x86_64}
              end
            else if MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) and
              { This code sequence does not get generated - however it might become useful
                if and when 128-bit signed integer types make an appearance, so the code
                is kept here for when it is eventually needed. [Kit] }
              (
                (
                  (taicpu(hp1).oper[1]^.reg = NR_RAX) and
                  (
                    MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
                    MatchOperand(taicpu(hp1).oper[0]^, NR_RDX)
                  )
                ) or
                (
                  (taicpu(hp1).oper[1]^.reg = NR_RDX) and
                  (
                    MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
                    MatchOperand(taicpu(hp1).oper[0]^, NR_RAX)
                  )
                )
              ) and
              GetNextInstruction(hp1, hp2) and
              MatchInstruction(hp2, A_SAR, [S_Q]) and
              MatchOperand(taicpu(hp2).oper[0]^, 63) and
              MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) then
              begin
                { Change:
                    movq r/m,%rdx         movq r/m,%rax         movq r/m,%rdx         movq r/m,%rax
                    movq %rdx,%rax   or   movq %rax,%rdx   or   movq r/m,%rax   or    movq r/m,%rdx
                    sarq $63,%rdx         sarq $63,%rdx         sarq $63,%rdx         sarq $63,%rdx
                  To:
                    movq r/m,%rax   <- Note the change in register
                    cqto
                }
                DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCqto', p);
                AllocRegBetween(NR_RAX, p, hp1, UsedRegs);
                taicpu(p).loadreg(1, NR_RAX);
                taicpu(hp1).opcode := A_CQO;
                taicpu(hp1).clearop(1);
                taicpu(hp1).clearop(0);
                taicpu(hp1).ops:=0;
                AsmL.Remove(hp2);
                hp2.Free;
{$endif x86_64}
*)
              end;
          end;
      end
    else if (taicpu(p).oper[0]^.typ = top_ref) and
      (hp1.typ = ait_instruction) and
      { while the GetNextInstruction(hp1,hp2) call could be factored out,
        doing it separately in both branches allows to do the cheap checks
        with low probability earlier }
      ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[])
       ) or
       ((taicpu(hp1).opcode=A_LEA) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[]) and
        ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
          (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
         ) or
         (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
           taicpu(p).oper[1]^.reg) and
          (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
        ) and
        ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
       )
      ) and
      MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
      (taicpu(hp2).oper[1]^.typ = top_ref) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs,tai(p.next));
        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
        if (RefsEqual(taicpu(hp2).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
          not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,TmpUsedRegs))) then
          { change   mov            (ref), reg
                     add/sub/or/... reg2/$const, reg
                     mov            reg, (ref)
                     # release reg
            to       add/sub/or/... reg2/$const, (ref) }
          begin
            case taicpu(hp1).opcode of
              A_INC,A_DEC,A_NOT,A_NEG :
                { single-operand instructions: just retarget the operand at the memory }
                taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
              A_LEA :
                begin
                  { the LEA computed reg + (index or base or offset); pick out the
                    addend that is not reg itself and turn the LEA into an ADD }
                  taicpu(hp1).opcode:=A_ADD;
                  if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                  else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                  else
                    taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                  DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
                end
              else
                taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
            end;
            asml.remove(p);
            asml.remove(hp2);
            p.free;
            hp2.free;
            p := hp1
          end;
        Exit;
{$ifdef x86_64}
      end
    else if (taicpu(p).opsize = S_L) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      (
        MatchInstruction(hp1, A_MOV,[]) and
        (taicpu(hp1).opsize = S_L) and
        (taicpu(hp1).oper[1]^.typ = top_reg)
      ) and (
        GetNextInstruction(hp1, hp2) and
        (tai(hp2).typ=ait_instruction) and
        (taicpu(hp2).opsize = S_Q) and
        (
          (
            MatchInstruction(hp2, A_ADD,[]) and
            (taicpu(hp2).opsize = S_Q) and
            (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
            (
              (
                (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
                (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
              ) or (
                (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
              )
            )
          ) or (
            MatchInstruction(hp2, A_LEA,[]) and
            (taicpu(hp2).oper[0]^.ref^.offset = 0) and
            (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
            (
              (
                (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
                (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
              ) or (
                (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
              )
            ) and (
              (
                (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
              ) or (
                (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
              )
            )
          )
        )
      ) and (
        GetNextInstruction(hp2, hp3) and
        MatchInstruction(hp3, A_SHR,[]) and
        (taicpu(hp3).opsize = S_Q) and
        (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
        (taicpu(hp3).oper[0]^.val = 1) and
        (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
      ) then
      begin
        { Change   movl x,    reg1d        movl x,    reg1d
                   movl y,    reg2d        movl y,    reg2d
                   addq reg2q,reg1q   or   leaq (reg1q,reg2q),reg1q
                   shrq $1,   reg1q        shrq $1,   reg1q

          ( reg1d and reg2d can be switched around in the first two instructions )

          To       movl x,    reg1d
                   addl y,    reg1d
                   rcrl $1,   reg1d

          This corresponds to the common expression (x + y) shr 1, where
          x and y are Cardinals (replacing "shr 1" with "div 2" produces
          smaller code, but won't account for x + y causing an overflow). [Kit] }
        if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
          { Change first MOV command to have the same register as the final output }
          taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
        else
          taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
        { Change second MOV command to an ADD command. This is easier than
          converting the existing command because it means we don't have to
          touch 'y', which might be a complicated reference, and also the
          fact that the third command might either be ADD or LEA. [Kit] }
        taicpu(hp1).opcode := A_ADD;
        { Delete old ADD/LEA instruction }
        asml.remove(hp2);
        hp2.free;
        { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
        taicpu(hp3).opcode := A_RCR;
        taicpu(hp3).changeopsize(S_L);
        setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
{$endif x86_64}
      end;
  end;
  4237. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  4238. var
  4239. hp1 : tai;
  4240. begin
  4241. Result:=false;
  4242. if (taicpu(p).ops >= 2) and
  4243. ((taicpu(p).oper[0]^.typ = top_const) or
  4244. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  4245. (taicpu(p).oper[1]^.typ = top_reg) and
  4246. ((taicpu(p).ops = 2) or
  4247. ((taicpu(p).oper[2]^.typ = top_reg) and
  4248. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  4249. GetLastInstruction(p,hp1) and
  4250. MatchInstruction(hp1,A_MOV,[]) and
  4251. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  4252. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  4253. begin
  4254. TransferUsedRegs(TmpUsedRegs);
  4255. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) or
  4256. ((taicpu(p).ops = 3) and (taicpu(p).oper[1]^.reg=taicpu(p).oper[2]^.reg)) then
  4257. { change
  4258. mov reg1,reg2
  4259. imul y,reg2 to imul y,reg1,reg2 }
  4260. begin
  4261. taicpu(p).ops := 3;
  4262. taicpu(p).loadreg(2,taicpu(p).oper[1]^.reg);
  4263. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  4264. DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
  4265. asml.remove(hp1);
  4266. hp1.free;
  4267. result:=true;
  4268. end;
  4269. end;
  4270. end;
  4271. procedure TX86AsmOptimizer.ConvertJumpToRET(const p: tai; const ret_p: tai);
  4272. var
  4273. ThisLabel: TAsmLabel;
  4274. begin
  4275. ThisLabel := tasmlabel(taicpu(p).oper[0]^.ref^.symbol);
  4276. ThisLabel.decrefs;
  4277. taicpu(p).opcode := A_RET;
  4278. taicpu(p).is_jmp := false;
  4279. taicpu(p).ops := taicpu(ret_p).ops;
  4280. case taicpu(ret_p).ops of
  4281. 0:
  4282. taicpu(p).clearop(0);
  4283. 1:
  4284. taicpu(p).loadconst(0,taicpu(ret_p).oper[0]^.val);
  4285. else
  4286. internalerror(2016041301);
  4287. end;
  4288. { If the original label is now dead, it might turn out that the label
  4289. immediately follows p. As a result, everything beyond it, which will
  4290. be just some final register configuration and a RET instruction, is
  4291. now dead code. [Kit] }
  4292. { NOTE: This is much faster than introducing a OptPass2RET routine and
  4293. running RemoveDeadCodeAfterJump for each RET instruction, because
  4294. this optimisation rarely happens and most RETs appear at the end of
  4295. routines where there is nothing that can be stripped. [Kit] }
  4296. if not ThisLabel.is_used then
  4297. RemoveDeadCodeAfterJump(p);
  4298. end;
{ Pass-2 peephole optimisations for an unconditional JMP to a local label:
  when the code at the target is a RET (optionally preceded by one MOV),
  the jump is replaced by a copy of that tail, saving the branch.
  Returns True when the instruction stream was changed. }
function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  var
    hp1, hp2, hp3: tai;
    OperIdx: Integer;  { operand index when registering writes of the copied MOV }
  begin
    result:=false;
    { Only plain direct jumps to a local label qualify (no register/indexed
      targets, and the symbol must actually be an assembler label) }
    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and (taicpu(p).oper[0]^.ref^.symbol is tasmlabel) then
      begin
        { Find the label in the instruction list and skip to the first real
          instruction behind it }
        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
        if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ = ait_instruction) then
          begin
            case taicpu(hp1).opcode of
              A_RET:
                {
                  change
                         jmp .L1
                         ...
                     .L1:
                         ret
                  into
                         ret
                }
                begin
                  ConvertJumpToRET(p, hp1);
                  result:=true;
                end;
              A_MOV:
                {
                  change
                         jmp .L1
                         ...
                     .L1:
                         mov ##, ##
                         ret
                  into
                         mov ##, ##
                         ret
                }
                { This optimisation tends to increase code size if the pass 1 MOV optimisations aren't
                  re-run, so only do this particular optimisation if optimising for speed or when
                  optimisations are very in-depth. [Kit] }
                if (current_settings.optimizerswitches * [cs_opt_level3, cs_opt_size]) <> [cs_opt_size] then
                  begin
                    GetNextInstruction(hp1, hp2);
                    if not Assigned(hp2) then
                      Exit;
                    if (hp2.typ in [ait_label, ait_align]) then
                      SkipLabels(hp2,hp2);
                    { Only a MOV immediately followed by a RET is handled }
                    if Assigned(hp2) and MatchInstruction(hp2, A_RET, [S_NO]) then
                      begin
                        { Duplicate the MOV instruction }
                        hp3:=tai(hp1.getcopy);
                        asml.InsertBefore(hp3, p);
                        { Make sure the compiler knows about any final registers written here }
                        for OperIdx := 0 to 1 do
                          with taicpu(hp3).oper[OperIdx]^ do
                            begin
                              case typ of
                                top_ref:
                                  begin
                                    if (ref^.base <> NR_NO) {$ifdef x86_64} and (ref^.base <> NR_RIP) {$endif x86_64} then
                                      AllocRegBetween(ref^.base, hp3, tai(p.Next), UsedRegs);
                                    if (ref^.index <> NR_NO) {$ifdef x86_64} and (ref^.index <> NR_RIP) {$endif x86_64} then
                                      AllocRegBetween(ref^.index, hp3, tai(p.Next), UsedRegs);
                                  end;
                                top_reg:
                                  AllocRegBetween(reg, hp3, tai(p.Next), UsedRegs);
                                else
                                  ;
                              end;
                            end;
                        { Now change the jump into a RET instruction }
                        ConvertJumpToRET(p, hp2);
                        result:=true;
                      end;
                  end;
              else
                { Any other opcode at the target: nothing to do }
                ;
            end;
          end;
      end;
  end;
  4382. class function TX86AsmOptimizer.CanBeCMOV(p : tai) : boolean;
  4383. begin
  4384. CanBeCMOV:=assigned(p) and
  4385. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  4386. { we can't use cmov ref,reg because
  4387. ref could be nil and cmov still throws an exception
  4388. if ref=nil but the mov isn't done (FK)
  4389. or ((taicpu(p).oper[0]^.typ = top_ref) and
  4390. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  4391. }
  4392. (taicpu(p).oper[1]^.typ = top_reg) and
  4393. (
  4394. (taicpu(p).oper[0]^.typ = top_reg) or
  4395. { allow references, but only pure symbols or got rel. addressing with RIP as based,
  4396. it is not expected that this can cause a seg. violation }
  4397. (
  4398. (taicpu(p).oper[0]^.typ = top_ref) and
  4399. IsRefSafe(taicpu(p).oper[0]^.ref)
  4400. )
  4401. );
  4402. end;
  4403. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  4404. var
  4405. hp1,hp2,hp3,hp4,hpmov2: tai;
  4406. carryadd_opcode : TAsmOp;
  4407. l : Longint;
  4408. condition : TAsmCond;
  4409. symbol: TAsmSymbol;
  4410. begin
  4411. result:=false;
  4412. symbol:=nil;
  4413. if GetNextInstruction(p,hp1) then
  4414. begin
  4415. symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  4416. if (hp1.typ=ait_instruction) and
  4417. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  4418. (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
  4419. { jb @@1 cmc
  4420. inc/dec operand --> adc/sbb operand,0
  4421. @@1:
  4422. ... and ...
  4423. jnb @@1
  4424. inc/dec operand --> adc/sbb operand,0
  4425. @@1: }
  4426. begin
  4427. carryadd_opcode:=A_NONE;
  4428. if Taicpu(p).condition in [C_NAE,C_B] then
  4429. begin
  4430. if Taicpu(hp1).opcode=A_INC then
  4431. carryadd_opcode:=A_ADC;
  4432. if Taicpu(hp1).opcode=A_DEC then
  4433. carryadd_opcode:=A_SBB;
  4434. if carryadd_opcode<>A_NONE then
  4435. begin
  4436. Taicpu(p).clearop(0);
  4437. Taicpu(p).ops:=0;
  4438. Taicpu(p).is_jmp:=false;
  4439. Taicpu(p).opcode:=A_CMC;
  4440. Taicpu(p).condition:=C_NONE;
  4441. Taicpu(hp1).ops:=2;
  4442. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  4443. Taicpu(hp1).loadconst(0,0);
  4444. Taicpu(hp1).opcode:=carryadd_opcode;
  4445. result:=true;
  4446. exit;
  4447. end;
  4448. end;
  4449. if Taicpu(p).condition in [C_AE,C_NB] then
  4450. begin
  4451. if Taicpu(hp1).opcode=A_INC then
  4452. carryadd_opcode:=A_ADC;
  4453. if Taicpu(hp1).opcode=A_DEC then
  4454. carryadd_opcode:=A_SBB;
  4455. if carryadd_opcode<>A_NONE then
  4456. begin
  4457. asml.remove(p);
  4458. p.free;
  4459. Taicpu(hp1).ops:=2;
  4460. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  4461. Taicpu(hp1).loadconst(0,0);
  4462. Taicpu(hp1).opcode:=carryadd_opcode;
  4463. p:=hp1;
  4464. result:=true;
  4465. exit;
  4466. end;
  4467. end;
  4468. end;
  4469. { Detect the following:
  4470. jmp<cond> @Lbl1
  4471. jmp @Lbl2
  4472. ...
  4473. @Lbl1:
  4474. ret
  4475. Change to:
  4476. jmp<inv_cond> @Lbl2
  4477. ret
  4478. }
  4479. if MatchInstruction(hp1,A_JMP,[]) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
  4480. begin
  4481. hp2:=getlabelwithsym(TAsmLabel(symbol));
  4482. if Assigned(hp2) and SkipLabels(hp2,hp2) and
  4483. MatchInstruction(hp2,A_RET,[S_NO]) then
  4484. begin
  4485. taicpu(p).condition := inverse_cond(taicpu(p).condition);
  4486. { Change label address to that of the unconditional jump }
  4487. taicpu(p).loadoper(0, taicpu(hp1).oper[0]^);
  4488. TAsmLabel(symbol).DecRefs;
  4489. taicpu(hp1).opcode := A_RET;
  4490. taicpu(hp1).is_jmp := false;
  4491. taicpu(hp1).ops := taicpu(hp2).ops;
  4492. DebugMsg(SPeepholeOptimization+'JccJmpRet2J!ccRet',p);
  4493. case taicpu(hp2).ops of
  4494. 0:
  4495. taicpu(hp1).clearop(0);
  4496. 1:
  4497. taicpu(hp1).loadconst(0,taicpu(hp2).oper[0]^.val);
  4498. else
  4499. internalerror(2016041302);
  4500. end;
  4501. end;
  4502. end;
  4503. end;
  4504. {$ifndef i8086}
  4505. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  4506. begin
  4507. { check for
  4508. jCC xxx
  4509. <several movs>
  4510. xxx:
  4511. }
  4512. l:=0;
  4513. GetNextInstruction(p, hp1);
  4514. while assigned(hp1) and
  4515. CanBeCMOV(hp1) and
  4516. { stop on labels }
  4517. not(hp1.typ=ait_label) do
  4518. begin
  4519. inc(l);
  4520. GetNextInstruction(hp1,hp1);
  4521. end;
  4522. if assigned(hp1) then
  4523. begin
  4524. if FindLabel(tasmlabel(symbol),hp1) then
  4525. begin
  4526. if (l<=4) and (l>0) then
  4527. begin
  4528. condition:=inverse_cond(taicpu(p).condition);
  4529. GetNextInstruction(p,hp1);
  4530. repeat
  4531. if not Assigned(hp1) then
  4532. InternalError(2018062900);
  4533. taicpu(hp1).opcode:=A_CMOVcc;
  4534. taicpu(hp1).condition:=condition;
  4535. UpdateUsedRegs(hp1);
  4536. GetNextInstruction(hp1,hp1);
  4537. until not(CanBeCMOV(hp1));
  4538. { Remember what hp1 is in case there's multiple aligns to get rid of }
  4539. hp2 := hp1;
  4540. repeat
  4541. if not Assigned(hp2) then
  4542. InternalError(2018062910);
  4543. case hp2.typ of
  4544. ait_label:
  4545. { What we expected - break out of the loop (it won't be a dead label at the top of
  4546. a cluster because that was optimised at an earlier stage) }
  4547. Break;
  4548. ait_align:
  4549. { Go to the next entry until a label is found (may be multiple aligns before it) }
  4550. begin
  4551. hp2 := tai(hp2.Next);
  4552. Continue;
  4553. end;
  4554. else
  4555. begin
  4556. { Might be a comment or temporary allocation entry }
  4557. if not (hp2.typ in SkipInstr) then
  4558. InternalError(2018062911);
  4559. hp2 := tai(hp2.Next);
  4560. Continue;
  4561. end;
  4562. end;
  4563. until False;
  4564. { Now we can safely decrement the reference count }
  4565. tasmlabel(symbol).decrefs;
  4566. DebugMsg(SPeepholeOptimization+'JccMov2CMov',p);
  4567. { Remove the original jump }
  4568. asml.Remove(p);
  4569. p.Free;
  4570. GetNextInstruction(hp2, p); { Instruction after the label }
  4571. { Remove the label if this is its final reference }
  4572. if (tasmlabel(symbol).getrefs=0) then
  4573. StripLabelFast(hp1);
  4574. if Assigned(p) then
  4575. begin
  4576. UpdateUsedRegs(p);
  4577. result:=true;
  4578. end;
  4579. exit;
  4580. end;
  4581. end
  4582. else
  4583. begin
  4584. { check further for
  4585. jCC xxx
  4586. <several movs 1>
  4587. jmp yyy
  4588. xxx:
  4589. <several movs 2>
  4590. yyy:
  4591. }
  4592. { hp2 points to jmp yyy }
  4593. hp2:=hp1;
  4594. { skip hp1 to xxx (or an align right before it) }
  4595. GetNextInstruction(hp1, hp1);
  4596. if assigned(hp2) and
  4597. assigned(hp1) and
  4598. (l<=3) and
  4599. (hp2.typ=ait_instruction) and
  4600. (taicpu(hp2).is_jmp) and
  4601. (taicpu(hp2).condition=C_None) and
  4602. { real label and jump, no further references to the
  4603. label are allowed }
  4604. (tasmlabel(symbol).getrefs=1) and
  4605. FindLabel(tasmlabel(symbol),hp1) then
  4606. begin
  4607. l:=0;
  4608. { skip hp1 to <several moves 2> }
  4609. if (hp1.typ = ait_align) then
  4610. GetNextInstruction(hp1, hp1);
  4611. GetNextInstruction(hp1, hpmov2);
  4612. hp1 := hpmov2;
  4613. while assigned(hp1) and
  4614. CanBeCMOV(hp1) do
  4615. begin
  4616. inc(l);
  4617. GetNextInstruction(hp1, hp1);
  4618. end;
  4619. { hp1 points to yyy (or an align right before it) }
  4620. hp3 := hp1;
  4621. if assigned(hp1) and
  4622. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  4623. begin
  4624. condition:=inverse_cond(taicpu(p).condition);
  4625. GetNextInstruction(p,hp1);
  4626. repeat
  4627. taicpu(hp1).opcode:=A_CMOVcc;
  4628. taicpu(hp1).condition:=condition;
  4629. UpdateUsedRegs(hp1);
  4630. GetNextInstruction(hp1,hp1);
  4631. until not(assigned(hp1)) or
  4632. not(CanBeCMOV(hp1));
  4633. condition:=inverse_cond(condition);
  4634. hp1 := hpmov2;
  4635. { hp1 is now at <several movs 2> }
  4636. while Assigned(hp1) and CanBeCMOV(hp1) do
  4637. begin
  4638. taicpu(hp1).opcode:=A_CMOVcc;
  4639. taicpu(hp1).condition:=condition;
  4640. UpdateUsedRegs(hp1);
  4641. GetNextInstruction(hp1,hp1);
  4642. end;
  4643. hp1 := p;
  4644. { Get first instruction after label }
  4645. GetNextInstruction(hp3, p);
  4646. if assigned(p) and (hp3.typ = ait_align) then
  4647. GetNextInstruction(p, p);
  4648. { Don't dereference yet, as doing so will cause
  4649. GetNextInstruction to skip the label and
  4650. optional align marker. [Kit] }
  4651. GetNextInstruction(hp2, hp4);
  4652. DebugMsg(SPeepholeOptimization+'JccMovJmpMov2CMovCMov',hp1);
  4653. { remove jCC }
  4654. asml.remove(hp1);
  4655. hp1.free;
  4656. { Now we can safely decrement it }
  4657. tasmlabel(symbol).decrefs;
  4658. { Remove label xxx (it will have a ref of zero due to the initial check }
  4659. StripLabelFast(hp4);
  4660. { remove jmp }
  4661. symbol := taicpu(hp2).oper[0]^.ref^.symbol;
  4662. asml.remove(hp2);
  4663. hp2.free;
  4664. { As before, now we can safely decrement it }
  4665. tasmlabel(symbol).decrefs;
  4666. { Remove label yyy (and the optional alignment) if its reference falls to zero }
  4667. if tasmlabel(symbol).getrefs = 0 then
  4668. StripLabelFast(hp3);
  4669. if Assigned(p) then
  4670. begin
  4671. UpdateUsedRegs(p);
  4672. result:=true;
  4673. end;
  4674. exit;
  4675. end;
  4676. end;
  4677. end;
  4678. end;
  4679. end;
  4680. {$endif i8086}
  4681. end;
{ Pass-1 optimisations for MOVSX/MOVZX ("Movx") instructions.  Three
  independent patterns are handled:
  1. movx + foldable arithmetic op + mov back to the movx source:
     the arithmetic is performed at the narrow size directly on the
     source, and the extension and write-back are removed;
  2. a MOVZX followed by an AND whose mask equals the zero-extension
     mask: the AND is superfluous and removed;
  3. register-to-register MOVZX forms that have smaller/faster
     equivalents (AND with an immediate mask, or a plain MOV with the
     mask folded into a following AND).
  Returns true when p was replaced; p may be advanced to a following
  instruction. }
function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  var
    hp1,hp2: tai;
    reg_and_hp1_is_instr: Boolean;
  begin
    result:=false;
    { true when the extension targets a register and is directly
      followed by another instruction (left in hp1) }
    reg_and_hp1_is_instr:=(taicpu(p).oper[1]^.typ = top_reg) and
      GetNextInstruction(p,hp1) and
      (hp1.typ = ait_instruction);
    if reg_and_hp1_is_instr and
      IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_MOV,[]) and
      (taicpu(hp2).oper[0]^.typ = top_reg) and
      { the MOV must write back to the very operand the movx read from }
      OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
{$ifdef i386}
      { not all registers have byte size sub registers on i386 }
      ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
      { the arith op must act on the same super-register the MOV copies
        out (destination operand for two-operand forms, the single
        operand for one-operand forms) }
      (((taicpu(hp1).ops=2) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
       ((taicpu(hp1).ops=1) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
      { the extended register must be dead afterwards, otherwise its
        value is still needed and the movx cannot be dropped }
      not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
      begin
        { change movsX/movzX    reg/ref, reg2
                 add/sub/or/... reg3/$const, reg2
                 mov            reg2 reg/ref
          to     add/sub/or/... reg3/$const, reg/ref }
        { by example:
            movswl %si,%eax     movswl %si,%eax      p
            decl   %eax         addl   %edx,%eax     hp1
            movw   %ax,%si      movw   %ax,%si       hp2
          ->
            movswl %si,%eax     movswl %si,%eax      p
            decw   %eax         addw   %edx,%eax     hp1
            movw   %ax,%si      movw   %ax,%si       hp2
        }
        taicpu(hp1).changeopsize(taicpu(hp2).opsize);
        {
          ->
            movswl %si,%eax     movswl %si,%eax      p
            decw   %si          addw   %dx,%si       hp1
            movw   %ax,%si      movw   %ax,%si       hp2
        }
        case taicpu(hp1).ops of
          1:
            taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
          2:
            begin
              taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
              if (taicpu(hp1).oper[0]^.typ = top_reg) then
                { shrink the source register to the new operation size }
                setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
            end;
          else
            internalerror(2008042701);
        end;
        {
          ->
            decw   %si          addw   %dx,%si       p
        }
        DebugMsg(SPeepholeOptimization + 'var3',p);
        asml.remove(p);
        asml.remove(hp2);
        p.free;
        hp2.free;
        { continue optimising at the surviving arithmetic instruction }
        p:=hp1;
      end
    else if taicpu(p).opcode=A_MOVZX then
      begin
        { removes superfluous And's after movzx's }
        if reg_and_hp1_is_instr and
          (taicpu(hp1).opcode = A_AND) and
          MatchOpType(taicpu(hp1),top_const,top_reg) and
          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            { the AND is redundant when its mask keeps exactly the bits
              the zero extension already produced }
            case taicpu(p).opsize Of
              S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'var4',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
              S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ffff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'var5',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
{$ifdef x86_64}
              S_LQ:
                if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                  begin
                    if (cs_asm_source in current_settings.globalswitches) then
                      asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
                    asml.remove(hp1);
                    hp1.Free;
                  end;
{$endif x86_64}
              else
                ;
            end;
          end;
        { changes some movzx constructs to faster synonyms (all examples
          are given with eax/ax, but are also valid for other registers)}
        if MatchOpType(taicpu(p),top_reg,top_reg) then
          begin
            case taicpu(p).opsize of
              { Technically, movzbw %al,%ax cannot be encoded in 32/64-bit mode
                (the machine code is equivalent to movzbl %al,%eax), but the
                code generator still generates that assembler instruction and
                it is silently converted. This should probably be checked.
                [Kit] }
              S_BW:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    (
                      not IsMOVZXAcceptable
                      { and $0xff,%ax has a smaller encoding but risks a partial write penalty }
                      or (
                        (cs_opt_size in current_settings.optimizerswitches) and
                        (taicpu(p).oper[1]^.reg = NR_AX)
                      )
                    ) then
                    {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var7',p);
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_W);
                      taicpu(p).loadConst(0,$ff);
                      Result := True;
                    end
                  else if not IsMOVZXAcceptable and
                    GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    MatchOpType(taicpu(hp1),top_const,top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                      to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var8',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_W);
                      setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                      { narrow the mask to the bits the movzx would have kept }
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      Result := True;
                    end;
                end;
{$ifndef i8086} { movzbl %al,%eax cannot be encoded in 16-bit mode (the machine code is equivalent to movzbw %al,%ax }
              S_BL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    (
                      not IsMOVZXAcceptable
                      { and $0xff,%eax has a smaller encoding but risks a partial write penalty }
                      or (
                        (cs_opt_size in current_settings.optimizerswitches) and
                        (taicpu(p).oper[1]^.reg = NR_EAX)
                      )
                    ) then
                    { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                    begin
                      DebugMsg(SPeepholeOptimization + 'var9',p);
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ff);
                      Result := True;
                    end
                  else if not IsMOVZXAcceptable and
                    GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    MatchOpType(taicpu(hp1),top_const,top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var10',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      Result := True;
                    end;
                end;
{$endif i8086}
              S_WL:
                if not IsMOVZXAcceptable then
                  begin
                    if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) then
                      { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                      begin
                        DebugMsg(SPeepholeOptimization + 'var11',p);
                        taicpu(p).opcode := A_AND;
                        taicpu(p).changeopsize(S_L);
                        taicpu(p).loadConst(0,$ffff);
                        Result := True;
                      end
                    else if GetNextInstruction(p, hp1) and
                      (tai(hp1).typ = ait_instruction) and
                      (taicpu(hp1).opcode = A_AND) and
                      (taicpu(hp1).oper[0]^.typ = top_const) and
                      (taicpu(hp1).oper[1]^.typ = top_reg) and
                      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                      { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                        to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                      begin
                        DebugMsg(SPeepholeOptimization + 'var12',p);
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).changeopsize(S_L);
                        { do not use R_SUBWHOLE
                          as movl %rdx,%eax
                          is invalid in assembler PM }
                        setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                        Result := True;
                      end;
                  end;
              else
                InternalError(2017050705);
            end;
          end
        else if not IsMOVZXAcceptable and (taicpu(p).oper[0]^.typ = top_ref) then
          begin
            { memory-source movzx followed by an AND on the same register:
              narrow the AND's mask (and size) to the bits the zero
              extension can produce; the movzx itself is kept }
            if GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              (taicpu(hp1).opcode = A_AND) and
              MatchOpType(taicpu(hp1),top_const,top_reg) and
              (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
              begin
                //taicpu(p).opcode := A_MOV;
                case taicpu(p).opsize Of
                  S_BL:
                    begin
                      DebugMsg(SPeepholeOptimization + 'var13',p);
                      taicpu(hp1).changeopsize(S_L);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end;
                  S_WL:
                    begin
                      DebugMsg(SPeepholeOptimization + 'var14',p);
                      taicpu(hp1).changeopsize(S_L);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                    end;
                  S_BW:
                    begin
                      DebugMsg(SPeepholeOptimization + 'var15',p);
                      taicpu(hp1).changeopsize(S_W);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end;
                  else
                    Internalerror(2017050704)
                end;
                Result := True;
              end;
          end;
      end;
  end;
  4946. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  4947. var
  4948. hp1 : tai;
  4949. MaskLength : Cardinal;
  4950. begin
  4951. Result:=false;
  4952. if GetNextInstruction(p, hp1) then
  4953. begin
  4954. if MatchOpType(taicpu(p),top_const,top_reg) and
  4955. MatchInstruction(hp1,A_AND,[]) and
  4956. MatchOpType(taicpu(hp1),top_const,top_reg) and
  4957. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  4958. { the second register must contain the first one, so compare their subreg types }
  4959. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  4960. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  4961. { change
  4962. and const1, reg
  4963. and const2, reg
  4964. to
  4965. and (const1 and const2), reg
  4966. }
  4967. begin
  4968. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  4969. DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
  4970. asml.remove(p);
  4971. p.Free;
  4972. p:=hp1;
  4973. Result:=true;
  4974. exit;
  4975. end
  4976. else if MatchOpType(taicpu(p),top_const,top_reg) and
  4977. MatchInstruction(hp1,A_MOVZX,[]) and
  4978. (taicpu(hp1).oper[0]^.typ = top_reg) and
  4979. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  4980. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  4981. (((taicpu(p).opsize=S_W) and
  4982. (taicpu(hp1).opsize=S_BW)) or
  4983. ((taicpu(p).opsize=S_L) and
  4984. (taicpu(hp1).opsize in [S_WL,S_BL]))
  4985. {$ifdef x86_64}
  4986. or
  4987. ((taicpu(p).opsize=S_Q) and
  4988. (taicpu(hp1).opsize in [S_BQ,S_WQ]))
  4989. {$endif x86_64}
  4990. ) then
  4991. begin
  4992. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  4993. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  4994. ) or
  4995. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  4996. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  4997. then
  4998. begin
  4999. { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
  5000. 32-bit register to a 64-bit register, or even a version called MOVZXD, so
  5001. code that tests for the presence of AND 0xffffffff followed by MOVZX is
  5002. wasted, and is indictive of a compiler bug if it were triggered. [Kit]
  5003. NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
  5004. }
  5005. DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
  5006. asml.remove(hp1);
  5007. hp1.free;
  5008. Exit;
  5009. end;
  5010. end
  5011. else if MatchOpType(taicpu(p),top_const,top_reg) and
  5012. MatchInstruction(hp1,A_SHL,[]) and
  5013. MatchOpType(taicpu(hp1),top_const,top_reg) and
  5014. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
  5015. begin
  5016. {$ifopt R+}
  5017. {$define RANGE_WAS_ON}
  5018. {$R-}
  5019. {$endif}
  5020. { get length of potential and mask }
  5021. MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
  5022. { really a mask? }
  5023. {$ifdef RANGE_WAS_ON}
  5024. {$R+}
  5025. {$endif}
  5026. if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
  5027. { unmasked part shifted out? }
  5028. ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
  5029. begin
  5030. DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
  5031. { take care of the register (de)allocs following p }
  5032. UpdateUsedRegs(tai(p.next));
  5033. asml.remove(p);
  5034. p.free;
  5035. p:=hp1;
  5036. Result:=true;
  5037. exit;
  5038. end;
  5039. end
  5040. else if MatchOpType(taicpu(p),top_const,top_reg) and
  5041. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  5042. (taicpu(hp1).oper[0]^.typ = top_reg) and
  5043. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  5044. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  5045. (((taicpu(p).opsize=S_W) and
  5046. (taicpu(hp1).opsize=S_BW)) or
  5047. ((taicpu(p).opsize=S_L) and
  5048. (taicpu(hp1).opsize in [S_WL,S_BL]))
  5049. {$ifdef x86_64}
  5050. or
  5051. ((taicpu(p).opsize=S_Q) and
  5052. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  5053. {$endif x86_64}
  5054. ) then
  5055. begin
  5056. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  5057. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  5058. ) or
  5059. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  5060. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  5061. {$ifdef x86_64}
  5062. or
  5063. (((taicpu(hp1).opsize)=S_LQ) and
  5064. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  5065. )
  5066. {$endif x86_64}
  5067. then
  5068. begin
  5069. DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
  5070. asml.remove(hp1);
  5071. hp1.free;
  5072. Exit;
  5073. end;
  5074. end
  5075. else if (taicpu(p).oper[1]^.typ = top_reg) and
  5076. (hp1.typ = ait_instruction) and
  5077. (taicpu(hp1).is_jmp) and
  5078. (taicpu(hp1).opcode<>A_JMP) and
  5079. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  5080. begin
  5081. { change
  5082. and x, reg
  5083. jxx
  5084. to
  5085. test x, reg
  5086. jxx
  5087. if reg is deallocated before the
  5088. jump, but only if it's a conditional jump (PFV)
  5089. }
  5090. taicpu(p).opcode := A_TEST;
  5091. Exit;
  5092. end;
  5093. end;
  5094. { Lone AND tests }
  5095. if MatchOpType(taicpu(p),top_const,top_reg) then
  5096. begin
  5097. {
  5098. - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
  5099. - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
  5100. - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
  5101. }
  5102. if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
  5103. ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
  5104. ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
  5105. begin
  5106. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
  5107. if taicpu(p).opsize = S_L then
  5108. Include(OptsToCheck,aoc_MovAnd2Mov_3);
  5109. end;
  5110. end;
  5111. end;
  5112. function TX86AsmOptimizer.OptPass2Lea(var p : tai) : Boolean;
  5113. begin
  5114. Result:=false;
  5115. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  5116. MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  5117. (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
  5118. begin
  5119. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
  5120. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
  5121. taicpu(p).opcode:=A_ADD;
  5122. DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
  5123. result:=true;
  5124. end
  5125. else if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  5126. MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  5127. (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
  5128. begin
  5129. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
  5130. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
  5131. taicpu(p).opcode:=A_ADD;
  5132. DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
  5133. result:=true;
  5134. end;
  5135. end;
  5136. function TX86AsmOptimizer.OptPass2SUB(var p: tai): Boolean;
  5137. var
  5138. hp1: tai; NewRef: TReference;
  5139. begin
  5140. { Change:
  5141. subl/q $x,%reg1
  5142. movl/q %reg1,%reg2
  5143. To:
  5144. leal/q $-x(%reg1),%reg2
  5145. subl/q $x,%reg1
  5146. Breaks the dependency chain and potentially permits the removal of
  5147. a CMP instruction if one follows.
  5148. }
  5149. Result := False;
  5150. if not (cs_opt_size in current_settings.optimizerswitches) and
  5151. (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
  5152. MatchOpType(taicpu(p),top_const,top_reg) and
  5153. GetNextInstruction(p, hp1) and
  5154. MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
  5155. (taicpu(hp1).oper[1]^.typ = top_reg) and
  5156. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) then
  5157. begin
  5158. { Change the MOV instruction to a LEA instruction, and update the
  5159. first operand }
  5160. reference_reset(NewRef, 1, []);
  5161. NewRef.base := taicpu(p).oper[1]^.reg;
  5162. NewRef.scalefactor := 1;
  5163. NewRef.offset := -taicpu(p).oper[0]^.val;
  5164. taicpu(hp1).opcode := A_LEA;
  5165. taicpu(hp1).loadref(0, NewRef);
  5166. { Move what is now the LEA instruction to before the SUB instruction }
  5167. Asml.Remove(hp1);
  5168. Asml.InsertBefore(hp1, p);
  5169. AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
  5170. DebugMsg(SPeepholeOptimization + 'SubMov2LeaSub', p);
  5171. Result := True;
  5172. end;
  5173. end;
{ Tail-call optimisation on a LEA that adjusts the stack pointer:
  replaces
    leal(q)  x(<stackpointer>),<stackpointer>
    call     procname
    leal(q)  -x(<stackpointer>),<stackpointer>
    ret
  by
    jmp procname
  Only performed on optimisation level 4 because it destroys stack back
  traces. }
function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;

  { Advances hp1 past instructions that do not touch the stack pointer;
    returns false if the end of the instruction list is reached first. }
  function SkipSimpleInstructions(var hp1 : tai) : Boolean;
    begin
      { we can skip all instructions not messing with the stack pointer }
      while assigned(hp1) and {MatchInstruction(taicpu(hp1),[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
        A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
        A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
        A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
        ({(taicpu(hp1).ops=0) or }
         ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
           (MatchOpType(taicpu(hp1),top_ref,top_reg))
          ) and }
          not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
         )
        ) do
        GetNextInstruction(hp1,hp1);
      Result:=assigned(hp1);
    end;

  var
    hp1, hp2, hp3: tai;
  begin
    Result:=false;
    { replace
        leal(q) x(<stackpointer>),<stackpointer>
        call procname
        leal(q) -x(<stackpointer>),<stackpointer>
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      { p must be a pure displacement off the stack pointer, into the
        stack pointer }
      MatchOpType(taicpu(p),top_ref,top_reg) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      { the -8 or -24 are not required, but bail out early if possible,
        higher values are unlikely }
      ((taicpu(p).oper[0]^.ref^.offset=-8) or
       (taicpu(p).oper[0]^.ref^.offset=-24)) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(p, hp1) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      { the second lea must undo exactly the adjustment made by the first }
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_ref,top_reg) and
      (taicpu(hp2).oper[0]^.ref^.offset=-taicpu(p).oper[0]^.ref^.offset) and
      (taicpu(hp2).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
      (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      MatchInstruction(hp3,A_RET,[S_NO]) and
      (taicpu(hp3).ops=0) then
      begin
        { turn the CALL into a JMP and drop both stack adjustments and
          the RET }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        RemoveCurrentP(p);
        AsmL.Remove(hp2);
        hp2.free;
        AsmL.Remove(hp3);
        hp3.free;
        Result:=true;
      end;
  end;
{ Post-peephole rewrites of "mov $const,%reg":
  - $0            -> "xor %reg,%reg" (shorter encoding; only when the
                     flags are dead, since XOR writes them);
  - $1..$FFFFFFFF -> for movq only, shrink to movl (the upper 32 bits
                     are implicitly zeroed, and the encoding is smaller);
  - $-1           -> "or $-1,%reg" when optimising for size (flags must
                     be dead). }
function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
  var
    Value, RegName: string;
  begin
    Result:=false;
    if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
      begin
        case taicpu(p).oper[0]^.val of
          0:
            { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
            if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $0,%reg" into "xor %reg,%reg" }
                taicpu(p).opcode := A_XOR;
                taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                Result := True;
              end;
          $1..$FFFFFFFF:
            begin
              { Code size reduction by J. Gareth "Kit" Moreton }
              { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
              case taicpu(p).opsize of
                S_Q:
                  begin
                    { capture the names before the register is shrunk,
                      so the debug message shows the original operands }
                    RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                    Value := debug_tostr(taicpu(p).oper[0]^.val);
                    { The actual optimization }
                    setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                    taicpu(p).changeopsize(S_L);
                    DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                    Result := True;
                  end;
                else
                  { Do nothing };
              end;
            end;
          -1:
            { Don't make this optimisation if the CPU flags are required, since OR scrambles them }
            if (cs_opt_size in current_settings.optimizerswitches) and
              (taicpu(p).opsize <> S_B) and
              not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $-1,%reg" into "or $-1,%reg" }
                { NOTES:
                  - No size saving is made when changing a Word-sized assignment unless the register is AX (smaller encoding)
                  - This operation creates a false dependency on the register, so only do it when optimising for size
                  - It is possible to set memory operands using this method, but this creates an even greater false dependency, so don't do this at all
                }
                taicpu(p).opcode := A_OR;
                Result := True;
              end;
        end;
      end;
  end;
  5303. function TX86AsmOptimizer.PostPeepholeOptMOVSX(var p : tai) : boolean;
  5304. begin
  5305. Result := False;
  5306. if not MatchOpType(taicpu(p), top_reg, top_reg) then
  5307. Exit;
  5308. { Convert:
  5309. movswl %ax,%eax -> cwtl
  5310. movslq %eax,%rax -> cdqe
  5311. NOTE: Don't convert movswl %al,%ax to cbw, because cbw and cwde
  5312. refer to the same opcode and depends only on the assembler's
  5313. current operand-size attribute. [Kit]
  5314. }
  5315. with taicpu(p) do
  5316. case opsize of
  5317. S_WL:
  5318. if (oper[0]^.reg = NR_AX) and (oper[1]^.reg = NR_EAX) then
  5319. begin
  5320. DebugMsg(SPeepholeOptimization + 'Converted movswl %ax,%eax to cwtl', p);
  5321. opcode := A_CWDE;
  5322. clearop(0);
  5323. clearop(1);
  5324. ops := 0;
  5325. Result := True;
  5326. end;
  5327. {$ifdef x86_64}
  5328. S_LQ:
  5329. if (oper[0]^.reg = NR_EAX) and (oper[1]^.reg = NR_RAX) then
  5330. begin
  5331. DebugMsg(SPeepholeOptimization + 'Converted movslq %eax,%rax to cltq', p);
  5332. opcode := A_CDQE;
  5333. clearop(0);
  5334. clearop(1);
  5335. ops := 0;
  5336. Result := True;
  5337. end;
  5338. {$endif x86_64}
  5339. else
  5340. ;
  5341. end;
  5342. end;
  5343. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  5344. begin
  5345. Result:=false;
  5346. { change "cmp $0, %reg" to "test %reg, %reg" }
  5347. if MatchOpType(taicpu(p),top_const,top_reg) and
  5348. (taicpu(p).oper[0]^.val = 0) then
  5349. begin
  5350. taicpu(p).opcode := A_TEST;
  5351. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  5352. Result:=true;
  5353. end;
  5354. end;
{ Removes a redundant flag-setting TEST/OR when the preceding instruction
  already set the flags the following conditional consumes; may also
  canonicalise "test $-1,%reg" into "test %reg,%reg". }
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean;   { true when p is "test $-1,%y" rather than a self-test }
    hp1,hp2 : tai;            { hp1: previous instruction; hp2: flag consumer; hp1 is later reused as p's successor }
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y | test $-1, %y (x)
      j(n)z _Label
      as the first instruction already adjusts the ZF
      %y operand may also be a reference }
    { "test $-1,%y" has the same ZF/SF effect as "test %y,%y", so both
      spellings of the self-test are recognised }
    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);
    { p must be a self-test (or the $-1 form), preceded by an instruction
      and followed by a SETcc/Jcc/CMOVcc that reads the flags }
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
      GetLastInstruction(p, hp1) and
      (tai(hp1).typ = ait_instruction) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
              ((taicpu(hp1).opcode <> A_ADD) and
              (taicpu(hp1).opcode <> A_SUB))) then
              begin
                { the previous instruction already set ZF for %y -> delete p }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for }
              { shifts by a (nonzero) constant }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                { the shift already set ZF for %y -> delete p }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                case taicpu(hp1).opcode of
                  A_DEC, A_INC:
                    { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                    begin
                      case taicpu(hp1).opcode Of
                        A_DEC: taicpu(hp1).opcode := A_SUB;
                        A_INC: taicpu(hp1).opcode := A_ADD;
                        else
                          ;
                      end;
                      taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                      taicpu(hp1).loadConst(0,1);
                      taicpu(hp1).ops:=2;
                    end;
                  else
                    ;
                end;
                { flags now fully provided by hp1 -> delete p }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end
        else
          { previous instruction doesn't qualify; still shorten the
            "test $-1,%reg" spelling into "test %reg,%reg" }
          if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
            taicpu(p).loadoper(0,taicpu(p).oper[1]^);
      end { case }
    { change "test $-1,%reg" into "test %reg,%reg" }
    else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  end;
{ Post-peephole optimisation of CALL: on old 32-bit CPUs rewrites
  "call x; jmp y" as "push y; jmp x"; otherwise folds a following RET
  into the call (CallRet2Jmp) or removes an unreachable RET after a
  noreturn call. }
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1 : tai;       { instruction following the call }
{$ifndef x86_64}
    hp2 : taicpu;    { newly created PUSH for the call/jmp rewrite }
{$endif x86_64}
  begin
    Result:=false;
{$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
      not(cs_create_pic in current_settings.moduleswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_JMP,[S_NO]) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { turn "call x; jmp y" into "push y; jmp x": the callee's RET
          then transfers directly to y }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end
    else
{$endif x86_64}
    { replace
        call procname
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
      else if the subroutine is marked as no return, remove the ret
    }
    if ((cs_opt_level4 in current_settings.optimizerswitches) or
      (po_noreturn in current_procinfo.procdef.procoptions)) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_RET,[S_NO]) and
      (taicpu(hp1).ops=0) then
      begin
        if cs_opt_level4 in current_settings.optimizerswitches then
          begin
            { tail-call: let the callee's RET return to our caller }
            taicpu(p).opcode := A_JMP;
            taicpu(p).is_jmp := true;
            DebugMsg(SPeepholeOptimization + 'CallRet2Jmp done',p);
          end
        else
          { noreturn callee: the RET is unreachable, only delete it }
          DebugMsg(SPeepholeOptimization + 'CallRet2Call done',p);
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end;
  end;
  5505. {$ifdef x86_64}
  5506. function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
  5507. var
  5508. PreMessage: string;
  5509. begin
  5510. Result := False;
  5511. { Code size reduction by J. Gareth "Kit" Moreton }
  5512. { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
  5513. if (taicpu(p).opsize in [S_BQ, S_WQ]) and
  5514. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
  5515. then
  5516. begin
  5517. { Has 64-bit register name and opcode suffix }
  5518. PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
  5519. { The actual optimization }
  5520. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  5521. if taicpu(p).opsize = S_BQ then
  5522. taicpu(p).changeopsize(S_BL)
  5523. else
  5524. taicpu(p).changeopsize(S_WL);
  5525. DebugMsg(SPeepholeOptimization + PreMessage +
  5526. debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
  5527. end;
  5528. end;
  5529. function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  5530. var
  5531. PreMessage, RegName: string;
  5532. begin
  5533. { Code size reduction by J. Gareth "Kit" Moreton }
  5534. { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
  5535. as this removes the REX prefix }
  5536. Result := False;
  5537. if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  5538. Exit;
  5539. if taicpu(p).oper[0]^.typ <> top_reg then
  5540. { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
  5541. InternalError(2018011500);
  5542. case taicpu(p).opsize of
  5543. S_Q:
  5544. begin
  5545. if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
  5546. begin
  5547. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
  5548. PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';
  5549. { The actual optimization }
  5550. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  5551. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  5552. taicpu(p).changeopsize(S_L);
  5553. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
  5554. DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
  5555. end;
  5556. end;
  5557. else
  5558. ;
  5559. end;
  5560. end;
  5561. {$endif}
  5562. class procedure TX86AsmOptimizer.OptimizeRefs(var p: taicpu);
  5563. var
  5564. OperIdx: Integer;
  5565. begin
  5566. for OperIdx := 0 to p.ops - 1 do
  5567. if p.oper[OperIdx]^.typ = top_ref then
  5568. optimize_ref(p.oper[OperIdx]^.ref^, False);
  5569. end;
  5570. end.