aoptx86.pas 444 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
{ (extraction artifact removed: unreadable line-number gutter residue) }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { Flags for optimisations that are too expensive to test for unconditionally;
    the pre-optimisation pass sets them so later passes only check when useful. }
  TOptsToCheck = (
    aoc_MovAnd2Mov_3
  );

  { x86/x86-64 specific peephole optimizer.  Declaration only; the method
    bodies live in the implementation section of this unit. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { some optimizations are very expensive to check, so the
      pre opt pass can be used to set some flags, depending on the found
      instructions if it is worth to check a certain optimization }
    OptsToCheck : set of TOptsToCheck;
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    { This version of GetNextInstructionUsingReg will look across conditional jumps,
      potentially allowing further optimisation (although it might need to know if
      it crossed a conditional jump). }
    function GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var CrossJump: Boolean): Boolean;
    {
      In comparison with GetNextInstructionUsingReg, GetNextInstructionUsingRegTrackingUse tracks
      the use of a register by allocs/dealloc, so it can ignore calls.

      In the following example, GetNextInstructionUsingReg will return the second movq,
      GetNextInstructionUsingRegTrackingUse won't.

      movq %rdi,%rax
      # Register rdi released
      # Register rdi allocated
      movq %rax,%rdi

      While in this example:

      movq %rdi,%rax
      call proc
      movq %rdi,%rax

      GetNextInstructionUsingRegTrackingUse will return the second instruction while GetNextInstructionUsingReg
      won't.
    }
    function GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean;
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
  private
    function SkipSimpleInstructions(var hp1: tai): Boolean;
  protected
    class function IsMOVZXAcceptable: Boolean; static; inline;

    { Attempts to allocate a volatile integer register for use between p and hp,
      using AUsedRegs for the current register usage information. Returns NR_NO
      if no free register could be found }
    function GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
    { Attempts to allocate a volatile MM register for use between p and hp,
      using AUsedRegs for the current register usage information. Returns NR_NO
      if no free register could be found }
    function GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;

    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;

    { Replaces all references to AOldReg in a memory reference to ANewReg }
    class function ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean; static;
    { Replaces all references to AOldReg in an operand to ANewReg }
    class function ReplaceRegisterInOper(const p: taicpu; const OperIdx: Integer; const AOldReg, ANewReg: TRegister): Boolean; static;
    { Replaces all references to AOldReg in an instruction to ANewReg,
      except where the register is being written }
    function ReplaceRegisterInInstruction(const p: taicpu; const AOldReg, ANewReg: TRegister): Boolean;

    { Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents),
      or writes to a global symbol }
    class function IsRefSafe(const ref: PReference): Boolean; static; inline;
    { Returns true if the given MOV instruction can be safely converted to CMOV }
    class function CanBeCMOV(p : tai) : boolean; static;
    { Converts the LEA instruction to ADD/INC/SUB/DEC. Returns True if the
      conversion was successful }
    function ConvertLEA(const p : taicpu): Boolean;
    function DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
    procedure DebugMsg(const s : string; p : tai);inline;
    class function IsExitCode(p : tai) : boolean; static;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean; static;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    function DoSubAddOpt(var p : tai) : Boolean;

    { Pre-peephole pass handlers (one per opcode family). }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    function PrePeepholeOptIMUL(var p : tai) : boolean;
    function PrePeepholeOptAND(var p : tai) : boolean;

    { First-pass handlers, dispatched by opcode. }
    function OptPass1Test(var p: tai): boolean;
    function OptPass1Add(var p: tai): boolean;
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1_V_MOVAP(var p : tai) : boolean;
    function OptPass1VOP(var p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(var p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass1Sub(var p : tai) : boolean;
    function OptPass1SHLSAL(var p : tai) : boolean;
    function OptPass1FSTP(var p : tai) : boolean;
    function OptPass1FLD(var p : tai) : boolean;
    function OptPass1Cmp(var p : tai) : boolean;
    function OptPass1PXor(var p : tai) : boolean;
    function OptPass1VPXor(var p: tai): boolean;
    function OptPass1Imul(var p : tai) : boolean;
    function OptPass1Jcc(var p : tai) : boolean;
    function OptPass1SHXX(var p: tai): boolean;

    { Second-pass handlers. }
    function OptPass2Movx(var p : tai): Boolean;
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    function OptPass2Lea(var p: tai): Boolean;
    function OptPass2SUB(var p: tai): Boolean;
    function OptPass2ADD(var p : tai): Boolean;
    function OptPass2SETcc(var p : tai) : boolean;

    function CheckMemoryWrite(var first_mov, second_mov: taicpu): Boolean;

    { Post-peephole handlers. }
    function PostPeepholeOptMov(var p : tai) : Boolean;
    function PostPeepholeOptMovzx(var p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
    function PostPeepholeOptAnd(var p : tai) : boolean;
    function PostPeepholeOptMOVSX(var p : tai) : boolean;
    function PostPeepholeOptCmp(var p : tai) : Boolean;
    function PostPeepholeOptTestOr(var p : tai) : Boolean;
    function PostPeepholeOptCall(var p : tai) : Boolean;
    function PostPeepholeOptLea(var p : tai) : Boolean;
    function PostPeepholeOptPush(var p: tai): Boolean;
    function PostPeepholeOptShr(var p : tai) : boolean;

    procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
    function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
    procedure SwapMovCmp(var p, hp1: tai);

    { Processor-dependent reference optimisation }
    class procedure OptimizeRefs(var p: taicpu); static;
  end;
    { Helpers shared by the optimizer passes below.  MatchInstruction returns
      True if instr is an assembler instruction with one of the listed opcodes
      and a size contained in opsize; an empty opsize set matches any size. }
    function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

    { MatchOperand compares operands against a register, an immediate value,
      or other operands (type and content must both match). }
    function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
{$if max_operands>2}
    function MatchOperand(const oper1: TOper; const oper2: TOper; const oper3: TOper): boolean;
{$endif max_operands>2}

    { Field-by-field comparison of two memory references; volatile references
      never compare equal (see the implementation). }
    function RefsEqual(const r1, r2: treference): boolean;
    function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    { returns true, if ref is a reference using only the registers passed as base and index
      and having an offset }
    function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  170. implementation
  171. uses
  172. cutils,verbose,
  173. systems,
  174. globals,
  175. cpuinfo,
  176. procinfo,
  177. paramgr,
  178. aasmbase,
  179. aoptbase,aoptutils,
  180. symconst,symsym,
  181. cgx86,
  182. itcpugas;
{$ifdef DEBUG_AOPTCPU}
  const
    { Prefix for all debug messages emitted via DebugMsg. }
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
{$else DEBUG_AOPTCPU}
  { Empty strings help the optimizer to remove string concatenations that won't
    ever appear to the user on release builds. [Kit] }
  const
    SPeepholeOptimization = '';
{$endif DEBUG_AOPTCPU}
    { NOTE(review): consumer of this constant is outside this chunk — presumably
      a list-growth step size; confirm against its use sites. }
    LIST_STEP_SIZE = 4;
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      { True if instr is an instruction with the given opcode and a size in
        opsize; an empty opsize set acts as a size wildcard. }
      if instr.typ <> ait_instruction then
        Exit(False);
      Result :=
        (taicpu(instr).opcode = op) and
        ((opsize = []) or (taicpu(instr).opsize in opsize));
    end;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      opcode: TAsmOp;
    begin
      { Two-opcode variant: matches either op1 or op2 with an acceptable size. }
      if instr.typ <> ait_instruction then
        Exit(False);
      opcode := taicpu(instr).opcode;
      Result :=
        ((opcode = op1) or (opcode = op2)) and
        ((opsize = []) or (taicpu(instr).opsize in opsize));
    end;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opcode: TAsmOp;
    begin
      { Three-opcode variant: matches any of op1..op3 with an acceptable size. }
      if instr.typ <> ait_instruction then
        Exit(False);
      opcode := taicpu(instr).opcode;
      Result :=
        ((opcode = op1) or (opcode = op2) or (opcode = op3)) and
        ((opsize = []) or (taicpu(instr).opsize in opsize));
    end;
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      { Open-array variant: matches any opcode in ops with an acceptable size
        (an empty opsize set matches any size). }
      Result:=false;
      { The typ and opsize tests do not depend on the loop variable; the
        original evaluated them on every iteration.  Hoist them so the loop
        only compares opcodes. }
      if (instr.typ <> ait_instruction) or
        not ((opsize = []) or (taicpu(instr).opsize in opsize)) then
        exit;
      for op in ops do
        if taicpu(instr).opcode = op then
          begin
            Result:=true;
            exit;
          end;
    end;
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      { Matches only a register operand holding exactly this register. }
      if oper.typ = top_reg then
        Result := (oper.reg = reg)
      else
        Result := False;
    end;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      { Matches only an immediate operand with exactly the value a. }
      if oper.typ = top_const then
        Result := (oper.val = a)
      else
        Result := False;
    end;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      { Operands match when they have the same type and the same payload. }
      if oper1.typ <> oper2.typ then
        Exit(False);
      case oper1.typ of
        top_const:
          Result := oper1.val = oper2.val;
        top_reg:
          Result := oper1.reg = oper2.reg;
        top_ref:
          Result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          { Any other operand type is unexpected here. }
          internalerror(2013102801);
      end;
    end;
  259. function MatchOperand(const oper1: TOper; const oper2: TOper; const oper3: TOper): boolean;
  260. begin
  261. result := (oper1.typ = oper2.typ) and (oper1.typ = oper3.typ);
  262. if result then
  263. case oper1.typ of
  264. top_const:
  265. Result:=(oper1.val = oper2.val) and (oper1.val = oper3.val);
  266. top_reg:
  267. Result:=(oper1.reg = oper2.reg) and (oper1.reg = oper3.reg);
  268. top_ref:
  269. Result:=RefsEqual(oper1.ref^, oper2.ref^) and RefsEqual(oper1.ref^, oper3.ref^);
  270. else
  271. internalerror(2020052401);
  272. end
  273. end;
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      { Field-by-field comparison.  A volatile reference never compares equal
        to anything, not even to itself, so no optimisation may merge them. }
      RefsEqual :=
        (r1.volatility = []) and
        (r2.volatility = []) and
        (r1.offset = r2.offset) and
        (r1.segment = r2.segment) and
        (r1.base = r2.base) and
        (r1.index = r2.index) and
        (r1.scalefactor = r2.scalefactor) and
        (r1.symbol = r2.symbol) and
        (r1.refaddr = r2.refaddr) and
        (r1.relsymbol = r2.relsymbol);
    end;
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      { True for a plain, non-volatile [base+index] reference with no offset,
        segment or symbol.  Passing NR_INVALID for base or index makes that
        component a wildcard. }
      Result :=
        (ref.offset = 0) and
        (ref.scalefactor in [0,1]) and
        (ref.segment = NR_NO) and
        (ref.symbol = nil) and
        (ref.relsymbol = nil) and
        (ref.volatility = []) and
        ((base = NR_INVALID) or (ref.base = base)) and
        ((index = NR_INVALID) or (ref.index = index));
    end;
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      { Same as MatchReference except that any offset is permitted. }
      Result :=
        (ref.scalefactor in [0,1]) and
        (ref.segment = NR_NO) and
        (ref.symbol = nil) and
        (ref.relsymbol = nil) and
        (ref.volatility = []) and
        ((base = NR_INVALID) or (ref.base = base)) and
        ((index = NR_INVALID) or (ref.index = index));
    end;
  function InstrReadsFlags(p: tai): boolean;
    begin
      { Does this list entry (possibly) read the CPU flags?  Labels count as
        readers because control may reach them from elsewhere. }
      case p.typ of
        ait_instruction:
          Result := InsProp[taicpu(p).opcode].Ch*
            [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
             Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
             Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All] <> [];
        ait_label:
          Result := true;
        else
          Result := false;
      end;
    end;
  function TX86AsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      { Walk forward from Current until reg appears in an instruction, a
        call/jump or non-instruction entry is reached, or the list ends.
        Below -O3 only a single step is taken. }
      Next := Current;
      repeat
        Result := GetNextInstruction(Next,Next);
        if not Result then
          Break;
      until not (cs_opt_level3 in current_settings.optimizerswitches) or
        (Next.typ <> ait_instruction) or
        RegInInstruction(reg,Next) or
        is_calljmp(taicpu(Next).opcode);
    end;
  function TX86AsmOptimizer.GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var CrossJump: Boolean): Boolean;
    begin
      { Note, CrossJump keeps its input value if a conditional jump is not found - it doesn't get set to False }
      Next := Current;
      repeat
        Result := GetNextInstruction(Next,Next);
        if Result and (Next.typ = ait_instruction) and is_calljmp(taicpu(Next).opcode) then
          begin
            if is_calljmpuncondret(taicpu(Next).opcode) then
              begin
                { Unconditional transfer of control: abandon the search. }
                Result := False;
                Exit;
              end;
            { A conditional jump was crossed; record this for the caller. }
            CrossJump := True;
          end;
      until not Result or
        not (cs_opt_level3 in current_settings.optimizerswitches) or
        (Next.typ <> ait_instruction) or
        RegInInstruction(reg,Next);
    end;
  function TX86AsmOptimizer.GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      { Below -O3, fall back to a single plain successor lookup. }
      if not (cs_opt_level3 in current_settings.optimizerswitches) then
        begin
          Result := GetNextInstruction(Current,Next);
          Exit;
        end;
      Result := false;
      Next := tai(Current.Next);
      while assigned(Next) do
        begin
          case Next.typ of
            ait_instruction:
              begin
                { Stop at any control transfer other than a plain call. }
                if is_calljmp(taicpu(Next).opcode) and (taicpu(Next).opcode <> A_CALL) then
                  Exit;
                if RegInInstruction(reg,Next) and (taicpu(Next).opcode <> A_CALL) then
                  begin
                    Result := true;
                    Exit;
                  end;
              end;
            ait_regalloc:
              { An alloc/dealloc entry for the tracked register ends the search. }
              if getsupreg(tai_regalloc(Next).reg) = getsupreg(reg) then
                Exit;
            ait_label:
              { A reachable label ends the search; skippable ones are ignored. }
              if not labelCanBeSkipped(Tai_Label(Next)) then
                Exit;
            else
              ;
          end;
          Next := tai(Next.Next);
        end;
    end;
  { On x86 an instruction "loads from" a register exactly when it reads it,
    so this override simply delegates to RegReadByInstruction. }
  function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    begin
      Result:=RegReadByInstruction(reg,hp);
    end;
  { Returns True if the instruction hp reads the register reg, taking implicit
    operands (MUL/DIV families), condition codes and the per-opcode channel
    information from insprop into account.  Non-instruction entries never
    read anything. }
  function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  var
    p: taicpu;
    opcount: longint;
  begin
    RegReadByInstruction := false;
    if hp.typ <> ait_instruction then
      exit;
    p := taicpu(hp);
    case p.opcode of
      A_CALL:
        { Calls are conservatively assumed to read every register. }
        regreadbyinstruction := true;
      A_IMUL:
        case p.ops of
          1:
            { One-operand IMUL implicitly reads EAX/AX/AL; AH is not read
              for byte-sized operations. }
            regReadByInstruction := RegInOp(reg,p.oper[0]^) or
              (
                ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
              );
          2,3:
            { Two/three-operand IMUL reads only its first two operands
              (the third, if present, is an immediate). }
            regReadByInstruction :=
              reginop(reg,p.oper[0]^) or
              reginop(reg,p.oper[1]^);
          else
            InternalError(2019112801);
        end;
      A_MUL:
        begin
          { MUL implicitly reads EAX/AX/AL; AH is not read for byte size. }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
            (
              ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
              ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
            );
        end;
      A_IDIV,A_DIV:
        begin
          { Division reads the dividend from EAX, and also EDX except for
            byte-sized operations. }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
            (
              (getregtype(reg)=R_INTREGISTER) and
              (
                (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
              )
            );
        end;
      else
        begin
          { LEA performs no segment-relative access, so it never reads a
            segment register. }
          if (p.opcode=A_LEA) and is_segment_reg(reg) then
            begin
              RegReadByInstruction := false;
              exit;
            end;
          { Any register appearing inside a memory operand is read for the
            address computation. }
          for opcount := 0 to p.ops-1 do
            if (p.oper[opCount]^.typ = top_ref) and
               RegInRef(reg,p.oper[opcount]^.ref^) then
              begin
                RegReadByInstruction := true;
                exit
              end;
          { special handling for SSE MOVSD }
          if (p.opcode=A_MOVSD) and (p.ops>0) then
            begin
              if p.ops<>2 then
                internalerror(2017042702);
              { Register-to-register MOVSD merges, so the destination register
                is also read; memory forms read only the source operand. }
              regReadByInstruction := reginop(reg,p.oper[0]^) or
                (
                  (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
                );
              exit;
            end;
          { Fall back to the per-opcode channel information. }
          with insprop[p.opcode] do
            begin
              if getregtype(reg)=R_INTREGISTER then
                begin
                  { Implicit reads of specific integer registers, encoded as
                    R (read), RW (read-write) or M (modify) channels. }
                  case getsupreg(reg) of
                    RS_EAX:
                      if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ECX:
                      if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDX:
                      if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBX:
                      if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESP:
                      if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBP:
                      if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESI:
                      if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDI:
                      if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                  end;
                end;
              if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                begin
                  { For conditional instructions, only the individual flag
                    bits tested by the condition count as read. }
                  if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                    begin
                      case p.condition of
                        C_A,C_NBE, { CF=0 and ZF=0 }
                        C_BE,C_NA: { CF=1 or ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                        C_AE,C_NB,C_NC, { CF=0 }
                        C_B,C_NAE,C_C: { CF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
                        C_NE,C_NZ, { ZF=0 }
                        C_E,C_Z: { ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
                        C_G,C_NLE, { ZF=0 and SF=OF }
                        C_LE,C_NG: { ZF=1 or SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_GE,C_NL, { SF=OF }
                        C_L,C_NGE: { SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_NO, { OF=0 }
                        C_O: { OF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                        C_NP,C_PO, { PF=0 }
                        C_P,C_PE: { PF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
                        C_NS, { SF=0 }
                        C_S: { SF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
                        else
                          internalerror(2017042701);
                      end;
                      if RegReadByInstruction then
                        exit;
                    end;
                  { Otherwise map the requested flag sub-register onto the
                    opcode's flag-read channels; a whole-flags query (word or
                    larger sub-register) matches any flag-reading channel. }
                  case getsubreg(reg) of
                    R_SUBW,R_SUBD,R_SUBQ:
                      RegReadByInstruction :=
                        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                         Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                    R_SUBFLAGCARRY:
                      RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGPARITY:
                      RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGAUXILIARY:
                      RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGZERO:
                      RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGSIGN:
                      RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGOVERFLOW:
                      RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGINTERRUPT:
                      RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGDIRECTION:
                      RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    else
                      internalerror(2017042601);
                  end;
                  exit;
                end;
              { Some opcodes do not actually read their source when both
                operands are the same register (e.g. a register self-move). }
              if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
                 (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
                 (p.oper[0]^.reg=p.oper[1]^.reg) then
                exit;
              { Finally, check the read/read-write/modify channels of each
                explicit operand position. }
              if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
            end;
        end;
    end;
  end;
{ Returns True if instruction p1 may read or write Reg.
  Answers purely from the statically declared change information
  (insprop[opcode].Ch) for the well-known integer registers, the
  individual flag bits and the FPU, then falls back to the generic
  operand scan in the inherited implementation. }
function TX86AsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  begin
    result:=false;
    if p1.typ<>ait_instruction then
      exit;
    { Ch_All marks opcodes whose effects are not precisely modelled;
      conservatively report every register as used }
    if (Ch_All in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    if (getregtype(reg)=R_INTREGISTER) and
       { change information for xmm movsd are not correct }
       ((taicpu(p1).opcode<>A_MOVSD) or (taicpu(p1).ops=0)) then
      begin
        { check the read/write/modify flags of the implicit registers }
        case getsupreg(reg) of
          { RS_EAX = RS_RAX on x86-64 }
          RS_EAX:
            result:=([Ch_REAX,Ch_RRAX,Ch_WEAX,Ch_WRAX,Ch_RWEAX,Ch_RWRAX,Ch_MEAX,Ch_MRAX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ECX:
            result:=([Ch_RECX,Ch_RRCX,Ch_WECX,Ch_WRCX,Ch_RWECX,Ch_RWRCX,Ch_MECX,Ch_MRCX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDX:
            result:=([Ch_REDX,Ch_RRDX,Ch_WEDX,Ch_WRDX,Ch_RWEDX,Ch_RWRDX,Ch_MEDX,Ch_MRDX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBX:
            result:=([Ch_REBX,Ch_RRBX,Ch_WEBX,Ch_WRBX,Ch_RWEBX,Ch_RWRBX,Ch_MEBX,Ch_MRBX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESP:
            result:=([Ch_RESP,Ch_RRSP,Ch_WESP,Ch_WRSP,Ch_RWESP,Ch_RWRSP,Ch_MESP,Ch_MRSP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBP:
            result:=([Ch_REBP,Ch_RRBP,Ch_WEBP,Ch_WRBP,Ch_RWEBP,Ch_RWRBP,Ch_MEBP,Ch_MRBP]*insprop[taicpu(p1).opcode].Ch)<>[];
          { NOTE(review): the string-instruction memory flags Ch_RMemEDI /
            Ch_WMemEDI are folded into the ESI resp. EDI cases below -
            presumably matching how x86ins.dat declares them; verify }
          RS_ESI:
            result:=([Ch_RESI,Ch_RRSI,Ch_WESI,Ch_WRSI,Ch_RWESI,Ch_RWRSI,Ch_MESI,Ch_MRSI,Ch_RMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDI:
            result:=([Ch_REDI,Ch_RRDI,Ch_WEDI,Ch_WRDI,Ch_RWEDI,Ch_RWRDI,Ch_MEDI,Ch_MRDI,Ch_WMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      begin
        { whole-flags access affects every individual flag bit }
        if ([Ch_RFlags,Ch_WFlags,Ch_RWFlags,Ch_RFLAGScc]*insprop[taicpu(p1).opcode].Ch)<>[] then
          exit(true);
        { otherwise check only the flag bit selected by the sub-register }
        case getsubreg(reg) of
          R_SUBFLAGCARRY:
            Result:=([Ch_RCarryFlag,Ch_RWCarryFlag,Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGPARITY:
            Result:=([Ch_RParityFlag,Ch_RWParityFlag,Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGAUXILIARY:
            Result:=([Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGZERO:
            Result:=([Ch_RZeroFlag,Ch_RWZeroFlag,Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGSIGN:
            Result:=([Ch_RSignFlag,Ch_RWSignFlag,Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGOVERFLOW:
            Result:=([Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGINTERRUPT:
            Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGDIRECTION:
            Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if (getregtype(reg)=R_FPUREGISTER) and (Ch_FPU in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    { fall back to the generic operand-based scan }
    Result:=inherited RegInInstruction(Reg, p1);
  end;
{ Returns True if instruction p1 writes (or may write) to Reg.
  Flag sub-registers are answered purely from the insprop change sets;
  a few opcodes with irregular operand behaviour (CALL, SSE MOVSD,
  VMOVSS/VMOVSD, IMUL) are special-cased before the generic scan of the
  implicit-register and explicit-operand write flags. }
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  begin
    Result := False;
    if p1.typ <> ait_instruction then
      exit;
    with insprop[taicpu(p1).opcode] do
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        begin
          case getsubreg(reg) of
            { a write to any individual flag modifies the flags register as a whole }
            R_SUBW,R_SUBD,R_SUBQ:
              Result :=
                [Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
                 Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                 Ch_W0DirFlag,Ch_W1DirFlag,Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGCARRY:
              Result:=[Ch_WCarryFlag,Ch_RWCarryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_WParityFlag,Ch_RWParityFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_WAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_WZeroFlag,Ch_RWZeroFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_WSignFlag,Ch_RWSignFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_WOverflowFlag,Ch_RWOverflowFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            else
              internalerror(2017042602);
          end;
          exit;
        end;
    { special cases whose destination operand cannot be derived from x86ins.dat }
    case taicpu(p1).opcode of
      A_CALL:
        { We could potentially set Result to False if the register in
          question is non-volatile for the subroutine's calling convention,
          but this would require detecting the calling convention in use and
          also assuming that the routine doesn't contain malformed assembly
          language, for example... so it could only be done under -O4 as it
          would be considered a side-effect. [Kit] }
        Result := True;
      A_MOVSD:
        { special handling for SSE MOVSD; with zero operands it is the string instruction }
        if (taicpu(p1).ops>0) then
          begin
            if taicpu(p1).ops<>2 then
              internalerror(2017042703);
            Result := (taicpu(p1).oper[1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[1]^);
          end;
      { VMOVSS and VMOVSD have two and three operand flavours, this cannot be
        modelled by x86ins.dat so fix it here (FK) }
      A_VMOVSS,
      A_VMOVSD:
        begin
          Result := (taicpu(p1).ops=3) and (taicpu(p1).oper[2]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[2]^);
          exit;
        end;
      A_IMUL:
        { the explicit destination is always the last operand }
        Result := (taicpu(p1).oper[taicpu(p1).ops-1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[taicpu(p1).ops-1]^);
      else
        ;
    end;
    if Result then
      exit;
    with insprop[taicpu(p1).opcode] do
      begin
        { implicit writes to the well-known integer registers }
        if getregtype(reg)=R_INTREGISTER then
          begin
            case getsupreg(reg) of
              RS_EAX:
                if [Ch_WEAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ECX:
                if [Ch_WECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDX:
                if [Ch_WEDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBX:
                if [Ch_WEBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESP:
                if [Ch_WESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBP:
                if [Ch_WEBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESI:
                if [Ch_WESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDI:
                if [Ch_WEDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
            end;
          end;
        { explicit operand writes declared per operand position }
        if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
          begin
            Result := true;
            exit
          end;
      end;
  end;
  810. {$ifdef DEBUG_AOPTCPU}
  811. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  812. begin
  813. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  814. end;
{ Debug output helper - renders an integer constant as a decimal string }
function debug_tostr(i: tcgint): string; inline;
  begin
    Result := tostr(i);
  end;
{ Debug output helper - renders a register in AT&T style ('%' prefix) }
function debug_regname(r: TRegister): string; inline;
  begin
    Result := '%' + std_regname(r);
  end;
  823. { Debug output function - creates a string representation of an operator }
  824. function debug_operstr(oper: TOper): string;
  825. begin
  826. case oper.typ of
  827. top_const:
  828. Result := '$' + debug_tostr(oper.val);
  829. top_reg:
  830. Result := debug_regname(oper.reg);
  831. top_ref:
  832. begin
  833. if oper.ref^.offset <> 0 then
  834. Result := debug_tostr(oper.ref^.offset) + '('
  835. else
  836. Result := '(';
  837. if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
  838. begin
  839. Result := Result + debug_regname(oper.ref^.base);
  840. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  841. Result := Result + ',' + debug_regname(oper.ref^.index);
  842. end
  843. else
  844. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  845. Result := Result + debug_regname(oper.ref^.index);
  846. if (oper.ref^.scalefactor > 1) then
  847. Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
  848. else
  849. Result := Result + ')';
  850. end;
  851. else
  852. Result := '[UNKNOWN]';
  853. end;
  854. end;
{ Debug output helper - mnemonic of an opcode }
function debug_op2str(opcode: tasmop): string; inline;
  begin
    Result := std_op2str[opcode];
  end;
{ Debug output helper - GAS-style operand-size suffix }
function debug_opsize2str(opsize: topsize): string; inline;
  begin
    Result := gas_opsize2str[opsize];
  end;
  863. {$else DEBUG_AOPTCPU}
{ No-op stand-ins used when DEBUG_AOPTCPU is not defined; being inline
  with empty/constant bodies, calls compile away to nothing }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  begin
  end;
function debug_tostr(i: tcgint): string; inline;
  begin
    Result := '';
  end;
function debug_regname(r: TRegister): string; inline;
  begin
    Result := '';
  end;
function debug_operstr(oper: TOper): string; inline;
  begin
    Result := '';
  end;
function debug_op2str(opcode: tasmop): string; inline;
  begin
    Result := '';
  end;
function debug_opsize2str(opsize: topsize): string; inline;
  begin
    Result := '';
  end;
  887. {$endif DEBUG_AOPTCPU}
{ Returns True when the optimizer may emit MOVZX for the current target:
  unconditionally on x86-64; on 16/32-bit targets only from the 386 up
  (i8086 compilation) and when either optimising for size or targetting
  a CPU where MOVZX is cheap }
class function TX86AsmOptimizer.IsMOVZXAcceptable: Boolean; inline;
  begin
{$ifdef x86_64}
    { Always fine on x86-64 }
    Result := True;
{$else x86_64}
    Result :=
{$ifdef i8086}
      (current_settings.cputype >= cpu_386) and
{$endif i8086}
      (
        { Always accept if optimising for size }
        (cs_opt_size in current_settings.optimizerswitches) or
        { From the Pentium II onwards, MOVZX only takes 1 cycle. [Kit] }
        (current_settings.optimizecputype >= cpu_Pentium2)
      );
{$endif x86_64}
  end;
  906. { Attempts to allocate a volatile integer register for use between p and hp,
  907. using AUsedRegs for the current register usage information. Returns NR_NO
  908. if no free register could be found }
  909. function TX86AsmOptimizer.GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
  910. var
  911. RegSet: TCPURegisterSet;
  912. CurrentSuperReg: Integer;
  913. CurrentReg: TRegister;
  914. Currentp: tai;
  915. Breakout: Boolean;
  916. begin
  917. { TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
  918. Result := NR_NO;
  919. RegSet := paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption);
  920. for CurrentSuperReg in RegSet do
  921. begin
  922. CurrentReg := newreg(R_INTREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
  923. if not AUsedRegs[R_INTREGISTER].IsUsed(CurrentReg) then
  924. begin
  925. Currentp := p;
  926. Breakout := False;
  927. while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
  928. begin
  929. case Currentp.typ of
  930. ait_instruction:
  931. begin
  932. if RegInInstruction(CurrentReg, Currentp) then
  933. begin
  934. Breakout := True;
  935. Break;
  936. end;
  937. { Cannot allocate across an unconditional jump }
  938. if is_calljmpuncondret(taicpu(Currentp).opcode) then
  939. Exit;
  940. end;
  941. ait_marker:
  942. { Don't try anything more if a marker is hit }
  943. Exit;
  944. ait_regalloc:
  945. if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
  946. begin
  947. Breakout := True;
  948. Break;
  949. end;
  950. else
  951. ;
  952. end;
  953. end;
  954. if Breakout then
  955. { Try the next register }
  956. Continue;
  957. { We have a free register available }
  958. Result := CurrentReg;
  959. AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
  960. Exit;
  961. end;
  962. end;
  963. end;
{ Attempts to allocate a volatile MM register for use between p and hp,
  using AUsedRegs for the current register usage information. Returns NR_NO
  if no free register could be found }
function TX86AsmOptimizer.GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
  var
    RegSet: TCPURegisterSet;
    CurrentSuperReg: Integer;
    CurrentReg: TRegister;
    Currentp: tai;
    Breakout: Boolean;
  begin
    { TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
    Result := NR_NO;
    RegSet := paramanager.get_volatile_registers_mm(current_procinfo.procdef.proccalloption);
    for CurrentSuperReg in RegSet do
      begin
        CurrentReg := newreg(R_MMREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
        if not AUsedRegs[R_MMREGISTER].IsUsed(CurrentReg) then
          begin
            { Walk from p towards hp and make sure nothing in between
              touches or allocates the candidate register }
            Currentp := p;
            Breakout := False;
            while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
              begin
                case Currentp.typ of
                  ait_instruction:
                    begin
                      if RegInInstruction(CurrentReg, Currentp) then
                        begin
                          Breakout := True;
                          Break;
                        end;
                      { Cannot allocate across an unconditional jump }
                      if is_calljmpuncondret(taicpu(Currentp).opcode) then
                        Exit;
                    end;
                  ait_marker:
                    { Don't try anything more if a marker is hit }
                    Exit;
                  ait_regalloc:
                    { a live allocation of the same super-register blocks the candidate }
                    if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
                      begin
                        Breakout := True;
                        Break;
                      end;
                  else
                    ;
                end;
              end;
            if Breakout then
              { Try the next register }
              Continue;
            { We have a free register available }
            Result := CurrentReg;
            AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
            Exit;
          end;
      end;
  end;
  1022. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  1023. begin
  1024. if not SuperRegistersEqual(reg1,reg2) then
  1025. exit(false);
  1026. if getregtype(reg1)<>R_INTREGISTER then
  1027. exit(true); {because SuperRegisterEqual is true}
  1028. case getsubreg(reg1) of
  1029. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  1030. higher, it preserves the high bits, so the new value depends on
  1031. reg2's previous value. In other words, it is equivalent to doing:
  1032. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  1033. R_SUBL:
  1034. exit(getsubreg(reg2)=R_SUBL);
  1035. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  1036. higher, it actually does a:
  1037. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  1038. R_SUBH:
  1039. exit(getsubreg(reg2)=R_SUBH);
  1040. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  1041. bits of reg2:
  1042. reg2 := (reg2 and $ffff0000) or word(reg1); }
  1043. R_SUBW:
  1044. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  1045. { a write to R_SUBD always overwrites every other subregister,
  1046. because it clears the high 32 bits of R_SUBQ on x86_64 }
  1047. R_SUBD,
  1048. R_SUBQ:
  1049. exit(true);
  1050. else
  1051. internalerror(2017042801);
  1052. end;
  1053. end;
  1054. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  1055. begin
  1056. if not SuperRegistersEqual(reg1,reg2) then
  1057. exit(false);
  1058. if getregtype(reg1)<>R_INTREGISTER then
  1059. exit(true); {because SuperRegisterEqual is true}
  1060. case getsubreg(reg1) of
  1061. R_SUBL:
  1062. exit(getsubreg(reg2)<>R_SUBH);
  1063. R_SUBH:
  1064. exit(getsubreg(reg2)<>R_SUBL);
  1065. R_SUBW,
  1066. R_SUBD,
  1067. R_SUBQ:
  1068. exit(true);
  1069. else
  1070. internalerror(2017042802);
  1071. end;
  1072. end;
{ Pre-peephole pass for SHR/SAR: folds a following SHL on the same operand
  of the same size into a cheaper shift+AND (or plain AND) combination }
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;   { mask of the low bits cleared by the original shift pair }
  begin
    result:=false;
    { changes the code sequence
      shr/sar const1, x
      shl const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    if GetNextInstruction(p, hp1) and
       MatchInstruction(hp1,A_SHL,[]) and
       (taicpu(p).oper[0]^.typ = top_const) and
       (taicpu(hp1).oper[0]^.typ = top_const) and
       (taicpu(hp1).opsize = taicpu(p).opsize) and
       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        { NOTE(review): none of the branches below set result to True even
          though they rewrite the instruction stream - presumably the
          pre-peephole pass does not depend on the return value; confirm }
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
           not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 > const2 }
            { keep a reduced shift by const1-const2 and turn the shl into an
              AND clearing the const2 low bits }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 < const2 }
            { keep a reduced shl by const2-const1 and turn the first shift
              into an AND clearing the const1 low bits }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 = const2 }
            { the shifts cancel except for clearing the low const1 bits:
              replace the pair with a single AND }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            RemoveInstruction(hp1);
          end;
      end;
  end;
{ Pre-peephole pass for IMUL with a constant first operand:
  "imul $1,reg" is removed, "imul $1,reg1,reg2" becomes a MOV, and
  constants with exactly two set bits close together become an
  LEA (+ optional SHL) pair.
  NOTE(review): only the "imul $1,reg" removal reports True via
  RemoveCurrentP; the other rewrites leave result False - presumably
  the pre-peephole pass ignores the return value; confirm }
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;   { trailing-zero count of the constant }
    BaseValue : TCGInt;      { constant shifted right by ShiftValue; must be 3, 5 or 9 }
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
        MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
       (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        if (taicpu(p).ops = 2) then
          { remove "imul $1, reg" }
          begin
            DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
            Result := RemoveCurrentP(p);
          end
        else
          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
          begin
            hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
            p.free;
            p := hp1;
          end
      else if ((taicpu(p).ops <= 2) or
               (taicpu(p).oper[2]^.typ = Top_Reg)) and
              not(cs_opt_size in current_settings.optimizerswitches) and
              { don't touch a multiply whose overflow flag is tested next }
              (not(GetNextInstruction(p, hp1)) or
               not((tai(hp1).typ = ait_instruction) and
                   ((taicpu(hp1).opcode=A_Jcc) and
                    (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
              lea (reg1,reg1,Y), reg2
              shl ZZ,reg2
            imul XX, reg1 to
              lea (reg1,reg1,YY), reg1
              shl ZZ,reg2
            This optimization makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.
            This optimization can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              { const = 2^ShiftValue * BaseValue, so
                const*reg = (reg + reg*(BaseValue-1)) shl ShiftValue }
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              TmpRef.ScaleFactor := BaseValue-1;
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              { keep the original source position for debug information }
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              RemoveCurrentP(p, hp1);
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
  1220. function TX86AsmOptimizer.PrePeepholeOptAND(var p : tai) : boolean;
  1221. begin
  1222. Result := False;
  1223. if MatchOperand(taicpu(p).oper[0]^, 0) and
  1224. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) then
  1225. begin
  1226. DebugMsg(SPeepholeOptimization + 'AND 0 -> MOV 0', p);
  1227. taicpu(p).opcode := A_MOV;
  1228. Result := True;
  1229. end;
  1230. end;
  1231. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  1232. var
  1233. p: taicpu absolute hp;
  1234. i: Integer;
  1235. begin
  1236. Result := False;
  1237. if not assigned(hp) or
  1238. (hp.typ <> ait_instruction) then
  1239. Exit;
  1240. // p := taicpu(hp);
  1241. Prefetch(insprop[p.opcode]);
  1242. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  1243. with insprop[p.opcode] do
  1244. begin
  1245. case getsubreg(reg) of
  1246. R_SUBW,R_SUBD,R_SUBQ:
  1247. Result:=
  1248. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  1249. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  1250. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  1251. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  1252. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  1253. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  1254. R_SUBFLAGCARRY:
  1255. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  1256. R_SUBFLAGPARITY:
  1257. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  1258. R_SUBFLAGAUXILIARY:
  1259. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  1260. R_SUBFLAGZERO:
  1261. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  1262. R_SUBFLAGSIGN:
  1263. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  1264. R_SUBFLAGOVERFLOW:
  1265. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  1266. R_SUBFLAGINTERRUPT:
  1267. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  1268. R_SUBFLAGDIRECTION:
  1269. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  1270. else
  1271. begin
  1272. writeln(getsubreg(reg));
  1273. internalerror(2017050501);
  1274. end;
  1275. end;
  1276. exit;
  1277. end;
  1278. { Handle special cases first }
  1279. case p.opcode of
  1280. A_MOV, A_MOVZX, A_MOVSX, A_LEA, A_VMOVSS, A_VMOVSD, A_VMOVAPD,
  1281. A_VMOVAPS, A_VMOVQ, A_MOVSS, A_MOVSD, A_MOVQ, A_MOVAPD, A_MOVAPS:
  1282. begin
  1283. Result :=
  1284. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  1285. (p.oper[1]^.typ = top_reg) and
  1286. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  1287. (
  1288. (p.oper[0]^.typ = top_const) or
  1289. (
  1290. (p.oper[0]^.typ = top_reg) and
  1291. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))
  1292. ) or (
  1293. (p.oper[0]^.typ = top_ref) and
  1294. not RegInRef(reg,p.oper[0]^.ref^)
  1295. )
  1296. );
  1297. end;
  1298. A_MUL, A_IMUL:
  1299. Result :=
  1300. (
  1301. (p.ops=3) and { IMUL only }
  1302. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  1303. (
  1304. (
  1305. (p.oper[1]^.typ=top_reg) and
  1306. not Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg)
  1307. ) or (
  1308. (p.oper[1]^.typ=top_ref) and
  1309. not RegInRef(reg,p.oper[1]^.ref^)
  1310. )
  1311. )
  1312. ) or (
  1313. (
  1314. (p.ops=1) and
  1315. (
  1316. (
  1317. (
  1318. (p.oper[0]^.typ=top_reg) and
  1319. not Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg)
  1320. )
  1321. ) or (
  1322. (p.oper[0]^.typ=top_ref) and
  1323. not RegInRef(reg,p.oper[0]^.ref^)
  1324. )
  1325. ) and (
  1326. (
  1327. (p.opsize=S_B) and
  1328. Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and
  1329. not Reg1ReadDependsOnReg2(NR_AL,reg)
  1330. ) or (
  1331. (p.opsize=S_W) and
  1332. Reg1WriteOverwritesReg2Entirely(NR_DX,reg)
  1333. ) or (
  1334. (p.opsize=S_L) and
  1335. Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)
  1336. {$ifdef x86_64}
  1337. ) or (
  1338. (p.opsize=S_Q) and
  1339. Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)
  1340. {$endif x86_64}
  1341. )
  1342. )
  1343. )
  1344. );
  1345. A_CBW:
  1346. Result := Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg));
  1347. {$ifndef x86_64}
  1348. A_LDS:
  1349. Result := (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1350. A_LES:
  1351. Result := (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^));
  1352. {$endif not x86_64}
  1353. A_LFS:
  1354. Result := (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1355. A_LGS:
  1356. Result := (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1357. A_LSS:
  1358. Result := (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1359. A_LAHF{$ifndef x86_64}, A_AAM{$endif not x86_64}:
  1360. Result := Reg1WriteOverwritesReg2Entirely(NR_AH,reg);
  1361. A_LODSB:
  1362. Result := Reg1WriteOverwritesReg2Entirely(NR_AL,reg);
  1363. A_LODSW:
  1364. Result := Reg1WriteOverwritesReg2Entirely(NR_AX,reg);
  1365. {$ifdef x86_64}
  1366. A_LODSQ:
  1367. Result := Reg1WriteOverwritesReg2Entirely(NR_RAX,reg);
  1368. {$endif x86_64}
  1369. A_LODSD:
  1370. Result := Reg1WriteOverwritesReg2Entirely(NR_EAX,reg);
  1371. A_FSTSW, A_FNSTSW:
  1372. Result := (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg);
  1373. else
  1374. begin
  1375. with insprop[p.opcode] do
  1376. begin
  1377. if (
  1378. { xor %reg,%reg etc. is classed as a new value }
  1379. (([Ch_NoReadIfEqualRegs]*Ch)<>[]) and
  1380. MatchOpType(p, top_reg, top_reg) and
  1381. (p.oper[0]^.reg = p.oper[1]^.reg) and
  1382. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)
  1383. ) then
  1384. begin
  1385. Result := True;
  1386. Exit;
  1387. end;
  1388. { Make sure the entire register is overwritten }
  1389. if (getregtype(reg) = R_INTREGISTER) then
  1390. begin
  1391. if (p.ops > 0) then
  1392. begin
  1393. if RegInOp(reg, p.oper[0]^) then
  1394. begin
  1395. if (p.oper[0]^.typ = top_ref) then
  1396. begin
  1397. if RegInRef(reg, p.oper[0]^.ref^) then
  1398. begin
  1399. Result := False;
  1400. Exit;
  1401. end;
  1402. end
  1403. else if (p.oper[0]^.typ = top_reg) then
  1404. begin
  1405. if ([Ch_ROp1, Ch_RWOp1, Ch_MOp1]*Ch<>[]) then
  1406. begin
  1407. Result := False;
  1408. Exit;
  1409. end
  1410. else if ([Ch_WOp1]*Ch<>[]) then
  1411. begin
  1412. if Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg, reg) then
  1413. Result := True
  1414. else
  1415. begin
  1416. Result := False;
  1417. Exit;
  1418. end;
  1419. end;
  1420. end;
  1421. end;
  1422. if (p.ops > 1) then
  1423. begin
  1424. if RegInOp(reg, p.oper[1]^) then
  1425. begin
  1426. if (p.oper[1]^.typ = top_ref) then
  1427. begin
  1428. if RegInRef(reg, p.oper[1]^.ref^) then
  1429. begin
  1430. Result := False;
  1431. Exit;
  1432. end;
  1433. end
  1434. else if (p.oper[1]^.typ = top_reg) then
  1435. begin
  1436. if ([Ch_ROp2, Ch_RWOp2, Ch_MOp2]*Ch<>[]) then
  1437. begin
  1438. Result := False;
  1439. Exit;
  1440. end
  1441. else if ([Ch_WOp2]*Ch<>[]) then
  1442. begin
  1443. if Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg, reg) then
  1444. Result := True
  1445. else
  1446. begin
  1447. Result := False;
  1448. Exit;
  1449. end;
  1450. end;
  1451. end;
  1452. end;
  1453. if (p.ops > 2) then
  1454. begin
  1455. if RegInOp(reg, p.oper[2]^) then
  1456. begin
  1457. if (p.oper[2]^.typ = top_ref) then
  1458. begin
  1459. if RegInRef(reg, p.oper[2]^.ref^) then
  1460. begin
  1461. Result := False;
  1462. Exit;
  1463. end;
  1464. end
  1465. else if (p.oper[2]^.typ = top_reg) then
  1466. begin
  1467. if ([Ch_ROp3, Ch_RWOp3, Ch_MOp3]*Ch<>[]) then
  1468. begin
  1469. Result := False;
  1470. Exit;
  1471. end
  1472. else if ([Ch_WOp3]*Ch<>[]) then
  1473. begin
  1474. if Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg, reg) then
  1475. Result := True
  1476. else
  1477. begin
  1478. Result := False;
  1479. Exit;
  1480. end;
  1481. end;
  1482. end;
  1483. end;
  1484. if (p.ops > 3) and RegInOp(reg, p.oper[3]^) then
  1485. begin
  1486. if (p.oper[3]^.typ = top_ref) then
  1487. begin
  1488. if RegInRef(reg, p.oper[3]^.ref^) then
  1489. begin
  1490. Result := False;
  1491. Exit;
  1492. end;
  1493. end
  1494. else if (p.oper[3]^.typ = top_reg) then
  1495. begin
  1496. if ([Ch_ROp4, Ch_RWOp4, Ch_MOp4]*Ch<>[]) then
  1497. begin
  1498. Result := False;
  1499. Exit;
  1500. end
  1501. else if ([Ch_WOp4]*Ch<>[]) then
  1502. begin
  1503. if Reg1WriteOverwritesReg2Entirely(p.oper[3]^.reg, reg) then
  1504. Result := True
  1505. else
  1506. begin
  1507. Result := False;
  1508. Exit;
  1509. end;
  1510. end;
  1511. end;
  1512. end;
  1513. end;
  1514. end;
  1515. end;
  1516. { Don't do these ones first in case an input operand is equal to an explicit output registers }
  1517. case getsupreg(reg) of
  1518. RS_EAX:
  1519. if ([Ch_WEAX{$ifdef x86_64},Ch_WRAX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EAX, reg) then
  1520. begin
  1521. Result := True;
  1522. Exit;
  1523. end;
  1524. RS_ECX:
  1525. if ([Ch_WECX{$ifdef x86_64},Ch_WRCX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_ECX, reg) then
  1526. begin
  1527. Result := True;
  1528. Exit;
  1529. end;
  1530. RS_EDX:
  1531. if ([Ch_REDX{$ifdef x86_64},Ch_WRDX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EDX, reg) then
  1532. begin
  1533. Result := True;
  1534. Exit;
  1535. end;
  1536. RS_EBX:
  1537. if ([Ch_WEBX{$ifdef x86_64},Ch_WRBX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EBX, reg) then
  1538. begin
  1539. Result := True;
  1540. Exit;
  1541. end;
  1542. RS_ESP:
  1543. if ([Ch_WESP{$ifdef x86_64},Ch_WRSP{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_ESP, reg) then
  1544. begin
  1545. Result := True;
  1546. Exit;
  1547. end;
  1548. RS_EBP:
  1549. if ([Ch_WEBP{$ifdef x86_64},Ch_WRBP{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EBP, reg) then
  1550. begin
  1551. Result := True;
  1552. Exit;
  1553. end;
  1554. RS_ESI:
  1555. if ([Ch_WESI{$ifdef x86_64},Ch_WRSI{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_ESI, reg) then
  1556. begin
  1557. Result := True;
  1558. Exit;
  1559. end;
  1560. RS_EDI:
  1561. if ([Ch_WEDI{$ifdef x86_64},Ch_WRDI{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EDI, reg) then
  1562. begin
  1563. Result := True;
  1564. Exit;
  1565. end;
  1566. else
  1567. ;
  1568. end;
  1569. end;
  1570. end;
  1571. end;
  1572. end;
  1573. end;
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  var
    hp2,hp3 : tai;
  begin
    { Returns True if the instruction sequence starting at p is one of the
      procedure exit sequences emitted by the code generator:
        - ret
        - leave; ret
        - lea x(%esp),%esp; ret                    (frame released via LEA)
        - mov %framereg,%esp; pop %framereg; ret   (frame-pointer epilogue)
        - lea (%framereg),%esp; pop %framereg; ret
      Note: the boolean expression below relies on short-circuit evaluation;
      GetNextInstruction assigns hp2/hp3 as a side effect. }
    { some x86-64 issue a NOP before the real exit code }
    if MatchInstruction(p,A_NOP,[]) then
      GetNextInstruction(p,p);
    result:=assigned(p) and (p.typ=ait_instruction) and
      { plain return }
      ((taicpu(p).opcode = A_RET) or
       { leave; ret }
       ((taicpu(p).opcode=A_LEAVE) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { lea x(%esp),%esp; ret - stack frame released on the stack pointer }
       (((taicpu(p).opcode=A_LEA) and
         MatchOpType(taicpu(p),top_ref,top_reg) and
         (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
         (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { mov %framereg,%esp (or the equivalent lea (%framereg),%esp),
         followed by pop %framereg; ret }
       ((((taicpu(p).opcode=A_MOV) and
          MatchOpType(taicpu(p),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
         ((taicpu(p).opcode=A_LEA) and
          MatchOpType(taicpu(p),top_ref,top_reg) and
          (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
         )
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
        MatchOpType(taicpu(hp2),top_reg) and
        (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
        GetNextInstruction(hp2,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      );
  end;
  1614. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  1615. begin
  1616. isFoldableArithOp := False;
  1617. case hp1.opcode of
  1618. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  1619. isFoldableArithOp :=
  1620. ((taicpu(hp1).oper[0]^.typ = top_const) or
  1621. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  1622. (taicpu(hp1).oper[0]^.reg <> reg))) and
  1623. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1624. (taicpu(hp1).oper[1]^.reg = reg);
  1625. A_INC,A_DEC,A_NEG,A_NOT:
  1626. isFoldableArithOp :=
  1627. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1628. (taicpu(hp1).oper[0]^.reg = reg);
  1629. else
  1630. ;
  1631. end;
  1632. end;
procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

  { Walks backwards from p and deletes the nearest preceding "dealloc"
    marker for the given integer super-register, keeping the function
    result register alive up to the exit code. }
  procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
    var
      hp2: tai;
    begin
      hp2 := p;
      repeat
        hp2 := tai(hp2.previous);
        if assigned(hp2) and
          (hp2.typ = ait_regalloc) and
          (tai_regalloc(hp2).ratype=ra_dealloc) and
          (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
          (getsupreg(tai_regalloc(hp2).reg) = supreg) then
          begin
            RemoveInstruction(hp2);
            break;
          end;
      { stop at the start of the list, or as soon as an instruction actually
        uses the register - any dealloc before such a use must stay }
      until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
    end;

  begin
    { Which register(s) carry the function result depends on the return type }
    case current_procinfo.procdef.returndef.typ of
      arraydef,recorddef,pointerdef,
      stringdef,enumdef,procdef,objectdef,errordef,
      filedef,setdef,procvardef,
      classrefdef,forwarddef:
        DoRemoveLastDeallocForFuncRes(RS_EAX);
      orddef:
        if current_procinfo.procdef.returndef.size <> 0 then
          begin
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
      else
        ;
    end;
  end;
function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
  var
    hp1,hp2 : tai;
  begin
    { Pass-1 peephole optimisations for (V)MOVAPS/(V)MOVAPD at p:
      removes self-moves, merges chained moves, and folds moves around
      FMA and scalar arithmetic operations.  Returns True if the
      instruction stream was changed (p may then point elsewhere). }
    result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) then
      begin
        { vmova* reg1,reg1
          =>
          <nop> }
        if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
          begin
            RemoveCurrentP(p);
            result:=true;
            exit;
          end
        else if GetNextInstruction(p,hp1) then
          begin
            { hp1 is the same kind of move reading p's destination? }
            if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
              begin
                { vmova* reg1,reg2
                  vmova* reg2,reg3
                  dealloc reg2
                  =>
                  vmova* reg1,reg3 }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                if MatchOpType(taicpu(hp1),top_reg,top_reg) and
                  not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 1',p);
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    RemoveInstruction(hp1);
                    result:=true;
                    exit;
                  end
                { special case:
                  vmova* reg1,<op>
                  vmova* <op>,reg1
                  =>
                  vmova* reg1,<op> }
                else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                  { never remove a read of a volatile memory location }
                  ((taicpu(p).oper[0]^.typ<>top_ref) or
                   (not(vol_read in taicpu(p).oper[0]^.ref^.volatility))
                  ) then
                  begin
                    DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 2',p);
                    RemoveInstruction(hp1);
                    result:=true;
                    exit;
                  end
              end
            { aligned packed move followed by the matching scalar move }
            else if ((MatchInstruction(p,[A_MOVAPS,A_VMOVAPS],[S_NO]) and
               MatchInstruction(hp1,[A_MOVSS,A_VMOVSS],[S_NO])) or
               ((MatchInstruction(p,[A_MOVAPD,A_VMOVAPD],[S_NO]) and
               MatchInstruction(hp1,[A_MOVSD,A_VMOVSD],[S_NO])))
              ) and
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
              begin
                { vmova* reg1,reg2
                  vmovs* reg2,<op>
                  dealloc reg2
                  =>
                  vmovs* reg1,reg3 }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVS*2(V)MOVS* 1',p);
                    taicpu(p).opcode:=taicpu(hp1).opcode;
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    RemoveInstruction(hp1);
                    result:=true;
                    exit;
                  end
              end;
          end;
        { Look further ahead for the next instruction that uses p's
          destination register }
        if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
          begin
            { vmova* reg1,reg2
              vfma*  ...,reg2   (reg2 as the accumulator operand)
              vmova* reg2,reg1
              - fold both moves into the FMA operation when reg2 dies }
            if MatchInstruction(hp1,[A_VFMADDPD,
              A_VFMADD132PD,
              A_VFMADD132PS,
              A_VFMADD132SD,
              A_VFMADD132SS,
              A_VFMADD213PD,
              A_VFMADD213PS,
              A_VFMADD213SD,
              A_VFMADD213SS,
              A_VFMADD231PD,
              A_VFMADD231PS,
              A_VFMADD231SD,
              A_VFMADD231SS,
              A_VFMADDSUB132PD,
              A_VFMADDSUB132PS,
              A_VFMADDSUB213PD,
              A_VFMADDSUB213PS,
              A_VFMADDSUB231PD,
              A_VFMADDSUB231PS,
              A_VFMSUB132PD,
              A_VFMSUB132PS,
              A_VFMSUB132SD,
              A_VFMSUB132SS,
              A_VFMSUB213PD,
              A_VFMSUB213PS,
              A_VFMSUB213SD,
              A_VFMSUB213SS,
              A_VFMSUB231PD,
              A_VFMSUB231PS,
              A_VFMSUB231SD,
              A_VFMSUB231SS,
              A_VFMSUBADD132PD,
              A_VFMSUBADD132PS,
              A_VFMSUBADD213PD,
              A_VFMSUBADD213PS,
              A_VFMSUBADD231PD,
              A_VFMSUBADD231PS,
              A_VFNMADD132PD,
              A_VFNMADD132PS,
              A_VFNMADD132SD,
              A_VFNMADD132SS,
              A_VFNMADD213PD,
              A_VFNMADD213PS,
              A_VFNMADD213SD,
              A_VFNMADD213SS,
              A_VFNMADD231PD,
              A_VFNMADD231PS,
              A_VFNMADD231SD,
              A_VFNMADD231SS,
              A_VFNMSUB132PD,
              A_VFNMSUB132PS,
              A_VFNMSUB132SD,
              A_VFNMSUB132SS,
              A_VFNMSUB213PD,
              A_VFNMSUB213PS,
              A_VFNMSUB213SD,
              A_VFNMSUB213SS,
              A_VFNMSUB231PD,
              A_VFNMSUB231PS,
              A_VFNMSUB231SD,
              A_VFNMSUB231SS],[S_NO]) and
              { we mix single and double operations here because we assume that the compiler
                generates vmovapd only after double operations and vmovaps only after single operations }
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
              GetNextInstruction(hp1,hp2) and
              MatchInstruction(hp2,[A_VMOVAPD,A_VMOVAPS,A_MOVAPD,A_MOVAPS],[S_NO]) and
              MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
              begin
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                  begin
                    taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                    RemoveCurrentP(p, hp1); // <-- Is this actually safe? hp1 is not necessarily the next instruction. [Kit]
                    RemoveInstruction(hp2);
                  end;
              end
            else if (hp1.typ = ait_instruction) and
              GetNextInstruction(hp1, hp2) and
              MatchInstruction(hp2,taicpu(p).opcode,[]) and
              OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
              MatchOpType(taicpu(hp2),top_reg,top_reg) and
              MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
              (((taicpu(p).opcode=A_MOVAPS) and
                ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
                 (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
               ((taicpu(p).opcode=A_MOVAPD) and
                ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
                 (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
              ) then
              { change
                movapX reg,reg2
                addsX/subsX/... reg3, reg2
                movapX reg2,reg
                to
                addsX/subsX/... reg3,reg
              }
              begin
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
                      debug_op2str(taicpu(p).opcode)+' '+
                      debug_op2str(taicpu(hp1).opcode)+' '+
                      debug_op2str(taicpu(hp2).opcode)+') done',p);
                    { we cannot eliminate the first move if
                      the operations uses the same register for source and dest }
                    if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                      RemoveCurrentP(p, nil);
                    p:=hp1;
                    taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                    RemoveInstruction(hp2);
                    result:=true;
                  end;
              end;
          end;
      end;
  end;
  1872. function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
  1873. var
  1874. hp1 : tai;
  1875. begin
  1876. result:=false;
  1877. { replace
  1878. V<Op>X %mreg1,%mreg2,%mreg3
  1879. VMovX %mreg3,%mreg4
  1880. dealloc %mreg3
  1881. by
  1882. V<Op>X %mreg1,%mreg2,%mreg4
  1883. ?
  1884. }
  1885. if GetNextInstruction(p,hp1) and
  1886. { we mix single and double operations here because we assume that the compiler
  1887. generates vmovapd only after double operations and vmovaps only after single operations }
  1888. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1889. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1890. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1891. begin
  1892. TransferUsedRegs(TmpUsedRegs);
  1893. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1894. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
  1895. begin
  1896. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1897. DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
  1898. RemoveInstruction(hp1);
  1899. result:=true;
  1900. end;
  1901. end;
  1902. end;
  1903. { Replaces all references to AOldReg in a memory reference to ANewReg }
  1904. class function TX86AsmOptimizer.ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean;
  1905. begin
  1906. Result := False;
  1907. { For safety reasons, only check for exact register matches }
  1908. { Check base register }
  1909. if (ref.base = AOldReg) then
  1910. begin
  1911. ref.base := ANewReg;
  1912. Result := True;
  1913. end;
  1914. { Check index register }
  1915. if (ref.index = AOldReg) then
  1916. begin
  1917. ref.index := ANewReg;
  1918. Result := True;
  1919. end;
  1920. end;
{ Replaces all references to AOldReg in an operand to ANewReg }
class function TX86AsmOptimizer.ReplaceRegisterInOper(const p: taicpu; const OperIdx: Integer; const AOldReg, ANewReg: TRegister): Boolean;
  var
    OldSupReg, NewSupReg: TSuperRegister;
    OldSubReg, NewSubReg: TSubRegister;
    OldRegType: TRegisterType;
    ThisOper: POper;
  begin
    ThisOper := p.oper[OperIdx]; { Faster to access overall }
    Result := False;
    { Both registers must be valid ... }
    if (AOldReg = NR_NO) or (ANewReg = NR_NO) then
      InternalError(2020011801);
    OldSupReg := getsupreg(AOldReg);
    OldSubReg := getsubreg(AOldReg);
    OldRegType := getregtype(AOldReg);
    NewSupReg := getsupreg(ANewReg);
    NewSubReg := getsubreg(ANewReg);
    { ... and of the same type and size }
    if OldRegType <> getregtype(ANewReg) then
      InternalError(2020011802);
    if OldSubReg <> NewSubReg then
      InternalError(2020011803);
    case ThisOper^.typ of
      top_reg:
        if (
          { exact match ... }
          (ThisOper^.reg = AOldReg) or
          { ... or a sub-register no larger than AOldReg of the same
            integer super-register (e.g. %al when AOldReg is %eax) }
          (
            (OldRegType = R_INTREGISTER) and
            (getsupreg(ThisOper^.reg) = OldSupReg) and
            (getregtype(ThisOper^.reg) = R_INTREGISTER) and
            (
              (getsubreg(ThisOper^.reg) <= OldSubReg)
{$ifndef x86_64}
              and (
                { Under i386 and i8086, ESI, EDI, EBP and ESP
                  don't have an 8-bit representation }
                (getsubreg(ThisOper^.reg) >= R_SUBW) or
                not (NewSupReg in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
              )
{$endif x86_64}
            )
          )
        ) then
          begin
            { keep the operand's own sub-register size; only the super
              register is exchanged }
            ThisOper^.reg := newreg(getregtype(ANewReg), NewSupReg, getsubreg(p.oper[OperIdx]^.reg));
            Result := True;
          end;
      top_ref:
        { memory operand: substitute base/index registers }
        if ReplaceRegisterInRef(ThisOper^.ref^, AOldReg, ANewReg) then
          Result := True;
      else
        ;
    end;
  end;
  1974. { Replaces all references to AOldReg in an instruction to ANewReg }
  1975. function TX86AsmOptimizer.ReplaceRegisterInInstruction(const p: taicpu; const AOldReg, ANewReg: TRegister): Boolean;
  1976. const
  1977. ReadFlag: array[0..3] of TInsChange = (Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Rop4);
  1978. var
  1979. OperIdx: Integer;
  1980. begin
  1981. Result := False;
  1982. for OperIdx := 0 to p.ops - 1 do
  1983. if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) and
  1984. { The shift and rotate instructions can only use CL }
  1985. not (
  1986. (OperIdx = 0) and
  1987. { This second condition just helps to avoid unnecessarily
  1988. calling MatchInstruction for 10 different opcodes }
  1989. (p.oper[0]^.reg = NR_CL) and
  1990. MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], [])
  1991. ) then
  1992. Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
  1993. end;
  1994. class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean; inline;
  1995. begin
  1996. Result :=
  1997. (ref^.index = NR_NO) and
  1998. (
  1999. {$ifdef x86_64}
  2000. (
  2001. (ref^.base = NR_RIP) and
  2002. (ref^.refaddr in [addr_pic, addr_pic_no_got])
  2003. ) or
  2004. {$endif x86_64}
  2005. (ref^.base = NR_STACK_POINTER_REG) or
  2006. (ref^.base = current_procinfo.framepointer)
  2007. );
  2008. end;
  2009. function TX86AsmOptimizer.ConvertLEA(const p: taicpu): Boolean;
  2010. var
  2011. l: asizeint;
  2012. begin
  2013. Result := False;
  2014. { Should have been checked previously }
  2015. if p.opcode <> A_LEA then
  2016. InternalError(2020072501);
  2017. { do not mess with the stack point as adjusting it by lea is recommend, except if we optimize for size }
  2018. if (p.oper[1]^.reg=NR_STACK_POINTER_REG) and
  2019. not(cs_opt_size in current_settings.optimizerswitches) then
  2020. exit;
  2021. with p.oper[0]^.ref^ do
  2022. begin
  2023. if (base <> p.oper[1]^.reg) or
  2024. (index <> NR_NO) or
  2025. assigned(symbol) then
  2026. exit;
  2027. l:=offset;
  2028. if (l=1) and UseIncDec then
  2029. begin
  2030. p.opcode:=A_INC;
  2031. p.loadreg(0,p.oper[1]^.reg);
  2032. p.ops:=1;
  2033. DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
  2034. end
  2035. else if (l=-1) and UseIncDec then
  2036. begin
  2037. p.opcode:=A_DEC;
  2038. p.loadreg(0,p.oper[1]^.reg);
  2039. p.ops:=1;
  2040. DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
  2041. end
  2042. else
  2043. begin
  2044. if (l<0) and (l<>-2147483648) then
  2045. begin
  2046. p.opcode:=A_SUB;
  2047. p.loadConst(0,-l);
  2048. DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
  2049. end
  2050. else
  2051. begin
  2052. p.opcode:=A_ADD;
  2053. p.loadConst(0,l);
  2054. DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
  2055. end;
  2056. end;
  2057. end;
  2058. Result := True;
  2059. end;
function TX86AsmOptimizer.DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
  var
    CurrentReg, ReplaceReg: TRegister;
  begin
    { p_mov is "mov %ReplaceReg,%CurrentReg".  Attempts to rewrite reads of
      CurrentReg in hp to read ReplaceReg instead, shortening the dependency
      chain.  Returns True if hp was modified. }
    Result := False;
    ReplaceReg := taicpu(p_mov).oper[0]^.reg;
    CurrentReg := taicpu(p_mov).oper[1]^.reg;
    case hp.opcode of
      A_FSTSW, A_FNSTSW,
      A_IN, A_INS, A_OUT, A_OUTS,
      A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS:
        { These routines have explicit operands, but they are restricted in
          what they can be (e.g. IN and OUT can only read from AL, AX or
          EAX. }
        Exit;
      A_IMUL:
        begin
          { The 1-operand version writes to implicit registers
            The 2-operand version reads from the first operand, and reads
            from and writes to the second (equivalent to Ch_ROp1, ChRWOp2).
            the 3-operand version reads from a register that it doesn't write to
          }
          case hp.ops of
            1:
              { do not rename when CurrentReg overlaps the implicit
                accumulator registers of the 1-operand form }
              if (
                (
                  (hp.opsize = S_B) and (getsupreg(CurrentReg) <> RS_EAX)
                ) or
                not (getsupreg(CurrentReg) in [RS_EAX, RS_EDX])
              ) and ReplaceRegisterInOper(hp, 0, CurrentReg, ReplaceReg) then
                begin
                  Result := True;
                  DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 1)', hp);
                  AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                end;
            2:
              { Only modify the first parameter }
              if ReplaceRegisterInOper(hp, 0, CurrentReg, ReplaceReg) then
                begin
                  Result := True;
                  DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 2)', hp);
                  AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                end;
            3:
              { Only modify the second parameter }
              if ReplaceRegisterInOper(hp, 1, CurrentReg, ReplaceReg) then
                begin
                  Result := True;
                  DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 3)', hp);
                  AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                end;
            else
              InternalError(2020012901);
          end;
        end;
      else
        { generic case: substitute in every operand the instruction reads }
        if (hp.ops > 0) and
          ReplaceRegisterInInstruction(hp, CurrentReg, ReplaceReg) then
          begin
            Result := True;
            DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovXXX2MovXXX)', hp);
            AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
          end;
    end;
  end;
  2125. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  2126. var
  2127. hp1, hp2, hp3: tai;
  2128. DoOptimisation, TempBool: Boolean;
  2129. procedure convert_mov_value(signed_movop: tasmop; max_value: tcgint); inline;
  2130. begin
  2131. if taicpu(hp1).opcode = signed_movop then
  2132. begin
  2133. if taicpu(p).oper[0]^.val > max_value shr 1 then
  2134. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val - max_value - 1 { Convert to signed }
  2135. end
  2136. else
  2137. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and max_value; { Trim to unsigned }
  2138. end;
  2139. var
  2140. GetNextInstruction_p, TempRegUsed, CrossJump: Boolean;
  2141. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  2142. NewSize: topsize;
  2143. CurrentReg, ActiveReg: TRegister;
  2144. SourceRef, TargetRef: TReference;
  2145. MovAligned, MovUnaligned: TAsmOp;
  2146. begin
  2147. Result:=false;
  2148. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  2149. { remove mov reg1,reg1? }
  2150. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  2151. then
  2152. begin
  2153. DebugMsg(SPeepholeOptimization + 'Mov2Nop 1 done',p);
  2154. { take care of the register (de)allocs following p }
  2155. RemoveCurrentP(p, hp1);
  2156. Result:=true;
  2157. exit;
  2158. end;
  2159. { All the next optimisations require a next instruction }
  2160. if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
  2161. Exit;
  2162. { Look for:
  2163. mov %reg1,%reg2
  2164. ??? %reg2,r/m
  2165. Change to:
  2166. mov %reg1,%reg2
  2167. ??? %reg1,r/m
  2168. }
  2169. if MatchOpType(taicpu(p), top_reg, top_reg) then
  2170. begin
  2171. CurrentReg := taicpu(p).oper[1]^.reg;
  2172. if RegReadByInstruction(CurrentReg, hp1) and
  2173. DeepMOVOpt(taicpu(p), taicpu(hp1)) then
  2174. begin
  2175. TransferUsedRegs(TmpUsedRegs);
  2176. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2177. if not RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs) and
  2178. { Just in case something didn't get modified (e.g. an
  2179. implicit register) }
  2180. not RegReadByInstruction(CurrentReg, hp1) then
  2181. begin
  2182. { We can remove the original MOV }
  2183. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
  2184. RemoveCurrentp(p, hp1);
  2185. { UsedRegs got updated by RemoveCurrentp }
  2186. Result := True;
  2187. Exit;
  2188. end;
  2189. { If we know a MOV instruction has become a null operation, we might as well
  2190. get rid of it now to save time. }
  2191. if (taicpu(hp1).opcode = A_MOV) and
  2192. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2193. SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
  2194. { Just being a register is enough to confirm it's a null operation }
  2195. (taicpu(hp1).oper[0]^.typ = top_reg) then
  2196. begin
  2197. Result := True;
  2198. { Speed-up to reduce a pipeline stall... if we had something like...
  2199. movl %eax,%edx
  2200. movw %dx,%ax
  2201. ... the second instruction would change to movw %ax,%ax, but
  2202. given that it is now %ax that's active rather than %eax,
  2203. penalties might occur due to a partial register write, so instead,
  2204. change it to a MOVZX instruction when optimising for speed.
  2205. }
  2206. if not (cs_opt_size in current_settings.optimizerswitches) and
  2207. IsMOVZXAcceptable and
  2208. (taicpu(hp1).opsize < taicpu(p).opsize)
  2209. {$ifdef x86_64}
  2210. { operations already implicitly set the upper 64 bits to zero }
  2211. and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
  2212. {$endif x86_64}
  2213. then
  2214. begin
  2215. CurrentReg := taicpu(hp1).oper[1]^.reg;
  2216. DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
  2217. case taicpu(p).opsize of
  2218. S_W:
  2219. if taicpu(hp1).opsize = S_B then
  2220. taicpu(hp1).opsize := S_BL
  2221. else
  2222. InternalError(2020012911);
  2223. S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
  2224. case taicpu(hp1).opsize of
  2225. S_B:
  2226. taicpu(hp1).opsize := S_BL;
  2227. S_W:
  2228. taicpu(hp1).opsize := S_WL;
  2229. else
  2230. InternalError(2020012912);
  2231. end;
  2232. else
  2233. InternalError(2020012910);
  2234. end;
  2235. taicpu(hp1).opcode := A_MOVZX;
  2236. taicpu(hp1).oper[1]^.reg := newreg(getregtype(CurrentReg), getsupreg(CurrentReg), R_SUBD)
  2237. end
  2238. else
  2239. begin
  2240. GetNextInstruction_p := GetNextInstruction(hp1, hp2);
  2241. DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
  2242. RemoveInstruction(hp1);
  2243. { The instruction after what was hp1 is now the immediate next instruction,
  2244. so we can continue to make optimisations if it's present }
  2245. if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
  2246. Exit;
  2247. hp1 := hp2;
  2248. end;
  2249. end;
  2250. end;
  2251. end;
  2252. { Depending on the DeepMOVOpt above, it may turn out that hp1 completely
  2253. overwrites the original destination register. e.g.
  2254. movl ###,%reg2d
  2255. movslq ###,%reg2q (### doesn't have to be the same as the first one)
  2256. In this case, we can remove the MOV (Go to "Mov2Nop 5" below)
  2257. }
  2258. if (taicpu(p).oper[1]^.typ = top_reg) and
  2259. MatchInstruction(hp1, [A_LEA, A_MOV, A_MOVSX, A_MOVZX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}], []) and
  2260. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2261. Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[1]^.reg) then
  2262. begin
  2263. if RegInOp(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[0]^) then
  2264. begin
  2265. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  2266. case taicpu(p).oper[0]^.typ of
  2267. top_const:
  2268. { We have something like:
  2269. movb $x, %regb
  2270. movzbl %regb,%regd
  2271. Change to:
  2272. movl $x, %regd
  2273. }
  2274. begin
  2275. case taicpu(hp1).opsize of
  2276. S_BW:
  2277. begin
  2278. convert_mov_value(A_MOVSX, $FF);
  2279. setsubreg(taicpu(p).oper[1]^.reg, R_SUBW);
  2280. taicpu(p).opsize := S_W;
  2281. end;
  2282. S_BL:
  2283. begin
  2284. convert_mov_value(A_MOVSX, $FF);
  2285. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  2286. taicpu(p).opsize := S_L;
  2287. end;
  2288. S_WL:
  2289. begin
  2290. convert_mov_value(A_MOVSX, $FFFF);
  2291. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  2292. taicpu(p).opsize := S_L;
  2293. end;
  2294. {$ifdef x86_64}
  2295. S_BQ:
  2296. begin
  2297. convert_mov_value(A_MOVSX, $FF);
  2298. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  2299. taicpu(p).opsize := S_Q;
  2300. end;
  2301. S_WQ:
  2302. begin
  2303. convert_mov_value(A_MOVSX, $FFFF);
  2304. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  2305. taicpu(p).opsize := S_Q;
  2306. end;
  2307. S_LQ:
  2308. begin
  2309. convert_mov_value(A_MOVSXD, $FFFFFFFF); { Note it's MOVSXD, not MOVSX }
  2310. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  2311. taicpu(p).opsize := S_Q;
  2312. end;
  2313. {$endif x86_64}
  2314. else
  2315. { If hp1 was a MOV instruction, it should have been
  2316. optimised already }
  2317. InternalError(2020021001);
  2318. end;
  2319. DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 2 done',p);
  2320. RemoveInstruction(hp1);
  2321. Result := True;
  2322. Exit;
  2323. end;
  2324. top_ref:
  2325. { We have something like:
  2326. movb mem, %regb
  2327. movzbl %regb,%regd
  2328. Change to:
  2329. movzbl mem, %regd
  2330. }
  2331. if (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) and (IsMOVZXAcceptable or (taicpu(hp1).opcode<>A_MOVZX)) then
  2332. begin
  2333. DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 1 done',p);
  2334. taicpu(hp1).loadref(0,taicpu(p).oper[0]^.ref^);
  2335. RemoveCurrentP(p, hp1);
  2336. Result:=True;
  2337. Exit;
  2338. end;
  2339. else
  2340. if (taicpu(hp1).opcode <> A_MOV) and (taicpu(hp1).opcode <> A_LEA) then
  2341. { Just to make a saving, since there are no more optimisations with MOVZX and MOVSX/D }
  2342. Exit;
  2343. end;
  2344. end
  2345. { The RegInOp check makes sure that movl r/m,%reg1l; movzbl (%reg1l),%reg1l"
  2346. and "movl r/m,%reg1; leal $1(%reg1,%reg2),%reg1" etc. are not incorrectly
  2347. optimised }
  2348. else
  2349. begin
  2350. DebugMsg(SPeepholeOptimization + 'Mov2Nop 5 done',p);
  2351. RemoveCurrentP(p, hp1);
  2352. Result := True;
  2353. Exit;
  2354. end;
  2355. end;
  2356. if (taicpu(hp1).opcode = A_AND) and
  2357. (taicpu(p).oper[1]^.typ = top_reg) and
  2358. MatchOpType(taicpu(hp1),top_const,top_reg) then
  2359. begin
  2360. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  2361. begin
  2362. case taicpu(p).opsize of
  2363. S_L:
  2364. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2365. begin
  2366. { Optimize out:
  2367. mov x, %reg
  2368. and ffffffffh, %reg
  2369. }
  2370. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  2371. RemoveInstruction(hp1);
  2372. Result:=true;
  2373. exit;
  2374. end;
  2375. S_Q: { TODO: Confirm if this is even possible }
  2376. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  2377. begin
  2378. { Optimize out:
  2379. mov x, %reg
  2380. and ffffffffffffffffh, %reg
  2381. }
  2382. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  2383. RemoveInstruction(hp1);
  2384. Result:=true;
  2385. exit;
  2386. end;
  2387. else
  2388. ;
  2389. end;
  2390. if ((taicpu(p).oper[0]^.typ=top_reg) or
  2391. ((taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr<>addr_full))) and
  2392. GetNextInstruction(hp1,hp2) and
  2393. MatchInstruction(hp2,A_TEST,[taicpu(p).opsize]) and
  2394. MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp2).oper[1]^) and
  2395. (MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^) or
  2396. MatchOperand(taicpu(hp2).oper[0]^,-1)) and
  2397. GetNextInstruction(hp2,hp3) and
  2398. MatchInstruction(hp3,A_Jcc,A_Setcc,[]) and
  2399. (taicpu(hp3).condition in [C_E,C_NE]) then
  2400. begin
  2401. TransferUsedRegs(TmpUsedRegs);
  2402. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2403. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  2404. if not(RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp2, TmpUsedRegs)) then
  2405. begin
  2406. DebugMsg(SPeepholeOptimization + 'MovAndTest2Test done',p);
  2407. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  2408. taicpu(hp1).opcode:=A_TEST;
  2409. RemoveInstruction(hp2);
  2410. RemoveCurrentP(p, hp1);
  2411. Result:=true;
  2412. exit;
  2413. end;
  2414. end;
  2415. end
  2416. else if IsMOVZXAcceptable and
  2417. (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  2418. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  2419. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  2420. then
  2421. begin
  2422. InputVal := debug_operstr(taicpu(p).oper[0]^);
  2423. MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
  2424. case taicpu(p).opsize of
  2425. S_B:
  2426. if (taicpu(hp1).oper[0]^.val = $ff) then
  2427. begin
  2428. { Convert:
  2429. movb x, %regl movb x, %regl
  2430. andw ffh, %regw andl ffh, %regd
  2431. To:
  2432. movzbw x, %regd movzbl x, %regd
  2433. (Identical registers, just different sizes)
  2434. }
  2435. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  2436. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  2437. case taicpu(hp1).opsize of
  2438. S_W: NewSize := S_BW;
  2439. S_L: NewSize := S_BL;
  2440. {$ifdef x86_64}
  2441. S_Q: NewSize := S_BQ;
  2442. {$endif x86_64}
  2443. else
  2444. InternalError(2018011510);
  2445. end;
  2446. end
  2447. else
  2448. NewSize := S_NO;
  2449. S_W:
  2450. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2451. begin
  2452. { Convert:
  2453. movw x, %regw
  2454. andl ffffh, %regd
  2455. To:
  2456. movzwl x, %regd
  2457. (Identical registers, just different sizes)
  2458. }
  2459. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  2460. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  2461. case taicpu(hp1).opsize of
  2462. S_L: NewSize := S_WL;
  2463. {$ifdef x86_64}
  2464. S_Q: NewSize := S_WQ;
  2465. {$endif x86_64}
  2466. else
  2467. InternalError(2018011511);
  2468. end;
  2469. end
  2470. else
  2471. NewSize := S_NO;
  2472. else
  2473. NewSize := S_NO;
  2474. end;
  2475. if NewSize <> S_NO then
  2476. begin
  2477. PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
  2478. { The actual optimization }
  2479. taicpu(p).opcode := A_MOVZX;
  2480. taicpu(p).changeopsize(NewSize);
  2481. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  2482. { Safeguard if "and" is followed by a conditional command }
  2483. TransferUsedRegs(TmpUsedRegs);
  2484. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  2485. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
  2486. begin
  2487. { At this point, the "and" command is effectively equivalent to
  2488. "test %reg,%reg". This will be handled separately by the
  2489. Peephole Optimizer. [Kit] }
  2490. DebugMsg(SPeepholeOptimization + PreMessage +
  2491. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  2492. end
  2493. else
  2494. begin
  2495. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
  2496. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  2497. RemoveInstruction(hp1);
  2498. end;
  2499. Result := True;
  2500. Exit;
  2501. end;
  2502. end;
  2503. end;
  2504. if (taicpu(hp1).opcode = A_OR) and
  2505. (taicpu(p).oper[1]^.typ = top_reg) and
  2506. MatchOperand(taicpu(p).oper[0]^, 0) and
  2507. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) then
  2508. begin
  2509. { mov 0, %reg
  2510. or ###,%reg
  2511. Change to (only if the flags are not used):
  2512. mov ###,%reg
  2513. }
  2514. TransferUsedRegs(TmpUsedRegs);
  2515. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2516. DoOptimisation := True;
  2517. { Even if the flags are used, we might be able to do the optimisation
  2518. if the conditions are predictable }
  2519. if RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
  2520. begin
  2521. { Only perform if ### = %reg (the same register) or equal to 0,
  2522. so %reg is guaranteed to still have a value of zero }
  2523. if MatchOperand(taicpu(hp1).oper[0]^, 0) or
  2524. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^.reg) then
  2525. begin
  2526. hp2 := hp1;
  2527. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  2528. while RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
  2529. GetNextInstruction(hp2, hp3) do
  2530. begin
  2531. { Don't continue modifying if the flags state is getting changed }
  2532. if RegModifiedByInstruction(NR_DEFAULTFLAGS, hp3) then
  2533. Break;
  2534. UpdateUsedRegs(TmpUsedRegs, tai(hp3.Next));
  2535. if MatchInstruction(hp3, A_Jcc, A_SETcc, A_CMOVcc, []) then
  2536. begin
  2537. if condition_in(C_E, taicpu(hp3).condition) or (taicpu(hp3).condition in [C_NC, C_NS, C_NO]) then
  2538. begin
  2539. { Condition is always true }
  2540. case taicpu(hp3).opcode of
  2541. A_Jcc:
  2542. begin
  2543. DebugMsg(SPeepholeOptimization + 'Condition is always true (jump made unconditional)', hp3);
  2544. { Check for jump shortcuts before we destroy the condition }
  2545. DoJumpOptimizations(hp3, TempBool);
  2546. MakeUnconditional(taicpu(hp3));
  2547. Result := True;
  2548. end;
  2549. A_CMOVcc:
  2550. begin
  2551. DebugMsg(SPeepholeOptimization + 'Condition is always true (CMOVcc -> MOV)', hp3);
  2552. taicpu(hp3).opcode := A_MOV;
  2553. taicpu(hp3).condition := C_None;
  2554. Result := True;
  2555. end;
  2556. A_SETcc:
  2557. begin
  2558. DebugMsg(SPeepholeOptimization + 'Condition is always true (changed to MOV 1)', hp3);
  2559. { Convert "set(c) %reg" instruction to "movb 1,%reg" }
  2560. taicpu(hp3).opcode := A_MOV;
  2561. taicpu(hp3).ops := 2;
  2562. taicpu(hp3).condition := C_None;
  2563. taicpu(hp3).opsize := S_B;
  2564. taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
  2565. taicpu(hp3).loadconst(0, 1);
  2566. Result := True;
  2567. end;
  2568. else
  2569. InternalError(2021090701);
  2570. end;
  2571. end
  2572. else if (taicpu(hp3).condition in [C_A, C_B, C_C, C_G, C_L, C_NE, C_NZ, C_O, C_S]) then
  2573. begin
  2574. { Condition is always false }
  2575. case taicpu(hp3).opcode of
  2576. A_Jcc:
  2577. begin
  2578. DebugMsg(SPeepholeOptimization + 'Condition is always false (jump removed)', hp3);
  2579. TAsmLabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2580. RemoveInstruction(hp3);
  2581. Result := True;
  2582. { Since hp3 was deleted, hp2 must not be updated }
  2583. Continue;
  2584. end;
  2585. A_CMOVcc:
  2586. begin
  2587. DebugMsg(SPeepholeOptimization + 'Condition is always false (conditional load removed)', hp3);
  2588. RemoveInstruction(hp3);
  2589. Result := True;
  2590. { Since hp3 was deleted, hp2 must not be updated }
  2591. Continue;
  2592. end;
  2593. A_SETcc:
  2594. begin
  2595. DebugMsg(SPeepholeOptimization + 'Condition is always false (changed to MOV 0)', hp3);
  2596. { Convert "set(c) %reg" instruction to "movb 0,%reg" }
  2597. taicpu(hp3).opcode := A_MOV;
  2598. taicpu(hp3).ops := 2;
  2599. taicpu(hp3).condition := C_None;
  2600. taicpu(hp3).opsize := S_B;
  2601. taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
  2602. taicpu(hp3).loadconst(0, 0);
  2603. Result := True;
  2604. end;
  2605. else
  2606. InternalError(2021090702);
  2607. end;
  2608. end
  2609. else
  2610. { Uncertain what to do - don't optimise (although optimise other conditional statements if present) }
  2611. DoOptimisation := False;
  2612. end;
  2613. hp2 := hp3;
  2614. end;
  2615. { Flags are still in use - don't optimise }
  2616. if DoOptimisation and RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
  2617. DoOptimisation := False;
  2618. end
  2619. else
  2620. DoOptimisation := False;
  2621. end;
  2622. if DoOptimisation then
  2623. begin
  2624. {$ifdef x86_64}
  2625. { OR only supports 32-bit sign-extended constants for 64-bit
  2626. instructions, so compensate for this if the constant is
  2627. encoded as a value greater than or equal to 2^31 }
  2628. if (taicpu(hp1).opsize = S_Q) and
  2629. (taicpu(hp1).oper[0]^.typ = top_const) and
  2630. (taicpu(hp1).oper[0]^.val >= $80000000) then
  2631. taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val or $FFFFFFFF00000000;
  2632. {$endif x86_64}
  2633. DebugMsg(SPeepholeOptimization + 'MOV 0 / OR -> MOV', p);
  2634. taicpu(hp1).opcode := A_MOV;
  2635. RemoveCurrentP(p, hp1);
  2636. Result := True;
  2637. Exit;
  2638. end;
  2639. end;
  2640. { Next instruction is also a MOV ? }
  2641. if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  2642. begin
  2643. if (taicpu(p).oper[1]^.typ = top_reg) and
  2644. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  2645. begin
  2646. CurrentReg := taicpu(p).oper[1]^.reg;
  2647. TransferUsedRegs(TmpUsedRegs);
  2648. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2649. { we have
  2650. mov x, %treg
  2651. mov %treg, y
  2652. }
  2653. if not(RegInOp(CurrentReg, taicpu(hp1).oper[1]^)) then
  2654. if not(RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs)) then
  2655. { we've got
  2656. mov x, %treg
  2657. mov %treg, y
  2658. with %treg is not used after }
  2659. case taicpu(p).oper[0]^.typ Of
  2660. { top_reg is covered by DeepMOVOpt }
  2661. top_const:
  2662. begin
  2663. { change
  2664. mov const, %treg
  2665. mov %treg, y
  2666. to
  2667. mov const, y
  2668. }
  2669. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  2670. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  2671. begin
  2672. if taicpu(hp1).oper[1]^.typ=top_reg then
  2673. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  2674. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  2675. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  2676. RemoveInstruction(hp1);
  2677. Result:=true;
  2678. Exit;
  2679. end;
  2680. end;
  2681. top_ref:
  2682. case taicpu(hp1).oper[1]^.typ of
  2683. top_reg:
  2684. begin
  2685. { change
  2686. mov mem, %treg
  2687. mov %treg, %reg
  2688. to
  2689. mov mem, %reg"
  2690. }
  2691. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  2692. taicpu(p).loadreg(1, taicpu(hp1).oper[1]^.reg);
  2693. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  2694. RemoveInstruction(hp1);
  2695. Result:=true;
  2696. Exit;
  2697. end;
  2698. top_ref:
  2699. begin
  2700. {$ifdef x86_64}
  2701. { Look for the following to simplify:
  2702. mov x(mem1), %reg
  2703. mov %reg, y(mem2)
  2704. mov x+8(mem1), %reg
  2705. mov %reg, y+8(mem2)
  2706. Change to:
  2707. movdqu x(mem1), %xmmreg
  2708. movdqu %xmmreg, y(mem2)
  2709. }
  2710. SourceRef := taicpu(p).oper[0]^.ref^;
  2711. TargetRef := taicpu(hp1).oper[1]^.ref^;
  2712. if (taicpu(p).opsize = S_Q) and
  2713. GetNextInstruction(hp1, hp2) and
  2714. MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
  2715. MatchOpType(taicpu(hp2), top_ref, top_reg) then
  2716. begin
  2717. { Delay calling GetNextInstruction(hp2, hp3) for as long as possible }
  2718. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  2719. Inc(SourceRef.offset, 8);
  2720. if UseAVX then
  2721. begin
  2722. MovAligned := A_VMOVDQA;
  2723. MovUnaligned := A_VMOVDQU;
  2724. end
  2725. else
  2726. begin
  2727. MovAligned := A_MOVDQA;
  2728. MovUnaligned := A_MOVDQU;
  2729. end;
  2730. if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) then
  2731. begin
  2732. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  2733. Inc(TargetRef.offset, 8);
  2734. if GetNextInstruction(hp2, hp3) and
  2735. MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
  2736. MatchOpType(taicpu(hp3), top_reg, top_ref) and
  2737. (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
  2738. RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
  2739. not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
  2740. begin
  2741. CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
  2742. if CurrentReg <> NR_NO then
  2743. begin
  2744. { Remember that the offsets are 8 ahead }
  2745. if ((SourceRef.offset mod 16) = 8) and
  2746. (
  2747. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2748. (SourceRef.base = current_procinfo.framepointer) or
  2749. ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
  2750. ) then
  2751. taicpu(p).opcode := MovAligned
  2752. else
  2753. taicpu(p).opcode := MovUnaligned;
  2754. taicpu(p).opsize := S_XMM;
  2755. taicpu(p).oper[1]^.reg := CurrentReg;
  2756. if ((TargetRef.offset mod 16) = 8) and
  2757. (
  2758. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2759. (TargetRef.base = current_procinfo.framepointer) or
  2760. ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
  2761. ) then
  2762. taicpu(hp1).opcode := MovAligned
  2763. else
  2764. taicpu(hp1).opcode := MovUnaligned;
  2765. taicpu(hp1).opsize := S_XMM;
  2766. taicpu(hp1).oper[0]^.reg := CurrentReg;
  2767. DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
  2768. RemoveInstruction(hp2);
  2769. RemoveInstruction(hp3);
  2770. Result := True;
  2771. Exit;
  2772. end;
  2773. end;
  2774. end
  2775. else
  2776. begin
  2777. { See if the next references are 8 less rather than 8 greater }
  2778. Dec(SourceRef.offset, 16); { -8 the other way }
  2779. if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) then
  2780. begin
  2781. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  2782. Dec(TargetRef.offset, 8); { Only 8, not 16, as it wasn't incremented unlike SourceRef }
  2783. if GetNextInstruction(hp2, hp3) and
  2784. MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
  2785. MatchOpType(taicpu(hp3), top_reg, top_ref) and
  2786. (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
  2787. RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
  2788. not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
  2789. begin
  2790. CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
  2791. if CurrentReg <> NR_NO then
  2792. begin
  2793. { hp2 and hp3 are the starting offsets, so mod 0 this time }
  2794. if ((SourceRef.offset mod 16) = 0) and
  2795. (
  2796. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2797. (SourceRef.base = current_procinfo.framepointer) or
  2798. ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
  2799. ) then
  2800. taicpu(hp2).opcode := MovAligned
  2801. else
  2802. taicpu(hp2).opcode := MovUnaligned;
  2803. taicpu(hp2).opsize := S_XMM;
  2804. taicpu(hp2).oper[1]^.reg := CurrentReg;
  2805. if ((TargetRef.offset mod 16) = 0) and
  2806. (
  2807. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2808. (TargetRef.base = current_procinfo.framepointer) or
  2809. ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
  2810. ) then
  2811. taicpu(hp3).opcode := MovAligned
  2812. else
  2813. taicpu(hp3).opcode := MovUnaligned;
  2814. taicpu(hp3).opsize := S_XMM;
  2815. taicpu(hp3).oper[0]^.reg := CurrentReg;
  2816. DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
  2817. RemoveInstruction(hp1);
  2818. RemoveCurrentP(p, hp2);
  2819. Result := True;
  2820. Exit;
  2821. end;
  2822. end;
  2823. end;
  2824. end;
  2825. end;
  2826. {$endif x86_64}
  2827. end;
  2828. else
  2829. { The write target should be a reg or a ref }
  2830. InternalError(2021091601);
  2831. end;
  2832. else
  2833. ;
  2834. end
  2835. else
  2836. { %treg is used afterwards, but all eventualities
  2837. other than the first MOV instruction being a constant
  2838. are covered by DeepMOVOpt, so only check for that }
  2839. if (taicpu(p).oper[0]^.typ = top_const) and
  2840. (
  2841. { For MOV operations, a size saving is only made if the register/const is byte-sized }
  2842. not (cs_opt_size in current_settings.optimizerswitches) or
  2843. (taicpu(hp1).opsize = S_B)
  2844. ) and
  2845. (
  2846. (taicpu(hp1).oper[1]^.typ = top_reg) or
  2847. ((taicpu(p).oper[0]^.val >= low(longint)) and (taicpu(p).oper[0]^.val <= high(longint)))
  2848. ) then
  2849. begin
  2850. DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 6b)',hp1);
  2851. taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
  2852. end;
  2853. end;
  2854. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  2855. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  2856. { mov reg1, mem1 or mov mem1, reg1
  2857. mov mem2, reg2 mov reg2, mem2}
  2858. begin
  2859. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  2860. { mov reg1, mem1 or mov mem1, reg1
  2861. mov mem2, reg1 mov reg2, mem1}
  2862. begin
  2863. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  2864. { Removes the second statement from
  2865. mov reg1, mem1/reg2
  2866. mov mem1/reg2, reg1 }
  2867. begin
  2868. if taicpu(p).oper[0]^.typ=top_reg then
  2869. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  2870. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  2871. RemoveInstruction(hp1);
  2872. Result:=true;
  2873. exit;
  2874. end
  2875. else
  2876. begin
  2877. TransferUsedRegs(TmpUsedRegs);
  2878. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2879. if (taicpu(p).oper[1]^.typ = top_ref) and
  2880. { mov reg1, mem1
  2881. mov mem2, reg1 }
  2882. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  2883. GetNextInstruction(hp1, hp2) and
  2884. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  2885. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  2886. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  2887. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  2888. { change to
  2889. mov reg1, mem1 mov reg1, mem1
  2890. mov mem2, reg1 cmp reg1, mem2
  2891. cmp mem1, reg1
  2892. }
  2893. begin
  2894. RemoveInstruction(hp2);
  2895. taicpu(hp1).opcode := A_CMP;
  2896. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  2897. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  2898. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  2899. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  2900. end;
  2901. end;
  2902. end
  2903. else if (taicpu(p).oper[1]^.typ=top_ref) and
  2904. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  2905. begin
  2906. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  2907. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  2908. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  2909. end
  2910. else
  2911. begin
  2912. TransferUsedRegs(TmpUsedRegs);
  2913. if GetNextInstruction(hp1, hp2) and
  2914. MatchOpType(taicpu(p),top_ref,top_reg) and
  2915. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  2916. (taicpu(hp1).oper[1]^.typ = top_ref) and
  2917. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  2918. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  2919. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  2920. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  2921. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  2922. { mov mem1, %reg1
  2923. mov %reg1, mem2
  2924. mov mem2, reg2
  2925. to:
  2926. mov mem1, reg2
  2927. mov reg2, mem2}
  2928. begin
  2929. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  2930. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  2931. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  2932. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2933. RemoveInstruction(hp2);
  2934. Result := True;
  2935. end
  2936. {$ifdef i386}
  2937. { this is enabled for i386 only, as the rules to create the reg sets below
  2938. are too complicated for x86-64, so this makes this code too error prone
  2939. on x86-64
  2940. }
  2941. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  2942. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  2943. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  2944. { mov mem1, reg1 mov mem1, reg1
  2945. mov reg1, mem2 mov reg1, mem2
  2946. mov mem2, reg2 mov mem2, reg1
  2947. to: to:
  2948. mov mem1, reg1 mov mem1, reg1
  2949. mov mem1, reg2 mov reg1, mem2
  2950. mov reg1, mem2
  2951. or (if mem1 depends on reg1
  2952. and/or if mem2 depends on reg2)
  2953. to:
  2954. mov mem1, reg1
  2955. mov reg1, mem2
  2956. mov reg1, reg2
  2957. }
  2958. begin
  2959. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  2960. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  2961. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  2962. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  2963. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  2964. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  2965. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  2966. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  2967. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  2968. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  2969. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  2970. end
  2971. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  2972. begin
  2973. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  2974. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  2975. end
  2976. else
  2977. begin
  2978. RemoveInstruction(hp2);
  2979. end
  2980. {$endif i386}
  2981. ;
  2982. end;
  2983. end
  2984. { movl [mem1],reg1
  2985. movl [mem1],reg2
  2986. to
  2987. movl [mem1],reg1
  2988. movl reg1,reg2
  2989. }
  2990. else if MatchOpType(taicpu(p),top_ref,top_reg) and
  2991. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  2992. (taicpu(p).opsize = taicpu(hp1).opsize) and
  2993. RefsEqual(taicpu(p).oper[0]^.ref^,taicpu(hp1).oper[0]^.ref^) and
  2994. (taicpu(p).oper[0]^.ref^.volatility=[]) and
  2995. (taicpu(hp1).oper[0]^.ref^.volatility=[]) and
  2996. not(SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^.base)) and
  2997. not(SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^.index)) then
  2998. begin
  2999. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 2',p);
  3000. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  3001. end;
  3002. { movl const1,[mem1]
  3003. movl [mem1],reg1
  3004. to
  3005. movl const1,reg1
  3006. movl reg1,[mem1]
  3007. }
  3008. if MatchOpType(Taicpu(p),top_const,top_ref) and
  3009. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  3010. (taicpu(p).opsize = taicpu(hp1).opsize) and
  3011. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  3012. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  3013. begin
  3014. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  3015. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  3016. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  3017. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  3018. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  3019. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  3020. Result:=true;
  3021. exit;
  3022. end;
  3023. { mov x,reg1; mov y,reg1 -> mov y,reg1 is handled by the Mov2Nop 5 optimisation }
  3024. end;
  3025. { search further than the next instruction for a mov (as long as it's not a jump) }
  3026. if not is_calljmpuncondret(taicpu(hp1).opcode) and
  3027. { check as much as possible before the expensive GetNextInstructionUsingRegCond call }
  3028. (taicpu(p).oper[1]^.typ = top_reg) and
  3029. (taicpu(p).oper[0]^.typ in [top_reg,top_const]) and
  3030. not RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp1) then
  3031. begin
  3032. { we work with hp2 here, so hp1 can be still used later on when
  3033. checking for GetNextInstruction_p }
  3034. hp3 := hp1;
  3035. { Initialise CrossJump (if it becomes True at any point, it will remain True) }
  3036. CrossJump := (taicpu(hp1).opcode = A_Jcc);
  3037. { Saves on a large number of dereferences }
  3038. ActiveReg := taicpu(p).oper[1]^.reg;
  3039. while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and
  3040. { GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
  3041. (hp2.typ=ait_instruction) do
  3042. begin
  3043. case taicpu(hp2).opcode of
  3044. A_MOV:
  3045. if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and
  3046. ((taicpu(p).oper[0]^.typ=top_const) or
  3047. ((taicpu(p).oper[0]^.typ=top_reg) and
  3048. not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp2))
  3049. )
  3050. ) then
  3051. begin
  3052. { we have
  3053. mov x, %treg
  3054. mov %treg, y
  3055. }
  3056. TransferUsedRegs(TmpUsedRegs);
  3057. TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
  3058. { We don't need to call UpdateUsedRegs for every instruction between
  3059. p and hp2 because the register we're concerned about will not
  3060. become deallocated (otherwise GetNextInstructionUsingReg would
  3061. have stopped at an earlier instruction). [Kit] }
  3062. TempRegUsed :=
  3063. CrossJump { Assume the register is in use if it crossed a conditional jump } or
  3064. RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) or
  3065. RegReadByInstruction(ActiveReg, hp1);
  3066. case taicpu(p).oper[0]^.typ Of
  3067. top_reg:
  3068. begin
  3069. { change
  3070. mov %reg, %treg
  3071. mov %treg, y
  3072. to
  3073. mov %reg, y
  3074. }
  3075. CurrentReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
  3076. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  3077. if taicpu(hp2).oper[1]^.reg = CurrentReg then
  3078. begin
  3079. { %reg = y - remove hp2 completely (doing it here instead of relying on
  3080. the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
  3081. if TempRegUsed then
  3082. begin
  3083. DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
  3084. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  3085. { Set the start of the next GetNextInstructionUsingRegCond search
  3086. to start at the entry right before hp2 (which is about to be removed) }
  3087. hp3 := tai(hp2.Previous);
  3088. RemoveInstruction(hp2);
  3089. { See if there's more we can optimise }
  3090. Continue;
  3091. end
  3092. else
  3093. begin
  3094. RemoveInstruction(hp2);
  3095. { We can remove the original MOV too }
  3096. DebugMsg(SPeepholeOptimization + 'MovMov2NopNop 6b done',p);
  3097. RemoveCurrentP(p, hp1);
  3098. Result:=true;
  3099. Exit;
  3100. end;
  3101. end
  3102. else
  3103. begin
  3104. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  3105. taicpu(hp2).loadReg(0, CurrentReg);
  3106. if TempRegUsed then
  3107. begin
  3108. { Don't remove the first instruction if the temporary register is in use }
  3109. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(CurrentReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
  3110. { No need to set Result to True. If there's another instruction later on
  3111. that can be optimised, it will be detected when the main Pass 1 loop
  3112. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  3113. end
  3114. else
  3115. begin
  3116. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
  3117. RemoveCurrentP(p, hp1);
  3118. Result:=true;
  3119. Exit;
  3120. end;
  3121. end;
  3122. end;
  3123. top_const:
  3124. if not (cs_opt_size in current_settings.optimizerswitches) or (taicpu(hp2).opsize = S_B) then
  3125. begin
  3126. { change
  3127. mov const, %treg
  3128. mov %treg, y
  3129. to
  3130. mov const, y
  3131. }
  3132. if (taicpu(hp2).oper[1]^.typ=top_reg) or
  3133. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  3134. begin
  3135. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  3136. taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
  3137. if TempRegUsed then
  3138. begin
  3139. { Don't remove the first instruction if the temporary register is in use }
  3140. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 7a)',hp2);
  3141. { No need to set Result to True. If there's another instruction later on
  3142. that can be optimised, it will be detected when the main Pass 1 loop
  3143. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  3144. end
  3145. else
  3146. begin
  3147. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 7 done',p);
  3148. RemoveCurrentP(p, hp1);
  3149. Result:=true;
  3150. Exit;
  3151. end;
  3152. end;
  3153. end;
  3154. else
  3155. Internalerror(2019103001);
  3156. end;
  3157. end
  3158. else
  3159. if MatchOperand(taicpu(hp2).oper[1]^, ActiveReg) then
  3160. begin
  3161. if not CrossJump and
  3162. not RegUsedBetween(ActiveReg, p, hp2) and
  3163. not RegReadByInstruction(ActiveReg, hp2) then
  3164. begin
  3165. { Register is not used before it is overwritten }
  3166. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
  3167. RemoveCurrentp(p, hp1);
  3168. Result := True;
  3169. Exit;
  3170. end;
  3171. if (taicpu(p).oper[0]^.typ = top_const) and
  3172. (taicpu(hp2).oper[0]^.typ = top_const) then
  3173. begin
  3174. if taicpu(p).oper[0]^.val = taicpu(hp2).oper[0]^.val then
  3175. begin
  3176. { Same value - register hasn't changed }
  3177. DebugMsg(SPeepholeOptimization + 'Mov2Nop 2 done', hp2);
  3178. RemoveInstruction(hp2);
  3179. Result := True;
  3180. { See if there's more we can optimise }
  3181. Continue;
  3182. end;
  3183. end;
  3184. end;
  3185. A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
  3186. if MatchOpType(taicpu(hp2), top_reg, top_reg) and
  3187. MatchOperand(taicpu(hp2).oper[0]^, ActiveReg) and
  3188. SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, ActiveReg) then
  3189. begin
  3190. {
  3191. Change from:
  3192. mov ###, %reg
  3193. ...
  3194. movs/z %reg,%reg (Same register, just different sizes)
  3195. To:
  3196. movs/z ###, %reg (Longer version)
  3197. ...
  3198. (remove)
  3199. }
  3200. DebugMsg(SPeepholeOptimization + 'MovMovs/z2Mov/s/z done', p);
  3201. taicpu(p).oper[1]^.reg := taicpu(hp2).oper[1]^.reg;
  3202. { Keep the first instruction as mov if ### is a constant }
  3203. if taicpu(p).oper[0]^.typ = top_const then
  3204. taicpu(p).opsize := reg2opsize(taicpu(hp2).oper[1]^.reg)
  3205. else
  3206. begin
  3207. taicpu(p).opcode := taicpu(hp2).opcode;
  3208. taicpu(p).opsize := taicpu(hp2).opsize;
  3209. end;
  3210. DebugMsg(SPeepholeOptimization + 'Removed movs/z instruction and extended earlier write (MovMovs/z2Mov/s/z)', hp2);
  3211. AllocRegBetween(taicpu(hp2).oper[1]^.reg, p, hp2, UsedRegs);
  3212. RemoveInstruction(hp2);
  3213. Result := True;
  3214. Exit;
  3215. end;
  3216. else
  3217. if MatchOpType(taicpu(p), top_reg, top_reg) then
  3218. begin
  3219. TransferUsedRegs(TmpUsedRegs);
  3220. TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
  3221. if
  3222. not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1) and
  3223. not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, hp2) and
  3224. DeepMovOpt(taicpu(p), taicpu(hp2)) then
  3225. begin
  3226. { Just in case something didn't get modified (e.g. an
  3227. implicit register) }
  3228. if not RegReadByInstruction(ActiveReg, hp2) and
  3229. { If a conditional jump was crossed, do not delete
  3230. the original MOV no matter what }
  3231. not CrossJump then
  3232. begin
  3233. TransferUsedRegs(TmpUsedRegs);
  3234. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  3235. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  3236. if
  3237. { Make sure the original register isn't still present
  3238. and has been written to (e.g. with SHRX) }
  3239. RegLoadedWithNewValue(ActiveReg, hp2) or
  3240. not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then
  3241. begin
  3242. RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
  3243. { We can remove the original MOV }
  3244. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
  3245. RemoveCurrentp(p, hp1);
  3246. Result := True;
  3247. Exit;
  3248. end
  3249. else
  3250. begin
  3251. { See if there's more we can optimise }
  3252. hp3 := hp2;
  3253. Continue;
  3254. end;
  3255. end;
  3256. end;
  3257. end;
  3258. end;
  3259. { Break out of the while loop under normal circumstances }
  3260. Break;
  3261. end;
  3262. end;
  3263. if (aoc_MovAnd2Mov_3 in OptsToCheck) and
  3264. (taicpu(p).oper[1]^.typ = top_reg) and
  3265. (taicpu(p).opsize = S_L) and
  3266. GetNextInstructionUsingRegTrackingUse(p,hp2,taicpu(p).oper[1]^.reg) and
  3267. (taicpu(hp2).opcode = A_AND) and
  3268. (MatchOpType(taicpu(hp2),top_const,top_reg) or
  3269. (MatchOpType(taicpu(hp2),top_reg,top_reg) and
  3270. MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^))
  3271. ) then
  3272. begin
  3273. if SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp2).oper[1]^.reg) then
  3274. begin
  3275. if ((taicpu(hp2).oper[0]^.typ=top_const) and (taicpu(hp2).oper[0]^.val = $ffffffff)) or
  3276. ((taicpu(hp2).oper[0]^.typ=top_reg) and (taicpu(hp2).opsize=S_L)) then
  3277. begin
  3278. { Optimize out:
  3279. mov x, %reg
  3280. and ffffffffh, %reg
  3281. }
  3282. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 3 done',p);
  3283. RemoveInstruction(hp2);
  3284. Result:=true;
  3285. exit;
  3286. end;
  3287. end;
  3288. end;
  3289. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  3290. x >= RetOffset) as it doesn't do anything (it writes either to a
  3291. parameter or to the temporary storage room for the function
  3292. result)
  3293. }
  3294. if IsExitCode(hp1) and
  3295. (taicpu(p).oper[1]^.typ = top_ref) and
  3296. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  3297. (
  3298. (
  3299. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  3300. not (
  3301. assigned(current_procinfo.procdef.funcretsym) and
  3302. (taicpu(p).oper[1]^.ref^.offset <= tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)
  3303. )
  3304. ) or
  3305. { Also discard writes to the stack that are below the base pointer,
  3306. as this is temporary storage rather than a function result on the
  3307. stack, say. }
  3308. (
  3309. (taicpu(p).oper[1]^.ref^.base = NR_STACK_POINTER_REG) and
  3310. (taicpu(p).oper[1]^.ref^.offset < current_procinfo.final_localsize)
  3311. )
  3312. ) then
  3313. begin
  3314. RemoveCurrentp(p, hp1);
  3315. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  3316. RemoveLastDeallocForFuncRes(p);
  3317. Result:=true;
  3318. exit;
  3319. end;
  3320. if MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) then
  3321. begin
  3322. if MatchOpType(taicpu(p),top_reg,top_ref) and
  3323. (taicpu(hp1).oper[1]^.typ = top_ref) and
  3324. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  3325. begin
  3326. { change
  3327. mov reg1, mem1
  3328. test/cmp x, mem1
  3329. to
  3330. mov reg1, mem1
  3331. test/cmp x, reg1
  3332. }
  3333. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  3334. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  3335. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  3336. Result := True;
  3337. Exit;
  3338. end;
  3339. if MatchOpType(taicpu(p),top_ref,top_reg) and
  3340. { The x86 assemblers have difficulty comparing values against absolute addresses }
  3341. (taicpu(p).oper[0]^.ref^.refaddr in [addr_no, addr_pic, addr_pic_no_got]) and
  3342. (taicpu(hp1).oper[0]^.typ <> top_ref) and
  3343. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
  3344. (
  3345. (
  3346. (taicpu(hp1).opcode = A_TEST)
  3347. ) or (
  3348. (taicpu(hp1).opcode = A_CMP) and
  3349. { A sanity check more than anything }
  3350. not MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg)
  3351. )
  3352. ) then
  3353. begin
  3354. { change
  3355. mov mem, %reg
  3356. cmp/test x, %reg / test %reg,%reg
  3357. (reg deallocated)
  3358. to
  3359. cmp/test x, mem / cmp 0, mem
  3360. }
  3361. TransferUsedRegs(TmpUsedRegs);
  3362. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  3363. if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then
  3364. begin
  3365. { Convert test %reg,%reg or test $-1,%reg to cmp $0,mem }
  3366. if (taicpu(hp1).opcode = A_TEST) and
  3367. (
  3368. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) or
  3369. MatchOperand(taicpu(hp1).oper[0]^, -1)
  3370. ) then
  3371. begin
  3372. taicpu(hp1).opcode := A_CMP;
  3373. taicpu(hp1).loadconst(0, 0);
  3374. end;
  3375. taicpu(hp1).loadref(1, taicpu(p).oper[0]^.ref^);
  3376. DebugMsg(SPeepholeOptimization + 'MOV/CMP -> CMP (memory check)', p);
  3377. RemoveCurrentP(p, hp1);
  3378. Result := True;
  3379. Exit;
  3380. end;
  3381. end;
  3382. end;
  3383. if MatchInstruction(hp1,A_LEA,[S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
  3384. { If the flags register is in use, don't change the instruction to an
  3385. ADD otherwise this will scramble the flags. [Kit] }
  3386. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) then
  3387. begin
  3388. if MatchOpType(Taicpu(p),top_ref,top_reg) and
  3389. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  3390. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  3391. ) or
  3392. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  3393. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  3394. )
  3395. ) then
  3396. { mov reg1,ref
  3397. lea reg2,[reg1,reg2]
  3398. to
  3399. add reg2,ref}
  3400. begin
  3401. TransferUsedRegs(TmpUsedRegs);
  3402. { reg1 may not be used afterwards }
  3403. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  3404. begin
  3405. Taicpu(hp1).opcode:=A_ADD;
  3406. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  3407. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  3408. RemoveCurrentp(p, hp1);
  3409. result:=true;
  3410. exit;
  3411. end;
  3412. end;
  3413. { If the LEA instruction can be converted into an arithmetic instruction,
  3414. it may be possible to then fold it in the next optimisation, otherwise
  3415. there's nothing more that can be optimised here. }
  3416. if not ConvertLEA(taicpu(hp1)) then
  3417. Exit;
  3418. end;
  3419. if (taicpu(p).oper[1]^.typ = top_reg) and
  3420. (hp1.typ = ait_instruction) and
  3421. GetNextInstruction(hp1, hp2) and
  3422. MatchInstruction(hp2,A_MOV,[]) and
  3423. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  3424. (topsize2memsize[taicpu(hp1).opsize]>=topsize2memsize[taicpu(hp2).opsize]) and
  3425. (
  3426. IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg)
  3427. {$ifdef x86_64}
  3428. or
  3429. (
  3430. (taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  3431. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ))
  3432. )
  3433. {$endif x86_64}
  3434. ) then
  3435. begin
  3436. if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  3437. (taicpu(hp2).oper[0]^.typ=top_reg) then
  3438. { change movsX/movzX reg/ref, reg2
  3439. add/sub/or/... reg3/$const, reg2
  3440. mov reg2 reg/ref
  3441. dealloc reg2
  3442. to
  3443. add/sub/or/... reg3/$const, reg/ref }
  3444. begin
  3445. TransferUsedRegs(TmpUsedRegs);
  3446. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3447. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  3448. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  3449. begin
  3450. { by example:
  3451. movswl %si,%eax movswl %si,%eax p
  3452. decl %eax addl %edx,%eax hp1
  3453. movw %ax,%si movw %ax,%si hp2
  3454. ->
  3455. movswl %si,%eax movswl %si,%eax p
  3456. decw %eax addw %edx,%eax hp1
  3457. movw %ax,%si movw %ax,%si hp2
  3458. }
  3459. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  3460. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  3461. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  3462. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  3463. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  3464. {
  3465. ->
  3466. movswl %si,%eax movswl %si,%eax p
  3467. decw %si addw %dx,%si hp1
  3468. movw %ax,%si movw %ax,%si hp2
  3469. }
  3470. case taicpu(hp1).ops of
  3471. 1:
  3472. begin
  3473. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  3474. if taicpu(hp1).oper[0]^.typ=top_reg then
  3475. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3476. end;
  3477. 2:
  3478. begin
  3479. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  3480. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  3481. (taicpu(hp1).opcode<>A_SHL) and
  3482. (taicpu(hp1).opcode<>A_SHR) and
  3483. (taicpu(hp1).opcode<>A_SAR) then
  3484. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3485. end;
  3486. else
  3487. internalerror(2008042701);
  3488. end;
  3489. {
  3490. ->
  3491. decw %si addw %dx,%si p
  3492. }
  3493. RemoveInstruction(hp2);
  3494. RemoveCurrentP(p, hp1);
  3495. Result:=True;
  3496. Exit;
  3497. end;
  3498. end;
  3499. if MatchOpType(taicpu(hp2),top_reg,top_reg) and
  3500. not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
  3501. ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
  3502. { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
  3503. ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
  3504. )
  3505. {$ifdef i386}
  3506. { byte registers of esi, edi, ebp, esp are not available on i386 }
  3507. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  3508. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  3509. {$endif i386}
  3510. then
  3511. { change movsX/movzX reg/ref, reg2
  3512. add/sub/or/... regX/$const, reg2
  3513. mov reg2, reg3
  3514. dealloc reg2
  3515. to
  3516. movsX/movzX reg/ref, reg3
  3517. add/sub/or/... reg3/$const, reg3
  3518. }
  3519. begin
  3520. TransferUsedRegs(TmpUsedRegs);
  3521. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3522. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  3523. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  3524. begin
  3525. { by example:
  3526. movswl %si,%eax movswl %si,%eax p
  3527. decl %eax addl %edx,%eax hp1
  3528. movw %ax,%si movw %ax,%si hp2
  3529. ->
  3530. movswl %si,%eax movswl %si,%eax p
  3531. decw %eax addw %edx,%eax hp1
  3532. movw %ax,%si movw %ax,%si hp2
  3533. }
  3534. DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
  3535. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  3536. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  3537. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  3538. { limit size of constants as well to avoid assembler errors, but
  3539. check opsize to avoid overflow when left shifting the 1 }
  3540. if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=63) then
  3541. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl topsize2memsize[taicpu(hp2).opsize])-1);
  3542. {$ifdef x86_64}
  3543. { Be careful of, for example:
  3544. movl %reg1,%reg2
  3545. addl %reg3,%reg2
  3546. movq %reg2,%reg4
  3547. This will cause problems if the upper 32-bits of %reg3 or %reg4 are non-zero
  3548. }
  3549. if (taicpu(hp1).opsize = S_L) and (taicpu(hp2).opsize = S_Q) then
  3550. begin
  3551. taicpu(hp2).changeopsize(S_L);
  3552. setsubreg(taicpu(hp2).oper[0]^.reg, R_SUBD);
  3553. setsubreg(taicpu(hp2).oper[1]^.reg, R_SUBD);
  3554. end;
  3555. {$endif x86_64}
  3556. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  3557. taicpu(p).changeopsize(taicpu(hp2).opsize);
  3558. if taicpu(p).oper[0]^.typ=top_reg then
  3559. setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3560. taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
  3561. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  3562. {
  3563. ->
  3564. movswl %si,%eax movswl %si,%eax p
  3565. decw %si addw %dx,%si hp1
  3566. movw %ax,%si movw %ax,%si hp2
  3567. }
  3568. case taicpu(hp1).ops of
  3569. 1:
  3570. begin
  3571. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  3572. if taicpu(hp1).oper[0]^.typ=top_reg then
  3573. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3574. end;
  3575. 2:
  3576. begin
  3577. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  3578. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  3579. (taicpu(hp1).opcode<>A_SHL) and
  3580. (taicpu(hp1).opcode<>A_SHR) and
  3581. (taicpu(hp1).opcode<>A_SAR) then
  3582. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3583. end;
  3584. else
  3585. internalerror(2018111801);
  3586. end;
  3587. {
  3588. ->
  3589. decw %si addw %dx,%si p
  3590. }
  3591. RemoveInstruction(hp2);
  3592. end;
  3593. end;
  3594. end;
  3595. if MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  3596. GetNextInstruction(hp1, hp2) and
  3597. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  3598. MatchOperand(Taicpu(p).oper[0]^,0) and
  3599. (Taicpu(p).oper[1]^.typ = top_reg) and
  3600. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  3601. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  3602. { mov reg1,0
  3603. bts reg1,operand1 --> mov reg1,operand2
  3604. or reg1,operand2 bts reg1,operand1}
  3605. begin
  3606. Taicpu(hp2).opcode:=A_MOV;
  3607. DebugMsg(SPeepholeOptimization + 'MovBtsOr2MovBts done',hp1);
  3608. asml.remove(hp1);
  3609. insertllitem(hp2,hp2.next,hp1);
  3610. RemoveCurrentp(p, hp1);
  3611. Result:=true;
  3612. exit;
  3613. end;
  3614. {
  3615. mov ref,reg0
  3616. <op> reg0,reg1
  3617. dealloc reg0
  3618. to
  3619. <op> ref,reg1
  3620. }
  3621. if MatchOpType(taicpu(p),top_ref,top_reg) and
  3622. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  3623. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  3624. MatchInstruction(hp1,[A_AND,A_OR,A_XOR,A_ADD,A_SUB,A_CMP],[Taicpu(p).opsize]) and
  3625. not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) and
  3626. RegEndOfLife(taicpu(p).oper[1]^.reg,taicpu(hp1)) then
  3627. begin
  3628. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  3629. DebugMsg(SPeepholeOptimization + 'MovOp2Op done',hp1);
  3630. RemoveCurrentp(p, hp1);
  3631. Result:=true;
  3632. exit;
  3633. end;
  3634. {$ifdef x86_64}
  3635. { Convert:
  3636. movq x(ref),%reg64
  3637. shrq y,%reg64
  3638. To:
  3639. movq x+4(ref),%reg32
  3640. shrq y-32,%reg32 (Remove if y = 32)
  3641. }
  3642. if (taicpu(p).opsize = S_Q) and
  3643. (taicpu(p).oper[0]^.typ = top_ref) and { Second operand will be a register }
  3644. (taicpu(p).oper[0]^.ref^.offset <= $7FFFFFFB) and
  3645. MatchInstruction(hp1, A_SHR, [taicpu(p).opsize]) and
  3646. MatchOpType(taicpu(hp1), top_const, top_reg) and
  3647. (taicpu(hp1).oper[0]^.val >= 32) and
  3648. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3649. begin
  3650. RegName1 := debug_regname(taicpu(hp1).oper[1]^.reg);
  3651. PreMessage := 'movq ' + debug_operstr(taicpu(p).oper[0]^) + ',' + RegName1 + '; ' +
  3652. 'shrq $' + debug_tostr(taicpu(hp1).oper[0]^.val) + ',' + RegName1 + ' -> movl ';
  3653. { Convert to 32-bit }
  3654. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  3655. taicpu(p).opsize := S_L;
  3656. Inc(taicpu(p).oper[0]^.ref^.offset, 4);
  3657. PreMessage := PreMessage + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg);
  3658. if (taicpu(hp1).oper[0]^.val = 32) then
  3659. begin
  3660. DebugMsg(SPeepholeOptimization + PreMessage + ' (MovShr2Mov)', p);
  3661. RemoveInstruction(hp1);
  3662. end
  3663. else
  3664. begin
  3665. { This will potentially open up more arithmetic operations since
  3666. the peephole optimizer now has a big hint that only the lower
  3667. 32 bits are currently in use (and opcodes are smaller in size) }
  3668. setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
  3669. taicpu(hp1).opsize := S_L;
  3670. Dec(taicpu(hp1).oper[0]^.val, 32);
  3671. DebugMsg(SPeepholeOptimization + PreMessage +
  3672. '; shrl $' + debug_tostr(taicpu(hp1).oper[0]^.val) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' (MovShr2MovShr)', p);
  3673. end;
  3674. Result := True;
  3675. Exit;
  3676. end;
  3677. {$endif x86_64}
  3678. end;
{ Pass-1 peephole for the MOVSX/MOVZX/MOVSS/MOVSD-style "movXX" family:
  collapses a pair of adjacent movXX instructions that copy a value out and
  straight back again.  Returns True if the instruction list was changed
  (p is then advanced past the removed instruction(s)). }
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  var
    hp1 : tai;  { the instruction immediately following p }
  begin
    Result:=false;
    { Only the two-operand form is handled here. }
    if taicpu(p).ops <> 2 then
      exit;
    { The follower must be the same opcode at the same operand size. }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        { Operand types must be mirrored (reg/mem in p vs. mem/reg in hp1,
          or vice versa). }
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
           (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
            {  movXX reg1, mem1     or     movXX mem1, reg1
               movXX mem2, reg2            movXX reg2, mem2}
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg1            movXX reg2, mem1}
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                        movXX reg1, mem1/reg2
                        movXX mem1/reg2, reg1
                    }
                    { Keep reg1's allocation alive across the span now that
                      the round-trip is collapsed. }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { Removes the second statement from
                        movXX mem1/reg1, reg2
                        movXX reg2, mem1/reg1
                    }
                    { If the intermediate register (p's destination) dies
                      after hp1, both instructions can go; otherwise only
                      the redundant copy-back is removed. }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                        RemoveInstruction(hp1);
                        RemoveCurrentp(p); { p will now be equal to the instruction that follows what was hp1 }
                      end
                    else
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                        RemoveInstruction(hp1);
                      end;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
{ Pass-1 peephole for commutative two-operand SSE/AVX arithmetic ops:
  folds a following register-swapping MOVAPS/MOVAPD back into the
  operation itself when the intermediate register is dead afterwards.
  Returns True if the instruction list was changed. }
function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  var
    hp1 : tai;  { the instruction immediately following p }
  begin
    result:=false;
    { replace
        <Op>X    %mreg1,%mreg2  // Op in [ADD,MUL]
        MovX     %mreg2,%mreg1
        dealloc  %mreg2
      by
        <Op>X    %mreg2,%mreg1
      ?
    }
    if GetNextInstruction(p,hp1) and
      { we mix single and double operations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
      { the move must copy p's destination back into p's source register }
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
      (taicpu(p).oper[0]^.typ=top_reg) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { only safe if the old destination register is dead after the move }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            { swap the operands so the op writes directly to the final register;
              valid because the handled ops are commutative }
            taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
            RemoveInstruction(hp1);
            result:=true;
          end;
      end;
  end;
{ Pass-1 peephole for TEST instructions.  Two transformations:
    1. Hoist an independent MOV above the TEST so later FLAGS-based
       optimisations are not blocked.
    2. Redirect a conditional jump whose target label is immediately
       followed by an equivalent TEST/Jcc pair (jump threading).
  Returns True if the instruction list was changed. }
function TX86AsmOptimizer.OptPass1Test(var p: tai) : boolean;
  var
    hp1, p_label, p_dist, hp1_dist: tai;
    JumpLabel, JumpLabel_dist: TAsmLabel;
  begin
    Result := False;
    { The MOV may be moved above the TEST only if no register is shared
      between the two instructions in either direction. }
    if GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_MOV,[]) and
      (
        (taicpu(p).oper[0]^.typ <> top_reg) or
        not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
      ) and
      (
        (taicpu(p).oper[1]^.typ <> top_reg) or
        not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
      ) and
      (
        { Make sure the register written to doesn't appear in the
          test instruction (in a reference, say) }
        (taicpu(hp1).oper[1]^.typ <> top_reg) or
        not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
      ) then
      begin
        { If we have something like:
            test %reg1,%reg1
            mov  0,%reg2
          And no registers are shared (the two %reg1's can be different, as
          long as neither of them are also %reg2), move the MOV command to
          before the comparison as this means it can be optimised without
          worrying about the FLAGS register. (This combination is generated
          by "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
        }
        SwapMovCmp(p, hp1);
        Result := True;
        Exit;
      end;
    { Search for:
        test %reg,%reg
        j(c1) @lbl1
        ...
      @lbl:
        test %reg,%reg  (same register)
        j(c2) @lbl2
      If c2 is a subset of c1, change to:
        test %reg,%reg
        j(c1) @lbl2
      (@lbl1 may become a dead label as a result)
    }
    if (taicpu(p).oper[1]^.typ = top_reg) and
      (taicpu(p).oper[0]^.typ = top_reg) and
      (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
      MatchInstruction(hp1, A_JCC, []) and
      IsJumpToLabel(taicpu(hp1)) then
      begin
        JumpLabel := TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol);
        p_label := nil;
        if Assigned(JumpLabel) then
          p_label := getlabelwithsym(JumpLabel);
        if Assigned(p_label) and
          GetNextInstruction(p_label, p_dist) and
          MatchInstruction(p_dist, A_TEST, []) and
          { It's fine if the second test uses smaller sub-registers }
          (taicpu(p_dist).opsize <= taicpu(p).opsize) and
          MatchOpType(taicpu(p_dist), top_reg, top_reg) and
          SuperRegistersEqual(taicpu(p_dist).oper[0]^.reg, taicpu(p).oper[0]^.reg) and
          SuperRegistersEqual(taicpu(p_dist).oper[1]^.reg, taicpu(p).oper[1]^.reg) and
          GetNextInstruction(p_dist, hp1_dist) and
          MatchInstruction(hp1_dist, A_JCC, []) then { This doesn't have to be an explicit label }
          begin
            JumpLabel_dist := TAsmLabel(taicpu(hp1_dist).oper[0]^.ref^.symbol);
            { Bail out if the jump targets itself via the distant pair,
              which would thread into an infinite loop. }
            if JumpLabel = JumpLabel_dist then
              { This is an infinite loop }
              Exit;
            { Best optimisation when the first condition is a subset (or equal) of the second }
            if condition_in(taicpu(hp1).condition, taicpu(hp1_dist).condition) then
              begin
                { Any registers used here will already be allocated }
                { Keep label reference counts consistent: the new target
                  gains a reference, the old one loses it. }
                if Assigned(JumpLabel_dist) then
                  JumpLabel_dist.IncRefs;
                if Assigned(JumpLabel) then
                  JumpLabel.DecRefs;
                DebugMsg(SPeepholeOptimization + 'TEST/Jcc/@Lbl/TEST/Jcc -> TEST/Jcc, redirecting first jump', hp1);
                taicpu(hp1).loadref(0, taicpu(hp1_dist).oper[0]^.ref^);
                Result := True;
                Exit;
              end;
          end;
      end;
  end;
{ Pass-1 peephole for ADD: folds "add $const,%reg1" into the displacement
  of an immediately following LEA that uses reg1 as base and/or index,
  provided reg1 is dead after the LEA.  Returns True if the instruction
  list was changed. }
function TX86AsmOptimizer.OptPass1Add(var p : tai) : boolean;
  var
    hp1 : tai;  { the instruction immediately following p }
  begin
    result:=false;
    { replace
        addX    const,%reg1
        leaX    (%reg1,%reg1,Y),%reg2   // Base or index might not be equal to reg1
        dealloc %reg1
      by
        leaX    const+const*Y(%reg1,%reg1,Y),%reg2
    }
    if MatchOpType(taicpu(p),top_const,top_reg) and
      GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
      { reg1 must appear as the LEA's base and/or index }
      ((taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.base) or
       (taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.index)) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { only safe if reg1 (now holding the un-adjusted value) is dead
          after the LEA }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            DebugMsg(SPeepholeOptimization + 'AddLea2Lea done',p);
            { fold the constant once for the base ... }
            if taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.base then
              inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.val);
            { ... and scaled by the scale factor for the index (both may
              apply when base = index = reg1) }
            if taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.index then
              inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.val*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
            RemoveCurrentP(p);
            result:=true;
          end;
      end;
  end;
  3884. function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  3885. var
  3886. hp1: tai;
  3887. ref: Integer;
  3888. saveref: treference;
  3889. TempReg: TRegister;
  3890. Multiple: TCGInt;
  3891. begin
  3892. Result:=false;
  3893. { removes seg register prefixes from LEA operations, as they
  3894. don't do anything}
  3895. taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
  3896. { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
  3897. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  3898. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  3899. (
  3900. { do not mess with leas accessing the stack pointer
  3901. unless it's a null operation }
  3902. (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) or
  3903. (
  3904. (taicpu(p).oper[0]^.ref^.base = NR_STACK_POINTER_REG) and
  3905. (taicpu(p).oper[0]^.ref^.offset = 0)
  3906. )
  3907. ) and
  3908. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  3909. begin
  3910. if (taicpu(p).oper[0]^.ref^.offset = 0) then
  3911. begin
  3912. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) then
  3913. begin
  3914. hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
  3915. taicpu(p).oper[1]^.reg);
  3916. InsertLLItem(p.previous,p.next, hp1);
  3917. DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
  3918. p.free;
  3919. p:=hp1;
  3920. end
  3921. else
  3922. begin
  3923. DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
  3924. RemoveCurrentP(p);
  3925. end;
  3926. Result:=true;
  3927. exit;
  3928. end
  3929. else if (
  3930. { continue to use lea to adjust the stack pointer,
  3931. it is the recommended way, but only if not optimizing for size }
  3932. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  3933. (cs_opt_size in current_settings.optimizerswitches)
  3934. ) and
  3935. { If the flags register is in use, don't change the instruction
  3936. to an ADD otherwise this will scramble the flags. [Kit] }
  3937. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
  3938. ConvertLEA(taicpu(p)) then
  3939. begin
  3940. Result:=true;
  3941. exit;
  3942. end;
  3943. end;
  3944. if GetNextInstruction(p,hp1) and
  3945. (hp1.typ=ait_instruction) then
  3946. begin
  3947. if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  3948. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  3949. MatchOpType(Taicpu(hp1),top_reg,top_reg) and
  3950. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
  3951. begin
  3952. TransferUsedRegs(TmpUsedRegs);
  3953. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3954. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  3955. begin
  3956. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  3957. DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
  3958. RemoveInstruction(hp1);
  3959. result:=true;
  3960. exit;
  3961. end;
  3962. end;
  3963. { changes
  3964. lea <ref1>, reg1
  3965. <op> ...,<ref. with reg1>,...
  3966. to
  3967. <op> ...,<ref1>,... }
  3968. if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
  3969. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
  3970. not(MatchInstruction(hp1,A_LEA,[])) then
  3971. begin
  3972. { find a reference which uses reg1 }
  3973. if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
  3974. ref:=0
  3975. else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
  3976. ref:=1
  3977. else
  3978. ref:=-1;
  3979. if (ref<>-1) and
  3980. { reg1 must be either the base or the index }
  3981. ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
  3982. begin
  3983. { reg1 can be removed from the reference }
  3984. saveref:=taicpu(hp1).oper[ref]^.ref^;
  3985. if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
  3986. taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
  3987. else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
  3988. taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
  3989. else
  3990. Internalerror(2019111201);
  3991. { check if the can insert all data of the lea into the second instruction }
  3992. if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
  3993. ((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
  3994. ((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
  3995. ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
  3996. ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
  3997. ((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
  3998. (taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
  3999. {$ifdef x86_64}
  4000. and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
  4001. and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
  4002. ((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
  4003. )
  4004. {$endif x86_64}
  4005. then
  4006. begin
  4007. { reg1 might not used by the second instruction after it is remove from the reference }
  4008. if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
  4009. begin
  4010. TransferUsedRegs(TmpUsedRegs);
  4011. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  4012. { reg1 is not updated so it might not be used afterwards }
  4013. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  4014. begin
  4015. DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
  4016. if taicpu(p).oper[0]^.ref^.base<>NR_NO then
  4017. taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
  4018. if taicpu(p).oper[0]^.ref^.index<>NR_NO then
  4019. taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
  4020. if taicpu(p).oper[0]^.ref^.symbol<>nil then
  4021. taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
  4022. if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
  4023. taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
  4024. if taicpu(p).oper[0]^.ref^.scalefactor > 1 then
  4025. taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
  4026. inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
  4027. RemoveCurrentP(p, hp1);
  4028. result:=true;
  4029. exit;
  4030. end
  4031. end;
  4032. end;
  4033. { recover }
  4034. taicpu(hp1).oper[ref]^.ref^:=saveref;
  4035. end;
  4036. end;
  4037. end;
  4038. { for now, we do not mess with the stack pointer, thought it might be usefull to remove
  4039. unneeded lea sequences on the stack pointer, it needs to be tested in detail }
  4040. if (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
  4041. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
  4042. begin
  4043. { Check common LEA/LEA conditions }
  4044. if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
  4045. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
  4046. (taicpu(p).oper[0]^.ref^.relsymbol = nil) and
  4047. (taicpu(p).oper[0]^.ref^.segment = NR_NO) and
  4048. (taicpu(p).oper[0]^.ref^.symbol = nil) and
  4049. (taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
  4050. (taicpu(hp1).oper[0]^.ref^.segment = NR_NO) and
  4051. (taicpu(hp1).oper[0]^.ref^.symbol = nil) and
  4052. (
  4053. (taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
  4054. not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1))
  4055. ) and (
  4056. (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) or { Don't call RegModifiedBetween unnecessarily }
  4057. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4058. not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.index,p,hp1))
  4059. ) then
  4060. begin
  4061. { changes
  4062. lea (regX,scale), reg1
  4063. lea offset(reg1,reg1), reg1
  4064. to
  4065. lea offset(regX,scale*2), reg1
  4066. and
  4067. lea (regX,scale1), reg1
  4068. lea offset(reg1,scale2), reg1
  4069. to
  4070. lea offset(regX,scale1*scale2), reg1
  4071. ... so long as the final scale does not exceed 8
  4072. (Similarly, allow the first instruction to be "lea (regX,regX),reg1")
  4073. }
  4074. if (taicpu(p).oper[0]^.ref^.offset = 0) and
  4075. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
  4076. (
  4077. (
  4078. (taicpu(p).oper[0]^.ref^.base = NR_NO)
  4079. ) or (
  4080. (taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
  4081. (
  4082. (taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[0]^.ref^.index) and
  4083. not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index, p, hp1))
  4084. )
  4085. )
  4086. ) and (
  4087. (
  4088. { lea (reg1,scale2), reg1 variant }
  4089. (taicpu(hp1).oper[0]^.ref^.base = NR_NO) and
  4090. (
  4091. (
  4092. (taicpu(p).oper[0]^.ref^.base = NR_NO) and
  4093. (taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor <= 8)
  4094. ) or (
  4095. { lea (regX,regX), reg1 variant }
  4096. (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  4097. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 4)
  4098. )
  4099. )
  4100. ) or (
  4101. { lea (reg1,reg1), reg1 variant }
  4102. (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
  4103. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1)
  4104. )
  4105. ) then
  4106. begin
  4107. DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
  4108. { Make everything homogeneous to make calculations easier }
  4109. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) then
  4110. begin
  4111. if taicpu(p).oper[0]^.ref^.index <> NR_NO then
  4112. { Convert lea (regX,regX),reg1 to lea (regX,2),reg1 }
  4113. taicpu(p).oper[0]^.ref^.scalefactor := 2
  4114. else
  4115. taicpu(p).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.base;
  4116. taicpu(p).oper[0]^.ref^.base := NR_NO;
  4117. end;
  4118. if (taicpu(hp1).oper[0]^.ref^.base = NR_NO) then
  4119. begin
  4120. { Just to prevent miscalculations }
  4121. if (taicpu(hp1).oper[0]^.ref^.scalefactor = 0) then
  4122. taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor
  4123. else
  4124. taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor;
  4125. end
  4126. else
  4127. begin
  4128. taicpu(hp1).oper[0]^.ref^.base := NR_NO;
  4129. taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor * 2;
  4130. end;
  4131. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
  4132. RemoveCurrentP(p);
  4133. result:=true;
  4134. exit;
  4135. end
  4136. { changes
  4137. lea offset1(regX), reg1
  4138. lea offset2(reg1), reg1
  4139. to
  4140. lea offset1+offset2(regX), reg1 }
  4141. else if
  4142. (
  4143. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
  4144. (taicpu(p).oper[0]^.ref^.index = NR_NO)
  4145. ) or (
  4146. (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
  4147. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
  4148. (
  4149. (
  4150. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4151. (taicpu(p).oper[0]^.ref^.base = NR_NO)
  4152. ) or (
  4153. (taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
  4154. (
  4155. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4156. (
  4157. (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
  4158. (
  4159. (taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
  4160. (taicpu(hp1).oper[0]^.ref^.base = NR_NO)
  4161. )
  4162. )
  4163. )
  4164. )
  4165. )
  4166. ) then
  4167. begin
  4168. DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
  4169. if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
  4170. begin
  4171. taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
  4172. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
  4173. { if the register is used as index and base, we have to increase for base as well
  4174. and adapt base }
  4175. if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
  4176. begin
  4177. taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
  4178. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
  4179. end;
  4180. end
  4181. else
  4182. begin
  4183. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
  4184. taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
  4185. end;
  4186. if taicpu(p).oper[0]^.ref^.index<>NR_NO then
  4187. begin
  4188. taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
  4189. taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
  4190. taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
  4191. end;
  4192. RemoveCurrentP(p);
  4193. result:=true;
  4194. exit;
  4195. end;
  4196. end;
  4197. { Change:
  4198. leal/q $x(%reg1),%reg2
  4199. ...
  4200. shll/q $y,%reg2
  4201. To:
  4202. leal/q $(x+2^y)(%reg1,2^y),%reg2 (if y <= 3)
  4203. }
  4204. if MatchInstruction(hp1, A_SHL, [taicpu(p).opsize]) and
  4205. MatchOpType(taicpu(hp1), top_const, top_reg) and
  4206. (taicpu(hp1).oper[0]^.val <= 3) then
  4207. begin
  4208. Multiple := 1 shl taicpu(hp1).oper[0]^.val;
  4209. TransferUsedRegs(TmpUsedRegs);
  4210. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  4211. TempReg := taicpu(hp1).oper[1]^.reg; { Store locally to reduce the number of dereferences }
  4212. if
  4213. { This allows the optimisation in some circumstances even if the lea instruction already has a scale factor
  4214. (this works even if scalefactor is zero) }
  4215. ((Multiple * taicpu(p).oper[0]^.ref^.scalefactor) <= 8) and
  4216. { Ensure offset doesn't go out of bounds }
  4217. (abs(taicpu(p).oper[0]^.ref^.offset * Multiple) <= $7FFFFFFF) and
  4218. not (RegInUsedRegs(NR_DEFAULTFLAGS,TmpUsedRegs)) and
  4219. MatchOperand(taicpu(p).oper[1]^, TempReg) and
  4220. (
  4221. (
  4222. not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.base, TempReg) and
  4223. (
  4224. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4225. (taicpu(p).oper[0]^.ref^.index = NR_INVALID) or
  4226. (
  4227. { Check for lea $x(%reg1,%reg1),%reg2 and treat as it it were lea $x(%reg1,2),%reg2 }
  4228. (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
  4229. (taicpu(p).oper[0]^.ref^.scalefactor <= 1)
  4230. )
  4231. )
  4232. ) or (
  4233. (
  4234. (taicpu(p).oper[0]^.ref^.base = NR_NO) or
  4235. (taicpu(p).oper[0]^.ref^.base = NR_INVALID)
  4236. ) and
  4237. not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.index, TempReg)
  4238. )
  4239. ) then
  4240. begin
  4241. repeat
  4242. with taicpu(p).oper[0]^.ref^ do
  4243. begin
  4244. { Convert lea $x(%reg1,%reg1),%reg2 to lea $x(%reg1,2),%reg2 }
  4245. if index = base then
  4246. begin
  4247. if Multiple > 4 then
  4248. { Optimisation will no longer work because resultant
  4249. scale factor will exceed 8 }
  4250. Break;
  4251. base := NR_NO;
  4252. scalefactor := 2;
  4253. DebugMsg(SPeepholeOptimization + 'lea $x(%reg1,%reg1),%reg2 -> lea $x(%reg1,2),%reg2 for following optimisation', p);
  4254. end
  4255. else if (base <> NR_NO) and (base <> NR_INVALID) then
  4256. begin
  4257. { Scale factor only works on the index register }
  4258. index := base;
  4259. base := NR_NO;
  4260. end;
  4261. { For safety }
  4262. if scalefactor <= 1 then
  4263. begin
  4264. DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 1', p);
  4265. scalefactor := Multiple;
  4266. end
  4267. else
  4268. begin
  4269. DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 2', p);
  4270. scalefactor := scalefactor * Multiple;
  4271. end;
  4272. offset := offset * Multiple;
  4273. end;
  4274. RemoveInstruction(hp1);
  4275. Result := True;
  4276. Exit;
  4277. { This repeat..until loop exists for the benefit of Break }
  4278. until True;
  4279. end;
  4280. end;
  4281. end;
  4282. end;
    { DoSubAddOpt
      p is expected to be a "sub $const,%reg" instruction.  Inspects the
      instruction immediately preceding p and, when it is a DEC/SUB/ADD of
      the same operand size acting on the same register, folds its effect
      into p's constant and removes the older instruction:
        dec %reg;      sub $c,%reg   ->  sub $(c+1),%reg
        sub $c1,%reg;  sub $c2,%reg  ->  sub $(c1+c2),%reg
        add $c1,%reg;  sub $c2,%reg  ->  sub $(c2-c1),%reg
      In the ADD case, if the combined constant becomes zero, p itself is
      removed as well and p is repositioned to the previous instruction
      (or the following one if there is no previous instruction).
      Returns True only in that "p was removed" case, so the caller knows
      the current instruction pointer changed. }
    function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
      var
        hp1 : tai;
      begin
        DoSubAddOpt := False;
        { the preceding instruction must exist and use the same operand size }
        if GetLastInstruction(p, hp1) and
           (hp1.typ = ait_instruction) and
           (taicpu(hp1).opsize = taicpu(p).opsize) then
          case taicpu(hp1).opcode Of
            A_DEC:
              { "dec %reg; sub $c,%reg" -> "sub $(c+1),%reg" }
              if (taicpu(hp1).oper[0]^.typ = top_reg) and
                 MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                begin
                  taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
                  RemoveInstruction(hp1);
                end;
            A_SUB:
              { "sub $c1,%reg; sub $c2,%reg" -> "sub $(c1+c2),%reg" }
              if MatchOpType(taicpu(hp1),top_const,top_reg) and
                 MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
                begin
                  taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
                  RemoveInstruction(hp1);
                end;
            A_ADD:
              begin
                { "add $c1,%reg; sub $c2,%reg" -> "sub $(c2-c1),%reg" }
                if MatchOpType(taicpu(hp1),top_const,top_reg) and
                   MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
                  begin
                    taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                    RemoveInstruction(hp1);
                    { if the result is "sub $0,%reg", drop the sub entirely }
                    if (taicpu(p).oper[0]^.val = 0) then
                      begin
                        hp1 := tai(p.next);
                        RemoveInstruction(p); { Note, the choice to not use RemoveCurrentp is deliberate }
                        if not GetLastInstruction(hp1, p) then
                          p := hp1;
                        DoSubAddOpt := True;
                      end
                  end;
              end;
            else
              ;
          end;
      end;
  4327. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  4328. {$ifdef i386}
  4329. var
  4330. hp1 : tai;
  4331. {$endif i386}
  4332. begin
  4333. Result:=false;
  4334. { * change "subl $2, %esp; pushw x" to "pushl x"}
  4335. { * change "sub/add const1, reg" or "dec reg" followed by
  4336. "sub const2, reg" to one "sub ..., reg" }
  4337. if MatchOpType(taicpu(p),top_const,top_reg) then
  4338. begin
  4339. {$ifdef i386}
  4340. if (taicpu(p).oper[0]^.val = 2) and
  4341. (taicpu(p).oper[1]^.reg = NR_ESP) and
  4342. { Don't do the sub/push optimization if the sub }
  4343. { comes from setting up the stack frame (JM) }
  4344. (not(GetLastInstruction(p,hp1)) or
  4345. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  4346. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  4347. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  4348. begin
  4349. hp1 := tai(p.next);
  4350. while Assigned(hp1) and
  4351. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  4352. not RegReadByInstruction(NR_ESP,hp1) and
  4353. not RegModifiedByInstruction(NR_ESP,hp1) do
  4354. hp1 := tai(hp1.next);
  4355. if Assigned(hp1) and
  4356. MatchInstruction(hp1,A_PUSH,[S_W]) then
  4357. begin
  4358. taicpu(hp1).changeopsize(S_L);
  4359. if taicpu(hp1).oper[0]^.typ=top_reg then
  4360. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  4361. hp1 := tai(p.next);
  4362. RemoveCurrentp(p, hp1);
  4363. Result:=true;
  4364. exit;
  4365. end;
  4366. end;
  4367. {$endif i386}
  4368. if DoSubAddOpt(p) then
  4369. Result:=true;
  4370. end;
  4371. end;
    { OptPass1SHLSAL
      Pass-1 optimizations for "shl/sal $const,%reg":
        * merges a following add/sub/inc/dec/lea on the same register into a
          single lea (shift becomes the lea scale factor)
        * pre-Pentium2 (32 bit only): rewrites shl $1 as "add %reg,%reg" and
          shl $2/$3 as lea, which pair better on those CPUs
        * removes or narrows a following AND whose mask only clears bits the
          shift already zeroed (optionally through an intermediate mov of the
          mask on x86_64)
        * folds the shift into the scale factor of a following mov/lea memory
          operand that uses the shifted register as index.
      Returns True when p was replaced/removed. }
    function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
      var
        TmpBool1,TmpBool2 : Boolean;
        tmpref : treference;
        hp1,hp2: tai;
        mask: tcgint;
      begin
        Result:=false;
        { All these optimisations work on "shl/sal const,%reg" }
        if not MatchOpType(taicpu(p),top_const,top_reg) then
          Exit;
        if (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
           (taicpu(p).oper[0]^.val <= 3) then
          { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
          begin
            { should we check the next instruction? }
            TmpBool1 := True;
            { have we found an add/sub which could be
              integrated in the lea? }
            TmpBool2 := False;
            { start with "(,%reg,1 shl const)" and accumulate offset/base
              from the instructions that follow }
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            { keep absorbing add/sub/inc/dec/lea instructions that only
              modify the shifted register, as long as nothing afterwards
              reads the flags they would have set }
            while TmpBool1 and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              ((((taicpu(hp1).opcode = A_ADD) or
                 (taicpu(hp1).opcode = A_SUB)) and
                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
               (((taicpu(hp1).opcode = A_INC) or
                 (taicpu(hp1).opcode = A_DEC)) and
                (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg)) or
               ((taicpu(hp1).opcode = A_LEA) and
                (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg))) and
              (not GetNextInstruction(hp1,hp2) or
               not instrReadsFlags(hp2)) Do
              begin
                TmpBool1 := False;
                if taicpu(hp1).opcode=A_LEA then
                  begin
                    { absorb the lea: its offset/base/scale are merged into
                      tmpref, provided the combined scale stays <= 8 }
                    if (TmpRef.base = NR_NO) and
                       (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                       (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
                       (taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and
                       ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                        (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                      begin
                        TmpBool1 := True;
                        TmpBool2 := True;
                        inc(TmpRef.offset, taicpu(hp1).oper[0]^.ref^.offset);
                        if taicpu(hp1).oper[0]^.ref^.scalefactor<>0 then
                          tmpref.scalefactor:=tmpref.scalefactor*taicpu(hp1).oper[0]^.ref^.scalefactor;
                        TmpRef.base := taicpu(hp1).oper[0]^.ref^.base;
                        RemoveInstruction(hp1);
                      end
                  end
                else if (taicpu(hp1).oper[0]^.typ = Top_Const) then
                  begin
                    { add/sub with a constant: fold into the lea offset }
                    TmpBool1 := True;
                    TmpBool2 := True;
                    case taicpu(hp1).opcode of
                      A_ADD:
                        inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                      A_SUB:
                        dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                      else
                        internalerror(2019050536);
                    end;
                    RemoveInstruction(hp1);
                  end
                else
                  { add of a register becomes the lea base (only if the base
                    slot is still free); inc/dec adjust the offset by one }
                  if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                     (((taicpu(hp1).opcode = A_ADD) and
                       (TmpRef.base = NR_NO)) or
                      (taicpu(hp1).opcode = A_INC) or
                      (taicpu(hp1).opcode = A_DEC)) then
                    begin
                      TmpBool1 := True;
                      TmpBool2 := True;
                      case taicpu(hp1).opcode of
                        A_ADD:
                          TmpRef.base := taicpu(hp1).oper[0]^.reg;
                        A_INC:
                          inc(TmpRef.offset);
                        A_DEC:
                          dec(TmpRef.offset);
                        else
                          internalerror(2019050535);
                      end;
                      RemoveInstruction(hp1);
                    end;
              end;
            { emit the combined lea if anything was absorbed; on 32 bit,
              pre-Pentium2 targets also convert a plain shift when not
              optimizing for size }
            if TmpBool2
{$ifndef x86_64}
               or
               ((current_settings.optimizecputype < cpu_Pentium2) and
                (taicpu(p).oper[0]^.val <= 3) and
                not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
              then
              begin
                { "shl $1" with nothing absorbed is cheaper as "add %reg,%reg" }
                if not(TmpBool2) and
                   (taicpu(p).oper[0]^.val=1) then
                  begin
                    hp1:=taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                      taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
                  end
                else
                  hp1:=taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                    taicpu(p).oper[1]^.reg);
                DebugMsg(SPeepholeOptimization + 'ShlAddLeaSubIncDec2Lea',p);
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end;
          end
{$ifndef x86_64}
        else if (current_settings.optimizecputype < cpu_Pentium2) then
          begin
            { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
              but faster on a 486, and pairable in both U and V pipes on the Pentium
              (unlike shl, which is only pairable in the U pipe) }
            if taicpu(p).oper[0]^.val=1 then
              begin
                hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end
            { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
                      "shl $3, %reg" to "lea (,%reg,8), %reg" }
            else if (taicpu(p).opsize = S_L) and
                    (taicpu(p).oper[0]^.val<= 3) then
              begin
                reference_reset(tmpref,2,[]);
                TmpRef.index := taicpu(p).oper[1]^.reg;
                TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
                hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end;
          end
{$endif x86_64}
        else if
          { either "shl; and $mask,%reg1" (hp1 = hp2), or on x86_64 also
            "shl; mov $mask,%reg2; and %reg2,%reg1" (hp2 is the and) }
          GetNextInstruction(p, hp1) and (hp1.typ = ait_instruction) and MatchOpType(taicpu(hp1), top_const, top_reg) and
          (
            (
              MatchInstruction(hp1, A_AND, [taicpu(p).opsize]) and
              SetAndTest(hp1, hp2)
{$ifdef x86_64}
            ) or
            (
              MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
              GetNextInstruction(hp1, hp2) and
              MatchInstruction(hp2, A_AND, [taicpu(p).opsize]) and
              MatchOpType(taicpu(hp2), top_reg, top_reg) and
              (taicpu(hp1).oper[1]^.reg = taicpu(hp2).oper[0]^.reg)
{$endif x86_64}
            )
          ) and
          (taicpu(p).oper[1]^.reg = taicpu(hp2).oper[1]^.reg) then
          begin
            { Change:
                shl x, %reg1
                mov -(1<<x), %reg2
                and %reg2, %reg1
              Or:
                shl x, %reg1
                and -(1<<x), %reg1
              To just:
                shl x, %reg1
              Since the and operation only zeroes bits that are already zero from the shl operation
            }
            { compute the mask of bits the shift leaves possibly set;
              special cases avoid shift-count/overflow pitfalls with Int64 }
            case taicpu(p).oper[0]^.val of
              8:
                mask:=$FFFFFFFFFFFFFF00;
              16:
                mask:=$FFFFFFFFFFFF0000;
              32:
                mask:=$FFFFFFFF00000000;
              63:
                { Constant pre-calculated to prevent overflow errors with Int64 }
                mask:=$8000000000000000;
              else
                begin
                  if taicpu(p).oper[0]^.val >= 64 then
                    { Shouldn't happen realistically, since the register
                      is guaranteed to be set to zero at this point }
                    mask := 0
                  else
                    mask := -(Int64(1 shl taicpu(p).oper[0]^.val));
                end;
            end;
            if taicpu(hp1).oper[0]^.val = mask then
              begin
                { Everything checks out, perform the optimisation, as long as
                  the FLAGS register isn't being used}
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
{$ifdef x86_64}
                if (hp1 <> hp2) then
                  begin
                    { "shl/mov/and" version }
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    { Don't do the optimisation if the FLAGS register is in use }
                    if not(RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp2, TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'ShlMovAnd2Shl', p);
                        { Don't remove the 'mov' instruction if its register is used elsewhere }
                        if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp2, TmpUsedRegs)) then
                          begin
                            RemoveInstruction(hp1);
                            Result := True;
                          end;
                        { Only set Result to True if the 'mov' instruction was removed;
                          the 'and' is always removed here }
                        RemoveInstruction(hp2);
                      end;
                  end
                else
{$endif x86_64}
                  begin
                    { "shl/and" version }
                    { Don't do the optimisation if the FLAGS register is in use }
                    if not(RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'ShlAnd2Shl', p);
                        RemoveInstruction(hp1);
                        Result := True;
                      end;
                  end;
                Exit;
              end
            else {$ifdef x86_64}if (hp1 = hp2) then{$endif x86_64}
              begin
                { Even if the mask doesn't allow for its removal, we might be
                  able to optimise the mask for the "shl/and" version, which
                  may permit other peephole optimisations }
{$ifdef DEBUG_AOPTCPU}
                mask := taicpu(hp1).oper[0]^.val and mask;
                if taicpu(hp1).oper[0]^.val <> mask then
                  begin
                    DebugMsg(
                      SPeepholeOptimization +
                      'Changed mask from $' + debug_tostr(taicpu(hp1).oper[0]^.val) +
                      ' to $' + debug_tostr(mask) +
                      'based on previous instruction (ShlAnd2ShlAnd)', hp1);
                    taicpu(hp1).oper[0]^.val := mask;
                  end;
{$else DEBUG_AOPTCPU}
                { If debugging is off, just set the operand even if it's the same }
                taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val and mask;
{$endif DEBUG_AOPTCPU}
              end;
          end;
        {
          change
          shl/sal const,reg
          <op> ...(...,reg,1),...
          into
          <op> ...(...,reg,1 shl const),...
          if const in 1..3
        }
        if MatchOpType(taicpu(p), top_const, top_reg) and
           (taicpu(p).oper[0]^.val in [1..3]) and
           GetNextInstruction(p, hp1) and
           MatchInstruction(hp1,A_MOV,A_LEA,[]) and
           MatchOpType(taicpu(hp1), top_ref, top_reg) and
           (taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.index) and
           (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^.ref^.base) and
           (taicpu(hp1).oper[0]^.ref^.scalefactor in [0,1]) then
          begin
            { only legal if the shifted register dies after hp1 }
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
              begin
                taicpu(hp1).oper[0]^.ref^.scalefactor:=1 shl taicpu(p).oper[0]^.val;
                DebugMsg(SPeepholeOptimization + 'ShlOp2Op', p);
                RemoveCurrentP(p);
                Result:=true;
              end;
          end;
      end;
    { CheckMemoryWrite
      Merges a run of byte-sized memory writes to consecutive addresses into
      a single dword write.  first_mov is "mov x,(ref)" (byte, 4-aligned
      offset) and second_mov is "mov $0,(ref+1)" (byte); when the following
      instruction(s) also write zero to (ref+2) — either two more byte movs
      or one word mov — the whole sequence becomes:
        mov reg -> movzx %reg_b,%reg_d + movl %reg_d,(ref)
        mov const -> movl $x,(ref)
      (writing the byte constant as a dword is equivalent because the upper
      three bytes written were all zero).
      Note: the caller must have verified that both operands 1 are actually
      references; no type check is done here.
      Returns True if the merge was performed. }
    function TX86AsmOptimizer.CheckMemoryWrite(var first_mov, second_mov: taicpu): Boolean;
      var
        CurrentRef: TReference;
        FullReg: TRegister;
        hp1, hp2: tai;
      begin
        Result := False;
        if (first_mov.opsize <> S_B) or (second_mov.opsize <> S_B) then
          Exit;
        { We assume you've checked if the operand is actually a reference by
          this point. If it isn't, you'll most likely get an access violation }
        CurrentRef := first_mov.oper[1]^.ref^;
        { Memory must be aligned }
        if (CurrentRef.offset mod 4) <> 0 then
          Exit;
        { CurrentRef now tracks the expected address of each subsequent write }
        Inc(CurrentRef.offset);
        CurrentRef.alignment := 1; { Otherwise references_equal will return False }
        if MatchOperand(second_mov.oper[0]^, 0) and
           references_equal(second_mov.oper[1]^.ref^, CurrentRef) and
           GetNextInstruction(second_mov, hp1) and
           (hp1.typ = ait_instruction) and
           (taicpu(hp1).opcode = A_MOV) and
           MatchOpType(taicpu(hp1), top_const, top_ref) and
           (taicpu(hp1).oper[0]^.val = 0) then
          begin
            Inc(CurrentRef.offset);
            CurrentRef.alignment := taicpu(hp1).oper[1]^.ref^.alignment; { Otherwise references_equal might return False }
            { dword register covering first_mov's byte source register }
            FullReg := newreg(R_INTREGISTER,getsupreg(first_mov.oper[0]^.reg), R_SUBD);
            if references_equal(taicpu(hp1).oper[1]^.ref^, CurrentRef) then
              begin
                case taicpu(hp1).opsize of
                  S_B:
                    { need a fourth byte-sized zero write at ref+3 }
                    if GetNextInstruction(hp1, hp2) and
                       MatchInstruction(taicpu(hp2), A_MOV, [S_B]) and
                       MatchOpType(taicpu(hp2), top_const, top_ref) and
                       (taicpu(hp2).oper[0]^.val = 0) then
                      begin
                        Inc(CurrentRef.offset);
                        CurrentRef.alignment := 1; { Otherwise references_equal will return False }
                        if references_equal(taicpu(hp2).oper[1]^.ref^, CurrentRef) and
                           (taicpu(hp2).opsize = S_B) then
                          begin
                            RemoveInstruction(hp1);
                            RemoveInstruction(hp2);
                            first_mov.opsize := S_L;
                            if first_mov.oper[0]^.typ = top_reg then
                              begin
                                DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVb/MOVb -> MOVZX/MOVl', first_mov);
                                { Reuse second_mov as a MOVZX instruction }
                                second_mov.opcode := A_MOVZX;
                                second_mov.opsize := S_BL;
                                second_mov.loadreg(0, first_mov.oper[0]^.reg);
                                second_mov.loadreg(1, FullReg);
                                first_mov.oper[0]^.reg := FullReg;
                                { the movzx must execute before the dword store }
                                asml.Remove(second_mov);
                                asml.InsertBefore(second_mov, first_mov);
                              end
                            else
                              { It's a value }
                              begin
                                DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVb/MOVb -> MOVl', first_mov);
                                RemoveInstruction(second_mov);
                              end;
                            Result := True;
                            Exit;
                          end;
                      end;
                  S_W:
                    { the word-sized zero write already covers ref+2..ref+3 }
                    begin
                      RemoveInstruction(hp1);
                      first_mov.opsize := S_L;
                      if first_mov.oper[0]^.typ = top_reg then
                        begin
                          DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVw -> MOVZX/MOVl', first_mov);
                          { Reuse second_mov as a MOVZX instruction }
                          second_mov.opcode := A_MOVZX;
                          second_mov.opsize := S_BL;
                          second_mov.loadreg(0, first_mov.oper[0]^.reg);
                          second_mov.loadreg(1, FullReg);
                          first_mov.oper[0]^.reg := FullReg;
                          { the movzx must execute before the dword store }
                          asml.Remove(second_mov);
                          asml.InsertBefore(second_mov, first_mov);
                        end
                      else
                        { It's a value }
                        begin
                          DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVw -> MOVl', first_mov);
                          RemoveInstruction(second_mov);
                        end;
                      Result := True;
                      Exit;
                    end;
                  else
                    ;
                end;
              end;
          end;
      end;
    { OptPass1FSTP
      Pass-1 optimization for fstp/fistp to a memory reference that is
      immediately reloaded by a matching fld/fild of the same size from the
      same address:
        * if the reloaded value is only consumed by the function exit code
          (it is the function result on the FPU stack), both the store and
          the load are removed entirely;
        * otherwise the pair may be collapsed to a non-popping fst/fist
          (only when rounding cannot change the value: extended precision,
          or another identical fstp follows). }
    function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
    { returns true if a "continue" should be done after this optimization }
      var
        hp1, hp2: tai;
      begin
        Result := false;
        { match "fstp mem; fld mem" or "fistp mem; fild mem" with identical
          size and reference }
        if MatchOpType(taicpu(p),top_ref) and
           GetNextInstruction(p, hp1) and
           (hp1.typ = ait_instruction) and
           (((taicpu(hp1).opcode = A_FLD) and
             (taicpu(p).opcode = A_FSTP)) or
            ((taicpu(p).opcode = A_FISTP) and
             (taicpu(hp1).opcode = A_FILD))) and
           MatchOpType(taicpu(hp1),top_ref) and
           (taicpu(hp1).opsize = taicpu(p).opsize) and
           RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          begin
            { replacing fstp f;fld f by fst f is only valid for extended because of rounding or if fastmath is on }
            if ((taicpu(p).opsize=S_FX) or (cs_opt_fastmath in current_settings.optimizerswitches)) and
               GetNextInstruction(hp1, hp2) and
               (hp2.typ = ait_instruction) and
               IsExitCode(hp2) and
               { the store must be a plain frame-pointer-relative local, and
                 must not be the function result variable itself }
               (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
               not(assigned(current_procinfo.procdef.funcretsym) and
                   (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
               (taicpu(p).oper[0]^.ref^.index = NR_NO) then
              begin
                { the value stays on the FPU stack; drop both instructions }
                RemoveInstruction(hp1);
                RemoveCurrentP(p, hp2);
                RemoveLastDeallocForFuncRes(p);
                Result := true;
              end
            else
              { we can do this only in fast math mode as fstp is rounding ...
                ... still disabled as it breaks the compiler and/or rtl }
              if ({ (cs_opt_fastmath in current_settings.optimizerswitches) or }
                  { ... or if another fstp equal to the first one follows }
                  (GetNextInstruction(hp1,hp2) and
                   (hp2.typ = ait_instruction) and
                   (taicpu(p).opcode=taicpu(hp2).opcode) and
                   (taicpu(p).opsize=taicpu(hp2).opsize))
                 ) and
                 { fst can't store an extended/comp value }
                 (taicpu(p).opsize <> S_FX) and
                 (taicpu(p).opsize <> S_IQ) then
                begin
                  { keep the store but leave the value on the stack }
                  if (taicpu(p).opcode = A_FSTP) then
                    taicpu(p).opcode := A_FST
                  else
                    taicpu(p).opcode := A_FIST;
                  DebugMsg(SPeepholeOptimization + 'FstpFld2Fst',p);
                  RemoveInstruction(hp1);
                end;
          end;
      end;
    { OptPass1FLD
      Pass-1 optimizations for fld:
        * "fld %st(x); fxxxp %st,%st(1)" -> "fxxx %st(x),%st" (the push/pop
          pair cancels out; non-commutative operations are reversed)
        * "fld mem" followed by "fxxxp %st,%st(1)" where mem was just
          stored/loaded by the previous instruction: reuse the value on the
          stack instead of reloading, turning faddp/fmulp into fadd/fmul
          (or plain "fld %st" for other opcodes)
        * otherwise fold "fld mem2; fxxxp" into a memory-operand "fxxx mem2".
      Returns True when p was removed. }
    function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
      var
        hp1, hp2: tai;
      begin
        result:=false;
        if MatchOpType(taicpu(p),top_reg) and
           GetNextInstruction(p, hp1) and
           (hp1.typ = Ait_Instruction) and
           MatchOpType(taicpu(hp1),top_reg,top_reg) and
           (taicpu(hp1).oper[0]^.reg = NR_ST) and
           (taicpu(hp1).oper[1]^.reg = NR_ST1) then
          { change to
              fld reg               fxxx reg,st
              fxxxp st, st1 (hp1)
            Remark: non commutative operations must be reversed!
          }
          begin
            case taicpu(hp1).opcode Of
              A_FMULP,A_FADDP,
              A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                begin
                  { the pop variants become non-popping ones; sub/div swap to
                    their reversed forms because operand order flips }
                  case taicpu(hp1).opcode Of
                    A_FADDP: taicpu(hp1).opcode := A_FADD;
                    A_FMULP: taicpu(hp1).opcode := A_FMUL;
                    A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                    A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                    else
                      internalerror(2019050534);
                  end;
                  taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
                  taicpu(hp1).oper[1]^.reg := NR_ST;
                  RemoveCurrentP(p, hp1);
                  Result:=true;
                  exit;
                end;
              else
                ;
            end;
          end
        else
          if MatchOpType(taicpu(p),top_ref) and
             GetNextInstruction(p, hp2) and
             (hp2.typ = Ait_Instruction) and
             MatchOpType(taicpu(hp2),top_reg,top_reg) and
             (taicpu(p).opsize in [S_FS, S_FL]) and
             (taicpu(hp2).oper[0]^.reg = NR_ST) and
             (taicpu(hp2).oper[1]^.reg = NR_ST1) then
            { did the previous instruction store/load the very same memory
              location this fld reloads? }
            if GetLastInstruction(p, hp1) and
               MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
               MatchOpType(taicpu(hp1),top_ref) and
               RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
              if ((taicpu(hp2).opcode = A_FMULP) or
                  (taicpu(hp2).opcode = A_FADDP)) then
                { change to
                    fld/fst mem1 (hp1)             fld/fst mem1
                    fld mem1 (p)                   fadd/
                    faddp/                         fmul st, st
                    fmulp st, st1 (hp2) }
                begin
                  RemoveCurrentP(p, hp1);
                  if (taicpu(hp2).opcode = A_FADDP) then
                    taicpu(hp2).opcode := A_FADD
                  else
                    taicpu(hp2).opcode := A_FMUL;
                  taicpu(hp2).oper[1]^.reg := NR_ST;
                end
              else
                { change to
                    fld/fst mem1 (hp1)             fld/fst mem1
                    fld mem1 (p)                   fld st}
                begin
                  taicpu(p).changeopsize(S_FL);
                  taicpu(p).loadreg(0,NR_ST);
                end
            else
              begin
                case taicpu(hp2).opcode Of
                  A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                    { change to
                        fld/fst mem1 (hp1)             fld/fst mem1
                        fld mem2 (p)                   fxxx mem2
                        fxxxp st, st1 (hp2) }
                    begin
                      { memory-operand form; sub/div reversed as above }
                      case taicpu(hp2).opcode Of
                        A_FADDP: taicpu(p).opcode := A_FADD;
                        A_FMULP: taicpu(p).opcode := A_FMUL;
                        A_FSUBP: taicpu(p).opcode := A_FSUBR;
                        A_FSUBRP: taicpu(p).opcode := A_FSUB;
                        A_FDIVP: taicpu(p).opcode := A_FDIVR;
                        A_FDIVRP: taicpu(p).opcode := A_FDIV;
                        else
                          internalerror(2019050533);
                      end;
                      RemoveInstruction(hp2);
                    end
                  else
                    ;
                end
              end
      end;
  4914. function IsCmpSubset(cond1, cond2: TAsmCond): Boolean; inline;
  4915. begin
  4916. Result := condition_in(cond1, cond2) or
  4917. { Not strictly subsets due to the actual flags checked, but because we're
  4918. comparing integers, E is a subset of AE and GE and their aliases }
  4919. ((cond1 in [C_E, C_Z]) and (cond2 in [C_AE, C_NB, C_NC, C_GE, C_NL]));
  4920. end;
{ Pass-1 peephole handler for CMP.

  Optimisations performed (in order):
    - Redirects a conditional jump through a label that leads to an
      identical CMP, when the first condition is a subset of the second
      (or of its inverse, in which case the jump is redirected past the
      second jump).  Handles runs of several Jcc instructions.
    - Removes a second, superfluous CMP (or equivalent TEST reg,reg for
      "cmp $0,reg") that immediately follows a Jcc.
    - "cmp $0,%reg" -> "test %reg,%reg", while rewriting/removing any
      immediately-following Jcc/SETcc whose condition becomes trivially
      true or false once only ZF and SF are meaningful.
    - "cmp $1,r/m; jl" -> "cmp $0,r/m; jle" (then TEST conversion if
      the operand is a register).
    - "cmp $sign-bit-constant,%reg; je/jne" -> "neg %reg; jo/jno" when
      the register is deallocated before the jump.
    - Swaps an unrelated following MOV to before the CMP so the MOV can
      be optimised without FLAGS interference.

  Returns True if any change was made. }
function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean;
  var
    v: TCGInt;
    hp1, hp2, p_dist, p_jump, hp1_dist, p_label, hp1_label: tai;
    FirstMatch: Boolean;
    JumpLabel, JumpLabel_dist, JumpLabel_far: TAsmLabel;
  begin
    Result:=false;

    { All these optimisations need a next instruction }
    if not GetNextInstruction(p, hp1) then
      Exit;

    { Search for:
        cmp   ###,###
        j(c1) @lbl1
        ...
      @lbl:
        cmp   ###.### (same comparison as above)
        j(c2) @lbl2

      If c1 is a subset of c2, change to:
        cmp   ###,###
        j(c2) @lbl2

      (@lbl1 may become a dead label as a result) }

    { Also handle cases where there are multiple jumps in a row }
    p_jump := hp1;
    while Assigned(p_jump) and MatchInstruction(p_jump, A_JCC, []) do
      begin
        if IsJumpToLabel(taicpu(p_jump)) then
          begin
            JumpLabel := TAsmLabel(taicpu(p_jump).oper[0]^.ref^.symbol);
            p_label := nil;
            if Assigned(JumpLabel) then
              p_label := getlabelwithsym(JumpLabel);
            if Assigned(p_label) and
              GetNextInstruction(p_label, p_dist) and
              MatchInstruction(p_dist, A_CMP, []) and
              MatchOperand(taicpu(p_dist).oper[0]^, taicpu(p).oper[0]^) and
              MatchOperand(taicpu(p_dist).oper[1]^, taicpu(p).oper[1]^) and
              GetNextInstruction(p_dist, hp1_dist) and
              MatchInstruction(hp1_dist, A_JCC, []) then { This doesn't have to be an explicit label }
              begin
                JumpLabel_dist := TAsmLabel(taicpu(hp1_dist).oper[0]^.ref^.symbol);

                if JumpLabel = JumpLabel_dist then
                  { This is an infinite loop }
                  Exit;

                { Best optimisation when the first condition is a subset (or equal) of the second }
                if IsCmpSubset(taicpu(p_jump).condition, taicpu(hp1_dist).condition) then
                  begin
                    { Any registers used here will already be allocated }
                    if Assigned(JumpLabel_dist) then
                      JumpLabel_dist.IncRefs;

                    if Assigned(JumpLabel) then
                      JumpLabel.DecRefs;

                    DebugMsg(SPeepholeOptimization + 'CMP/Jcc/@Lbl/CMP/Jcc -> CMP/Jcc, redirecting first jump', p_jump);
                    taicpu(p_jump).condition := taicpu(hp1_dist).condition;
                    taicpu(p_jump).loadref(0, taicpu(hp1_dist).oper[0]^.ref^);
                    Result := True;
                    { Don't exit yet.  Since p and p_jump haven't actually been
                      removed, we can check for more on this iteration }
                  end
                else if IsCmpSubset(taicpu(hp1_dist).condition, inverse_cond(taicpu(p_jump).condition)) and
                  GetNextInstruction(hp1_dist, hp1_label) and
                  SkipAligns(hp1_label, hp1_label) and
                  (hp1_label.typ = ait_label) then
                  begin
                    JumpLabel_far := tai_label(hp1_label).labsym;

                    if (JumpLabel_far = JumpLabel_dist) or (JumpLabel_far = JumpLabel) then
                      { This is an infinite loop }
                      Exit;

                    if Assigned(JumpLabel_far) then
                      begin
                        { In this situation, if the first jump branches, the second
                          one will never branch, so change the destination label
                          to after the second jump }
                        DebugMsg(SPeepholeOptimization + 'CMP/Jcc/@Lbl/CMP/Jcc/@Lbl -> CMP/Jcc, redirecting first jump to 2nd label', p_jump);

                        if Assigned(JumpLabel) then
                          JumpLabel.DecRefs;

                        JumpLabel_far.IncRefs;

                        taicpu(p_jump).oper[0]^.ref^.symbol := JumpLabel_far;

                        Result := True;
                        { Don't exit yet.  Since p and p_jump haven't actually been
                          removed, we can check for more on this iteration }
                        Continue;
                      end;
                  end;
              end;
          end;

        { Search for:
            cmp   ###,###
            j(c1) @lbl1
            cmp   ###,### (same as first)
          Remove second cmp }
        if GetNextInstruction(p_jump, hp2) and
          (
            (
              MatchInstruction(hp2, A_CMP, []) and
              (
                (
                  { Registers need only be the same super-register }
                  MatchOpType(taicpu(p), top_const, top_reg) and
                  (taicpu(hp2).oper[0]^.val = taicpu(p).oper[0]^.val) and
                  SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp2).oper[1]^.reg)
                ) or (
                  MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
                  MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^)
                )
              )
            ) or (
              { Also match cmp $0,%reg; jcc @lbl; test %reg,%reg }
              MatchOperand(taicpu(p).oper[0]^, 0) and
              (taicpu(p).oper[1]^.typ = top_reg) and
              MatchInstruction(hp2, A_TEST, []) and
              MatchOpType(taicpu(hp2), top_reg, top_reg) and
              (taicpu(hp2).oper[0]^.reg = taicpu(hp2).oper[1]^.reg) and
              SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp2).oper[1]^.reg)
            )
          ) then
          begin
            DebugMsg(SPeepholeOptimization + 'CMP/Jcc/CMP; removed superfluous CMP', hp2);
            RemoveInstruction(hp2);
            Result := True;
            { Continue the while loop in case "Jcc/CMP" follows the second CMP that was just removed }
          end;

        GetNextInstruction(p_jump, p_jump);
      end;

    if taicpu(p).oper[0]^.typ = top_const then
      begin
        if (taicpu(p).oper[0]^.val = 0) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          MatchInstruction(hp1,A_Jcc,A_SETcc,[]) then
          begin
            hp2 := p;
            FirstMatch := True;
            { When dealing with "cmp $0,%reg", only ZF and SF contain
              anything meaningful once it's converted to "test %reg,%reg";
              additionally, some jumps will always (or never) branch, so
              evaluate every jump immediately following the
              comparison, optimising the conditions if possible.
              Similarly with SETcc... those that are always set to 0 or 1
              are changed to MOV instructions }
            while FirstMatch or { Saves calling GetNextInstruction unnecessarily }
              (
                GetNextInstruction(hp2, hp1) and
                MatchInstruction(hp1,A_Jcc,A_SETcc,[])
              ) do
              begin
                FirstMatch := False;
                case taicpu(hp1).condition of
                  C_B, C_C, C_NAE, C_O:
                    { For B/NAE:
                        Will never branch since an unsigned integer can never be below zero
                      For C/O:
                        Result cannot overflow because 0 is being subtracted }
                    begin
                      if taicpu(hp1).opcode = A_Jcc then
                        begin
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (jump removed)', hp1);
                          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
                          RemoveInstruction(hp1);
                          { Since hp1 was deleted, hp2 must not be updated }
                          Continue;
                        end
                      else
                        begin
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (set -> mov 0)', hp1);
                          { Convert "set(c) %reg" instruction to "movb 0,%reg" }
                          taicpu(hp1).opcode := A_MOV;
                          taicpu(hp1).ops := 2;
                          taicpu(hp1).condition := C_None;
                          taicpu(hp1).opsize := S_B;
                          taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(hp1).loadconst(0, 0);
                        end;
                    end;
                  C_BE, C_NA:
                    begin
                      { Will only branch if equal to zero }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition BE/NA --> E', hp1);
                      taicpu(hp1).condition := C_E;
                    end;
                  C_A, C_NBE:
                    begin
                      { Will only branch if not equal to zero }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition A/NBE --> NE', hp1);
                      taicpu(hp1).condition := C_NE;
                    end;
                  C_AE, C_NB, C_NC, C_NO:
                    begin
                      { Will always branch }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition AE/NB/NC/NO --> Always', hp1);
                      if taicpu(hp1).opcode = A_Jcc then
                        begin
                          MakeUnconditional(taicpu(hp1));
                          { Any jumps/set that follow will now be dead code }
                          RemoveDeadCodeAfterJump(taicpu(hp1));
                          Break;
                        end
                      else
                        begin
                          { Convert "set(c) %reg" instruction to "movb 1,%reg" }
                          taicpu(hp1).opcode := A_MOV;
                          taicpu(hp1).ops := 2;
                          taicpu(hp1).condition := C_None;
                          taicpu(hp1).opsize := S_B;
                          taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(hp1).loadconst(0, 1);
                        end;
                    end;
                  C_None:
                    InternalError(2020012201);
                  C_P, C_PE, C_NP, C_PO:
                    { We can't handle parity checks and they should never be generated
                      after a general-purpose CMP (it's used in some floating-point
                      comparisons that don't use CMP) }
                    InternalError(2020012202);
                  else
                    { Zero/Equality, Sign, their complements and all of the
                      signed comparisons do not need to be converted };
                end;
                hp2 := hp1;
              end;

            { Convert the instruction to a TEST }
            taicpu(p).opcode := A_TEST;
            taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
            Result := True;
            Exit;
          end
        else if (taicpu(p).oper[0]^.val = 1) and
          MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
          (taicpu(hp1).condition in [C_L, C_NGE]) then
          begin
            { Convert;       To:
                cmp $1,r/m     cmp $0,r/m
                jl  @lbl       jle @lbl }
            DebugMsg(SPeepholeOptimization + 'Cmp1Jl2Cmp0Jle', p);
            taicpu(p).oper[0]^.val := 0;
            taicpu(hp1).condition := C_LE;

            { If the instruction is now "cmp $0,%reg", convert it to a
              TEST (and effectively do the work of the "cmp $0,%reg" in
              the block above)

              If it's a reference, we can get away with not setting
              Result to True because we haven't evaluated the jump
              in this pass yet. }
            if (taicpu(p).oper[1]^.typ = top_reg) then
              begin
                taicpu(p).opcode := A_TEST;
                taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                Result := True;
              end;

            Exit;
          end
        else if (taicpu(p).oper[1]^.typ = top_reg)
{$ifdef x86_64}
          and (taicpu(p).opsize <> S_Q) { S_Q will never happen: cmp with 64 bit constants is not possible }
{$endif x86_64}
          then
          begin
            { cmp register,$8000                neg register
              je target                 -->    jo target

              .... only if register is deallocated before jump.}
            case Taicpu(p).opsize of
              S_B: v:=$80;
              S_W: v:=$8000;
              S_L: v:=qword($80000000);
              else
                internalerror(2013112905);
            end;
            if (taicpu(p).oper[0]^.val=v) and
              MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
              (Taicpu(hp1).condition in [C_E,C_NE]) then
              begin
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs,tai(p.next));
                if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
                    Taicpu(p).opcode:=A_NEG;
                    Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
                    Taicpu(p).clearop(1);
                    Taicpu(p).ops:=1;
                    if Taicpu(hp1).condition=C_E then
                      Taicpu(hp1).condition:=C_O
                    else
                      Taicpu(hp1).condition:=C_NO;
                    Result:=true;
                    exit;
                  end;
              end;
          end;
      end;

    if MatchInstruction(hp1,A_MOV,[]) and
      (
        (taicpu(p).oper[0]^.typ <> top_reg) or
        not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
      ) and
      (
        (taicpu(p).oper[1]^.typ <> top_reg) or
        not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
      ) and
      (
        { Make sure the register written to doesn't appear in the
          cmp instruction (in a reference, say) }
        (taicpu(hp1).oper[1]^.typ <> top_reg) or
        not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
      ) then
      begin
        { If we have something like:
            cmp ###,%reg1
            mov 0,%reg2
          And no registers are shared, move the MOV command to before the
          comparison as this means it can be optimised without worrying
          about the FLAGS register. (This combination is generated by
          "J(c)Mov1JmpMov0 -> Set(~c)", among other things). }
        SwapMovCmp(p, hp1);
        Result := True;
        Exit;
      end;
  end;
{ Pass-1 peephole handler for PXOR.

  Two patterns are handled when p is a register-clearing "pxor reg,reg":
    1) A later identical "pxor reg,reg" (reached via the register's use
       chain) is redundant and removed.
    2) "pxor reg1,reg1 / movapd|movaps reg1,reg2" with reg1 dead after
       the move becomes "pxor reg2,reg2".

  Returns True if a transformation was performed. }
function TX86AsmOptimizer.OptPass1PXor(var p: tai): boolean;
  var
    hp1: tai;
  begin
    {
      remove the second (v)pxor from

        pxor reg,reg
        ...
        pxor reg,reg
    }
    Result:=false;
    if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
      MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) then
      begin
        DebugMsg(SPeepholeOptimization + 'PXorPXor2PXor done',hp1);
        RemoveInstruction(hp1);
        Result:=true;
        Exit;
      end
    {
      replace
        pxor reg1,reg1
        movapd/s reg1,reg2
        dealloc reg1
      by
        pxor reg2,reg2
    }
    else if GetNextInstruction(p,hp1) and
      { we mix single and double operations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      (taicpu(p).oper[0]^.typ=top_reg) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { Only safe if reg1 is no longer live after the move }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(0,taicpu(hp1).oper[1]^);
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'PXorMovapd2PXor done',p);
            RemoveInstruction(hp1);
            result:=true;
          end;
      end;
  end;
  5293. function TX86AsmOptimizer.OptPass1VPXor(var p: tai): boolean;
  5294. var
  5295. hp1: tai;
  5296. begin
  5297. {
  5298. remove the second (v)pxor from
  5299. (v)pxor reg,reg
  5300. ...
  5301. (v)pxor reg,reg
  5302. }
  5303. Result:=false;
  5304. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^,taicpu(p).oper[2]^) and
  5305. MatchOpType(taicpu(p),top_reg,top_reg,top_reg) and
  5306. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  5307. MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
  5308. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
  5309. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^,taicpu(hp1).oper[2]^) then
  5310. begin
  5311. DebugMsg(SPeepholeOptimization + 'VPXorVPXor2PXor done',hp1);
  5312. RemoveInstruction(hp1);
  5313. Result:=true;
  5314. Exit;
  5315. end
  5316. else
  5317. Result:=OptPass1VOP(p);
  5318. end;
  5319. function TX86AsmOptimizer.OptPass1Imul(var p: tai): boolean;
  5320. var
  5321. hp1 : tai;
  5322. begin
  5323. result:=false;
  5324. { replace
  5325. IMul const,%mreg1,%mreg2
  5326. Mov %reg2,%mreg3
  5327. dealloc %mreg3
  5328. by
  5329. Imul const,%mreg1,%mreg23
  5330. }
  5331. if (taicpu(p).ops=3) and
  5332. GetNextInstruction(p,hp1) and
  5333. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  5334. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  5335. (taicpu(hp1).oper[1]^.typ=top_reg) then
  5336. begin
  5337. TransferUsedRegs(TmpUsedRegs);
  5338. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  5339. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
  5340. begin
  5341. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  5342. DebugMsg(SPeepholeOptimization + 'ImulMov2Imul done',p);
  5343. RemoveInstruction(hp1);
  5344. result:=true;
  5345. end;
  5346. end;
  5347. end;
  5348. function TX86AsmOptimizer.OptPass1SHXX(var p: tai): boolean;
  5349. var
  5350. hp1 : tai;
  5351. begin
  5352. result:=false;
  5353. { replace
  5354. IMul %reg0,%reg1,%reg2
  5355. Mov %reg2,%reg3
  5356. dealloc %reg2
  5357. by
  5358. Imul %reg0,%reg1,%reg3
  5359. }
  5360. if GetNextInstruction(p,hp1) and
  5361. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  5362. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  5363. (taicpu(hp1).oper[1]^.typ=top_reg) then
  5364. begin
  5365. TransferUsedRegs(TmpUsedRegs);
  5366. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  5367. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
  5368. begin
  5369. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  5370. DebugMsg(SPeepholeOptimization + 'SHXXMov2SHXX done',p);
  5371. RemoveInstruction(hp1);
  5372. result:=true;
  5373. end;
  5374. end;
  5375. end;
{ Pass-1 peephole handler for conditional jumps.

  Converts the classic boolean-materialisation diamond

      j<c>  .L1
      mov   1,reg
      jmp   .L2
    .L1:
      mov   0,reg
    .L2:

  into "mov 0,reg / set<not(c)> reg" (or set<c> when the constants are
  swapped).  If the middle label has other references, only the first arm
  is rewritten ("partial" case).  The "mov 0,reg" is deliberately kept as
  a MOV (not turned into XOR) because the flags from the preceding
  comparison are still live.  When optimising for size with MOVZX
  available, the wide MOV is replaced by "set<c> reg8 / movzx reg8,reg".

  Returns True if the transformation was performed. }
function TX86AsmOptimizer.OptPass1Jcc(var p : tai) : boolean;
  var
    hp1, hp2, hp3, hp4, hp5: tai;
    ThisReg: TRegister;
  begin
    Result := False;
    if not GetNextInstruction(p,hp1) or (hp1.typ <> ait_instruction) then
      Exit;

    {
      convert
        j<c>  .L1
        mov   1,reg
        jmp   .L2
      .L1
        mov   0,reg
      .L2

      into
        mov   0,reg
        set<not(c)> reg

      take care of alignment and that the mov 0,reg is not converted into a xor as this
      would destroy the flag contents

      Use MOVZX if size is preferred, since while mov 0,reg is bigger, it can be
      executed at the same time as a previous comparison.
        set<not(c)> reg
        movzx reg, reg
    }
    if MatchInstruction(hp1,A_MOV,[]) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      (
        (
          (taicpu(hp1).oper[1]^.typ = top_reg)
{$ifdef i386}
          { Under i386, ESI, EDI, EBP and ESP
            don't have an 8-bit representation }
          and not (getsupreg(taicpu(hp1).oper[1]^.reg) in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
{$endif i386}
        ) or (
{$ifdef i386}
          (taicpu(hp1).oper[1]^.typ <> top_reg) and
{$endif i386}
          (taicpu(hp1).opsize = S_B)
        )
      ) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_JMP,[]) and (taicpu(hp2).oper[0]^.ref^.refaddr=addr_full) and
      GetNextInstruction(hp2,hp3) and
      SkipAligns(hp3, hp3) and
      (hp3.typ=ait_label) and
      (tasmlabel(taicpu(p).oper[0]^.ref^.symbol)=tai_label(hp3).labsym) and
      GetNextInstruction(hp3,hp4) and
      MatchInstruction(hp4,A_MOV,[taicpu(hp1).opsize]) and
      (taicpu(hp4).oper[0]^.typ = top_const) and
      (
        ((taicpu(hp1).oper[0]^.val = 0) and (taicpu(hp4).oper[0]^.val = 1)) or
        ((taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0))
      ) and
      MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp4).oper[1]^) and
      GetNextInstruction(hp4,hp5) and
      SkipAligns(hp5, hp5) and
      (hp5.typ=ait_label) and
      (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol)=tai_label(hp5).labsym) then
      begin
        { Invert the condition if the "1" is on the fall-through path }
        if (taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0) then
          taicpu(p).condition := inverse_cond(taicpu(p).condition);
        tai_label(hp3).labsym.DecRefs;

        { If this isn't the only reference to the middle label, we can
          still make a saving - only that the first jump and everything
          that follows will remain. }
        if (tai_label(hp3).labsym.getrefs = 0) then
          begin
            if (taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0) then
              DebugMsg(SPeepholeOptimization + 'J(c)Mov1JmpMov0 -> Set(~c)',p)
            else
              DebugMsg(SPeepholeOptimization + 'J(c)Mov0JmpMov1 -> Set(c)',p);

            { remove jump, first label and second MOV (also catching any aligns) }
            repeat
              if not GetNextInstruction(hp2, hp3) then
                InternalError(2021040810);
              RemoveInstruction(hp2);
              hp2 := hp3;
            until hp2 = hp5;

            { Don't decrement reference count before the removal loop
              above, otherwise GetNextInstruction won't stop on the
              the label }
            tai_label(hp5).labsym.DecRefs;
          end
        else
          begin
            if (taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0) then
              DebugMsg(SPeepholeOptimization + 'J(c)Mov1JmpMov0 -> Set(~c) (partial)',p)
            else
              DebugMsg(SPeepholeOptimization + 'J(c)Mov0JmpMov1 -> Set(c) (partial)',p);
          end;

        { Rewrite the jump itself into a SETcc }
        taicpu(p).opcode:=A_SETcc;
        taicpu(p).opsize:=S_B;
        taicpu(p).is_jmp:=False;

        if taicpu(hp1).opsize=S_B then
          begin
            taicpu(p).loadoper(0, taicpu(hp1).oper[1]^);
            if taicpu(hp1).oper[1]^.typ = top_reg then
              AllocRegBetween(taicpu(hp1).oper[1]^.reg, p, hp2, UsedRegs);
            RemoveInstruction(hp1);
          end
        else
          begin
            { Will be a register because the size can't be S_B otherwise }
            ThisReg := newreg(R_INTREGISTER,getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBL);
            taicpu(p).loadreg(0, ThisReg);
            AllocRegBetween(ThisReg, p, hp2, UsedRegs);

            if (cs_opt_size in current_settings.optimizerswitches) and IsMOVZXAcceptable then
              begin
                case taicpu(hp1).opsize of
                  S_W:
                    taicpu(hp1).opsize := S_BW;
                  S_L:
                    taicpu(hp1).opsize := S_BL;
{$ifdef x86_64}
                  S_Q:
                    begin
                      taicpu(hp1).opsize := S_BL;
                      { Change the destination register to 32-bit }
                      taicpu(hp1).loadreg(1, newreg(R_INTREGISTER,getsupreg(ThisReg), R_SUBD));
                    end;
{$endif x86_64}
                  else
                    InternalError(2021040820);
                end;

                taicpu(hp1).opcode := A_MOVZX;
                taicpu(hp1).loadreg(0, ThisReg);
              end
            else
              begin
                AllocRegBetween(NR_FLAGS,p,hp1,UsedRegs);
                { hp1 is already a MOV instruction with the correct register }
                taicpu(hp1).loadconst(0, 0);
                { Inserting it right before p will guarantee that the flags are also tracked }
                asml.Remove(hp1);
                asml.InsertBefore(hp1, p);
              end;
          end;

        Result:=true;
        exit;
      end
  end;
{ Attempts to duplicate a short run of MOV-type assignments that sits at the
  target of an unconditional jump, so the jump can be redirected past them
  (or converted to RET):

      jmp .L1                  mov ##, ##   ( multiple movs possible )
      ...               -->    jmp/ret
    .L1:                       ...
      mov ##, ## ...
      jmp/ret

  p          - the JMP instruction being examined (must be a plain jump to
               a label with no base/index register).
  hp1        - the instruction at the jump target, or nil to look it up
               from the label.
  LoopCount  - recursion depth; chains of jumps are analysed recursively
               and the search gives up at a depth of 20 to guard against
               infinite loops.
  Count      - out: the number of assignments duplicated (capped below 5,
               as more is not considered worthwhile).

  Returns True if the jump was redirected/converted; in that case p is
  moved back to the first duplicated assignment so it can itself be
  optimised on the next pass. }
function TX86AsmOptimizer.CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
  var
    hp2, hp3, first_assignment: tai;
    IncCount, OperIdx: Integer;
    OrigLabel: TAsmLabel;
  begin
    Count := 0;
    Result := False;
    first_assignment := nil;

    if (LoopCount >= 20) then
      begin
        { Guard against infinite loops }
        Exit;
      end;

    if (taicpu(p).oper[0]^.typ <> top_ref) or
      (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) or
      (taicpu(p).oper[0]^.ref^.base <> NR_NO) or
      (taicpu(p).oper[0]^.ref^.index <> NR_NO) or
      not (taicpu(p).oper[0]^.ref^.symbol is TAsmLabel) then
      Exit;

    OrigLabel := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);

    {
      change
             jmp .L1
             ...
         .L1:
             mov ##, ## ( multiple movs possible )
             jmp/ret
      into
             mov ##, ##
             jmp/ret
    }

    if not Assigned(hp1) then
      begin
        hp1 := GetLabelWithSym(OrigLabel);
        if not Assigned(hp1) or not SkipLabels(hp1, hp1) then
          Exit;
      end;

    { First pass: scan the target block and count the copyable assignments,
      stopping at the terminating JMP/RET or at anything unsupported }
    hp2 := hp1;

    while Assigned(hp2) do
      begin
        if Assigned(hp2) and (hp2.typ in [ait_label, ait_align]) then
          SkipLabels(hp2,hp2);

        if not Assigned(hp2) or (hp2.typ <> ait_instruction) then
          Break;

        case taicpu(hp2).opcode of
          A_MOVSS:
            begin
              if taicpu(hp2).ops = 0 then
                { Wrong MOVSS - the string operation, not the SSE move }
                Break;

              Inc(Count);
              if Count >= 5 then
                { Too many to be worthwhile }
                Break;

              GetNextInstruction(hp2, hp2);
              Continue;
            end;
          A_MOV,
          A_MOVD,
          A_MOVQ,
          A_MOVSX,
{$ifdef x86_64}
          A_MOVSXD,
{$endif x86_64}
          A_MOVZX,
          A_MOVAPS,
          A_MOVUPS,
          A_MOVSD,
          A_MOVAPD,
          A_MOVUPD,
          A_MOVDQA,
          A_MOVDQU,
          A_VMOVSS,
          A_VMOVAPS,
          A_VMOVUPS,
          A_VMOVSD,
          A_VMOVAPD,
          A_VMOVUPD,
          A_VMOVDQA,
          A_VMOVDQU:
            begin
              Inc(Count);
              if Count >= 5 then
                { Too many to be worthwhile }
                Break;

              GetNextInstruction(hp2, hp2);
              Continue;
            end;
          A_JMP:
            begin
              { Guard against infinite loops }
              if taicpu(hp2).oper[0]^.ref^.symbol = OrigLabel then
                Exit;

              { Analyse this jump first in case it also duplicates assignments }
              if CheckJumpMovTransferOpt(hp2, nil, LoopCount + 1, IncCount) then
                begin
                  { Something did change! }
                  Result := True;

                  Inc(Count, IncCount);
                  if Count >= 5 then
                    begin
                      { Too many to be worthwhile }
                      Exit;
                    end;

                  if MatchInstruction(hp2, [A_JMP, A_RET], []) then
                    Break;
                end;

              Result := True;
              Break;
            end;
          A_RET:
            begin
              Result := True;
              Break;
            end;
          else
            Break;
        end;
      end;

    if Result then
      begin
        { A count of zero can happen when CheckJumpMovTransferOpt is called recursively }
        if Count = 0 then
          begin
            Result := False;
            Exit;
          end;

        { Second pass: duplicate the assignments in front of p, then redirect
          the jump or convert it into a RET }
        hp3 := p;
        DebugMsg(SPeepholeOptimization + 'Duplicated ' + debug_tostr(Count) + ' assignment(s) and redirected jump', p);

        while True do
          begin
            if Assigned(hp1) and (hp1.typ in [ait_label, ait_align]) then
              SkipLabels(hp1,hp1);

            if (hp1.typ <> ait_instruction) then
              InternalError(2021040720);

            case taicpu(hp1).opcode of
              A_JMP:
                begin
                  { Change the original jump to the new destination }
                  OrigLabel.decrefs;
                  taicpu(hp1).oper[0]^.ref^.symbol.increfs;
                  taicpu(p).loadref(0, taicpu(hp1).oper[0]^.ref^);

                  { Set p to the first duplicated assignment so it can get optimised if needs be }
                  if not Assigned(first_assignment) then
                    InternalError(2021040810)
                  else
                    p := first_assignment;

                  Exit;
                end;
              A_RET:
                begin
                  { Now change the jump into a RET instruction }
                  ConvertJumpToRET(p, hp1);

                  { Set p to the first duplicated assignment so it can get optimised if needs be }
                  if not Assigned(first_assignment) then
                    InternalError(2021040811)
                  else
                    p := first_assignment;

                  Exit;
                end;
              else
                begin
                  { Duplicate the MOV instruction }
                  hp3:=tai(hp1.getcopy);
                  if first_assignment = nil then
                    first_assignment := hp3;
                  asml.InsertBefore(hp3, p);

                  { Make sure the compiler knows about any final registers written here }
                  for OperIdx := 0 to taicpu(hp3).ops - 1 do
                    with taicpu(hp3).oper[OperIdx]^ do
                      begin
                        case typ of
                          top_ref:
                            begin
                              if (ref^.base <> NR_NO) and
                                (getsupreg(ref^.base) <> RS_ESP) and
                                (getsupreg(ref^.base) <> RS_EBP)
{$ifdef x86_64} and (ref^.base <> NR_RIP) {$endif x86_64}
                                then
                                AllocRegBetween(ref^.base, hp3, tai(p.Next), UsedRegs);
                              if (ref^.index <> NR_NO) and
                                (getsupreg(ref^.index) <> RS_ESP) and
                                (getsupreg(ref^.index) <> RS_EBP)
{$ifdef x86_64} and (ref^.index <> NR_RIP) {$endif x86_64} and
                                (ref^.index <> ref^.base) then
                                AllocRegBetween(ref^.index, hp3, tai(p.Next), UsedRegs);
                            end;
                          top_reg:
                            AllocRegBetween(reg, hp3, tai(p.Next), UsedRegs);
                          else
                            ;
                        end;
                      end;
                end;
            end;

            if not GetNextInstruction(hp1, hp1) then
              { Should have dropped out earlier }
              InternalError(2021040710);
          end;
      end;
  end;
  5722. procedure TX86AsmOptimizer.SwapMovCmp(var p, hp1: tai);
  5723. var
  5724. hp2: tai;
  5725. X: Integer;
  5726. begin
  5727. asml.Remove(hp1);
  5728. { Try to insert after the last instructions where the FLAGS register is not yet in use }
  5729. if not GetLastInstruction(p, hp2) then
  5730. asml.InsertBefore(hp1, p)
  5731. else
  5732. asml.InsertAfter(hp1, hp2);
  5733. DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and mov instructions to improve optimisation potential', hp1);
  5734. for X := 0 to 1 do
  5735. case taicpu(hp1).oper[X]^.typ of
  5736. top_reg:
  5737. AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
  5738. top_ref:
  5739. begin
  5740. if taicpu(hp1).oper[X]^.ref^.base <> NR_NO then
  5741. AllocRegBetween(taicpu(hp1).oper[X]^.ref^.base, hp1, p, UsedRegs);
  5742. if taicpu(hp1).oper[X]^.ref^.index <> NR_NO then
  5743. AllocRegBetween(taicpu(hp1).oper[X]^.ref^.index, hp1, p, UsedRegs);
  5744. end;
  5745. else
  5746. ;
  5747. end;
  5748. end;
  5749. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  5750. function IsXCHGAcceptable: Boolean; inline;
  5751. begin
  5752. { Always accept if optimising for size }
  5753. Result := (cs_opt_size in current_settings.optimizerswitches) or
  5754. (
  5755. {$ifdef x86_64}
  5756. { XCHG takes 3 cycles on AMD Athlon64 }
  5757. (current_settings.optimizecputype >= cpu_core_i)
  5758. {$else x86_64}
  5759. { From the Pentium M onwards, XCHG only has a latency of 2 rather
  5760. than 3, so it becomes a saving compared to three MOVs with two of
  5761. them able to execute simultaneously. [Kit] }
  5762. (current_settings.optimizecputype >= cpu_PentiumM)
  5763. {$endif x86_64}
  5764. );
  5765. end;
  5766. var
  5767. NewRef: TReference;
  5768. hp1, hp2, hp3, hp4: Tai;
  5769. {$ifndef x86_64}
  5770. OperIdx: Integer;
  5771. {$endif x86_64}
  5772. NewInstr : Taicpu;
  5773. NewAligh : Tai_align;
  5774. DestLabel: TAsmLabel;
  5775. begin
  5776. Result:=false;
  5777. { This optimisation adds an instruction, so only do it for speed }
  5778. if not (cs_opt_size in current_settings.optimizerswitches) and
  5779. MatchOpType(taicpu(p), top_const, top_reg) and
  5780. (taicpu(p).oper[0]^.val = 0) then
  5781. begin
  5782. { To avoid compiler warning }
  5783. DestLabel := nil;
  5784. if (p.typ <> ait_instruction) or (taicpu(p).oper[1]^.typ <> top_reg) then
  5785. InternalError(2021040750);
  5786. if not GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.reg) then
  5787. Exit;
  5788. case hp1.typ of
  5789. ait_label:
  5790. begin
  5791. { Change:
  5792. mov $0,%reg mov $0,%reg
  5793. @Lbl1: @Lbl1:
  5794. test %reg,%reg / cmp $0,%reg test %reg,%reg / mov $0,%reg
  5795. je @Lbl2 jne @Lbl2
  5796. To: To:
  5797. mov $0,%reg mov $0,%reg
  5798. jmp @Lbl2 jmp @Lbl3
  5799. (align) (align)
  5800. @Lbl1: @Lbl1:
  5801. test %reg,%reg / cmp $0,%reg test %reg,%reg / cmp $0,%reg
  5802. je @Lbl2 je @Lbl2
  5803. @Lbl3: <-- Only if label exists
  5804. (Not if it's optimised for size)
  5805. }
  5806. if not GetNextInstruction(hp1, hp2) then
  5807. Exit;
  5808. if not (cs_opt_size in current_settings.optimizerswitches) and
  5809. (hp2.typ = ait_instruction) and
  5810. (
  5811. { Register sizes must exactly match }
  5812. (
  5813. (taicpu(hp2).opcode = A_CMP) and
  5814. MatchOperand(taicpu(hp2).oper[0]^, 0) and
  5815. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^.reg)
  5816. ) or (
  5817. (taicpu(hp2).opcode = A_TEST) and
  5818. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^.reg) and
  5819. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^.reg)
  5820. )
  5821. ) and GetNextInstruction(hp2, hp3) and
  5822. (hp3.typ = ait_instruction) and
  5823. (taicpu(hp3).opcode = A_JCC) and
  5824. (taicpu(hp3).oper[0]^.typ=top_ref) and (taicpu(hp3).oper[0]^.ref^.refaddr=addr_full) and (taicpu(hp3).oper[0]^.ref^.base=NR_NO) and
  5825. (taicpu(hp3).oper[0]^.ref^.index=NR_NO) and (taicpu(hp3).oper[0]^.ref^.symbol is tasmlabel) then
  5826. begin
  5827. { Check condition of jump }
  5828. { Always true? }
  5829. if condition_in(C_E, taicpu(hp3).condition) then
  5830. begin
  5831. { Copy label symbol and obtain matching label entry for the
  5832. conditional jump, as this will be our destination}
  5833. DestLabel := tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol);
  5834. DebugMsg(SPeepholeOptimization + 'Mov0LblCmp0Je -> Mov0JmpLblCmp0Je', p);
  5835. Result := True;
  5836. end
  5837. { Always false? }
  5838. else if condition_in(C_NE, taicpu(hp3).condition) and GetNextInstruction(hp3, hp2) then
  5839. begin
  5840. { This is only worth it if there's a jump to take }
  5841. case hp2.typ of
  5842. ait_instruction:
  5843. begin
  5844. if taicpu(hp2).opcode = A_JMP then
  5845. begin
  5846. DestLabel := tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol);
  5847. { An unconditional jump follows the conditional jump which will always be false,
  5848. so use this jump's destination for the new jump }
  5849. DebugMsg(SPeepholeOptimization + 'Mov0LblCmp0Jne -> Mov0JmpLblCmp0Jne (with JMP)', p);
  5850. Result := True;
  5851. end
  5852. else if taicpu(hp2).opcode = A_JCC then
  5853. begin
  5854. DestLabel := tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol);
  5855. if condition_in(C_E, taicpu(hp2).condition) then
  5856. begin
  5857. { A second conditional jump follows the conditional jump which will always be false,
  5858. while the second jump is always True, so use this jump's destination for the new jump }
  5859. DebugMsg(SPeepholeOptimization + 'Mov0LblCmp0Jne -> Mov0JmpLblCmp0Jne (with second Jcc)', p);
  5860. Result := True;
  5861. end;
  5862. { Don't risk it if the jump isn't always true (Result remains False) }
  5863. end;
  5864. end;
  5865. else
  5866. { If anything else don't optimise };
  5867. end;
  5868. end;
  5869. if Result then
  5870. begin
  5871. { Just so we have something to insert as a paremeter}
  5872. reference_reset(NewRef, 1, []);
  5873. NewInstr := taicpu.op_ref(A_JMP, S_NO, NewRef);
  5874. { Now actually load the correct parameter }
  5875. NewInstr.loadsymbol(0, DestLabel, 0);
  5876. { Get instruction before original label (may not be p under -O3) }
  5877. if not GetLastInstruction(hp1, hp2) then
  5878. { Shouldn't fail here }
  5879. InternalError(2021040701);
  5880. DestLabel.increfs;
  5881. AsmL.InsertAfter(NewInstr, hp2);
  5882. { Add new alignment field }
  5883. (* AsmL.InsertAfter(
  5884. cai_align.create_max(
  5885. current_settings.alignment.jumpalign,
  5886. current_settings.alignment.jumpalignskipmax
  5887. ),
  5888. NewInstr
  5889. ); *)
  5890. end;
  5891. Exit;
  5892. end;
  5893. end;
  5894. else
  5895. ;
  5896. end;
  5897. end;
  5898. if not GetNextInstruction(p, hp1) then
  5899. Exit;
  5900. if MatchInstruction(hp1, A_JMP, [S_NO]) then
  5901. begin
  5902. { Sometimes the MOVs that OptPass2JMP produces can be improved
  5903. further, but we can't just put this jump optimisation in pass 1
  5904. because it tends to perform worse when conditional jumps are
  5905. nearby (e.g. when converting CMOV instructions). [Kit] }
  5906. if OptPass2JMP(hp1) then
  5907. { call OptPass1MOV once to potentially merge any MOVs that were created }
  5908. Result := OptPass1MOV(p)
  5909. { OptPass2MOV will now exit but will be called again if OptPass1MOV
  5910. returned True and the instruction is still a MOV, thus checking
  5911. the optimisations below }
  5912. { If OptPass2JMP returned False, no optimisations were done to
  5913. the jump and there are no further optimisations that can be done
  5914. to the MOV instruction on this pass }
  5915. end
  5916. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  5917. (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
  5918. MatchInstruction(hp1,A_ADD,A_SUB,[taicpu(p).opsize]) and
  5919. MatchOpType(taicpu(hp1),top_const,top_reg) and
  5920. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
  5921. { be lazy, checking separately for sub would be slightly better }
  5922. (abs(taicpu(hp1).oper[0]^.val)<=$7fffffff) then
  5923. begin
  5924. { Change:
  5925. movl/q %reg1,%reg2 movl/q %reg1,%reg2
  5926. addl/q $x,%reg2 subl/q $x,%reg2
  5927. To:
  5928. leal/q x(%reg1),%reg2 leal/q -x(%reg1),%reg2
  5929. }
  5930. TransferUsedRegs(TmpUsedRegs);
  5931. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  5932. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  5933. if not GetNextInstruction(hp1, hp2) or
  5934. (
  5935. { The FLAGS register isn't always tracked properly, so do not
  5936. perform this optimisation if a conditional statement follows }
  5937. not RegReadByInstruction(NR_DEFAULTFLAGS, hp2) and
  5938. not RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp2, TmpUsedRegs)
  5939. ) then
  5940. begin
  5941. reference_reset(NewRef, 1, []);
  5942. NewRef.base := taicpu(p).oper[0]^.reg;
  5943. NewRef.scalefactor := 1;
  5944. if taicpu(hp1).opcode = A_ADD then
  5945. begin
  5946. DebugMsg(SPeepholeOptimization + 'MovAdd2Lea', p);
  5947. NewRef.offset := taicpu(hp1).oper[0]^.val;
  5948. end
  5949. else
  5950. begin
  5951. DebugMsg(SPeepholeOptimization + 'MovSub2Lea', p);
  5952. NewRef.offset := -taicpu(hp1).oper[0]^.val;
  5953. end;
  5954. taicpu(p).opcode := A_LEA;
  5955. taicpu(p).loadref(0, NewRef);
  5956. RemoveInstruction(hp1);
  5957. Result := True;
  5958. Exit;
  5959. end;
  5960. end
  5961. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  5962. {$ifdef x86_64}
  5963. MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
  5964. {$else x86_64}
  5965. MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
  5966. {$endif x86_64}
  5967. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  5968. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  5969. { mov reg1, reg2 mov reg1, reg2
  5970. movzx/sx reg2, reg3 to movzx/sx reg1, reg3}
  5971. begin
  5972. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  5973. DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
  5974. { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
  5975. or unless supreg(reg3) = supreg(reg2)). [Kit] }
  5976. TransferUsedRegs(TmpUsedRegs);
  5977. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  5978. if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
  5979. not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
  5980. then
  5981. begin
  5982. RemoveCurrentP(p, hp1);
  5983. Result:=true;
  5984. end;
  5985. exit;
  5986. end
  5987. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  5988. IsXCHGAcceptable and
  5989. { XCHG doesn't support 8-byte registers }
  5990. (taicpu(p).opsize <> S_B) and
  5991. MatchInstruction(hp1, A_MOV, []) and
  5992. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  5993. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  5994. GetNextInstruction(hp1, hp2) and
  5995. MatchInstruction(hp2, A_MOV, []) and
  5996. { Don't need to call MatchOpType for hp2 because the operand matches below cover for it }
  5997. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^.reg) and
  5998. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) then
  5999. begin
  6000. { mov %reg1,%reg2
  6001. mov %reg3,%reg1 -> xchg %reg3,%reg1
  6002. mov %reg2,%reg3
  6003. (%reg2 not used afterwards)
  6004. Note that xchg takes 3 cycles to execute, and generally mov's take
  6005. only one cycle apiece, but the first two mov's can be executed in
  6006. parallel, only taking 2 cycles overall. Older processors should
  6007. therefore only optimise for size. [Kit]
  6008. }
  6009. TransferUsedRegs(TmpUsedRegs);
  6010. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  6011. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  6012. if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs) then
  6013. begin
  6014. DebugMsg(SPeepholeOptimization + 'MovMovMov2XChg', p);
  6015. AllocRegBetween(taicpu(hp2).oper[1]^.reg, p, hp1, UsedRegs);
  6016. taicpu(hp1).opcode := A_XCHG;
  6017. RemoveCurrentP(p, hp1);
  6018. RemoveInstruction(hp2);
  6019. Result := True;
  6020. Exit;
  6021. end;
  6022. end
  6023. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  6024. MatchInstruction(hp1, A_SAR, []) then
  6025. begin
  6026. if MatchOperand(taicpu(hp1).oper[0]^, 31) then
  6027. begin
  6028. { the use of %edx also covers the opsize being S_L }
  6029. if MatchOperand(taicpu(hp1).oper[1]^, NR_EDX) then
  6030. begin
  6031. { Note it has to be specifically "movl %eax,%edx", and those specific sub-registers }
  6032. if (taicpu(p).oper[0]^.reg = NR_EAX) and
  6033. (taicpu(p).oper[1]^.reg = NR_EDX) then
  6034. begin
  6035. { Change:
  6036. movl %eax,%edx
  6037. sarl $31,%edx
  6038. To:
  6039. cltd
  6040. }
  6041. DebugMsg(SPeepholeOptimization + 'MovSar2Cltd', p);
  6042. RemoveInstruction(hp1);
  6043. taicpu(p).opcode := A_CDQ;
  6044. taicpu(p).opsize := S_NO;
  6045. taicpu(p).clearop(1);
  6046. taicpu(p).clearop(0);
  6047. taicpu(p).ops:=0;
  6048. Result := True;
  6049. end
  6050. else if (cs_opt_size in current_settings.optimizerswitches) and
  6051. (taicpu(p).oper[0]^.reg = NR_EDX) and
  6052. (taicpu(p).oper[1]^.reg = NR_EAX) then
  6053. begin
  6054. { Change:
  6055. movl %edx,%eax
  6056. sarl $31,%edx
  6057. To:
  6058. movl %edx,%eax
  6059. cltd
  6060. Note that this creates a dependency between the two instructions,
  6061. so only perform if optimising for size.
  6062. }
  6063. DebugMsg(SPeepholeOptimization + 'MovSar2MovCltd', p);
  6064. taicpu(hp1).opcode := A_CDQ;
  6065. taicpu(hp1).opsize := S_NO;
  6066. taicpu(hp1).clearop(1);
  6067. taicpu(hp1).clearop(0);
  6068. taicpu(hp1).ops:=0;
  6069. end;
  6070. {$ifndef x86_64}
  6071. end
  6072. { Don't bother if CMOV is supported, because a more optimal
  6073. sequence would have been generated for the Abs() intrinsic }
  6074. else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
  6075. { the use of %eax also covers the opsize being S_L }
  6076. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) and
  6077. (taicpu(p).oper[0]^.reg = NR_EAX) and
  6078. (taicpu(p).oper[1]^.reg = NR_EDX) and
  6079. GetNextInstruction(hp1, hp2) and
  6080. MatchInstruction(hp2, A_XOR, [S_L]) and
  6081. MatchOperand(taicpu(hp2).oper[0]^, NR_EAX) and
  6082. MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) and
  6083. GetNextInstruction(hp2, hp3) and
  6084. MatchInstruction(hp3, A_SUB, [S_L]) and
  6085. MatchOperand(taicpu(hp3).oper[0]^, NR_EAX) and
  6086. MatchOperand(taicpu(hp3).oper[1]^, NR_EDX) then
  6087. begin
  6088. { Change:
  6089. movl %eax,%edx
  6090. sarl $31,%eax
  6091. xorl %eax,%edx
  6092. subl %eax,%edx
  6093. (Instruction that uses %edx)
  6094. (%eax deallocated)
  6095. (%edx deallocated)
  6096. To:
  6097. cltd
  6098. xorl %edx,%eax <-- Note the registers have swapped
  6099. subl %edx,%eax
  6100. (Instruction that uses %eax) <-- %eax rather than %edx
  6101. }
  6102. TransferUsedRegs(TmpUsedRegs);
  6103. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  6104. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  6105. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  6106. if not RegUsedAfterInstruction(NR_EAX, hp3, TmpUsedRegs) then
  6107. begin
  6108. if GetNextInstruction(hp3, hp4) and
  6109. not RegModifiedByInstruction(NR_EDX, hp4) and
  6110. not RegUsedAfterInstruction(NR_EDX, hp4, TmpUsedRegs) then
  6111. begin
  6112. DebugMsg(SPeepholeOptimization + 'abs() intrinsic optimisation', p);
  6113. taicpu(p).opcode := A_CDQ;
  6114. taicpu(p).clearop(1);
  6115. taicpu(p).clearop(0);
  6116. taicpu(p).ops:=0;
  6117. RemoveInstruction(hp1);
  6118. taicpu(hp2).loadreg(0, NR_EDX);
  6119. taicpu(hp2).loadreg(1, NR_EAX);
  6120. taicpu(hp3).loadreg(0, NR_EDX);
  6121. taicpu(hp3).loadreg(1, NR_EAX);
  6122. AllocRegBetween(NR_EAX, hp3, hp4, TmpUsedRegs);
  6123. { Convert references in the following instruction (hp4) from %edx to %eax }
  6124. for OperIdx := 0 to taicpu(hp4).ops - 1 do
  6125. with taicpu(hp4).oper[OperIdx]^ do
  6126. case typ of
  6127. top_reg:
  6128. if getsupreg(reg) = RS_EDX then
  6129. reg := newreg(R_INTREGISTER,RS_EAX,getsubreg(reg));
  6130. top_ref:
  6131. begin
  6132. if getsupreg(reg) = RS_EDX then
  6133. ref^.base := newreg(R_INTREGISTER,RS_EAX,getsubreg(reg));
  6134. if getsupreg(reg) = RS_EDX then
  6135. ref^.index := newreg(R_INTREGISTER,RS_EAX,getsubreg(reg));
  6136. end;
  6137. else
  6138. ;
  6139. end;
  6140. end;
  6141. end;
  6142. {$else x86_64}
  6143. end;
  6144. end
  6145. else if MatchOperand(taicpu(hp1).oper[0]^, 63) and
  6146. { the use of %rdx also covers the opsize being S_Q }
  6147. MatchOperand(taicpu(hp1).oper[1]^, NR_RDX) then
  6148. begin
  6149. { Note it has to be specifically "movq %rax,%rdx", and those specific sub-registers }
  6150. if (taicpu(p).oper[0]^.reg = NR_RAX) and
  6151. (taicpu(p).oper[1]^.reg = NR_RDX) then
  6152. begin
  6153. { Change:
  6154. movq %rax,%rdx
  6155. sarq $63,%rdx
  6156. To:
  6157. cqto
  6158. }
  6159. DebugMsg(SPeepholeOptimization + 'MovSar2Cqto', p);
  6160. RemoveInstruction(hp1);
  6161. taicpu(p).opcode := A_CQO;
  6162. taicpu(p).opsize := S_NO;
  6163. taicpu(p).clearop(1);
  6164. taicpu(p).clearop(0);
  6165. taicpu(p).ops:=0;
  6166. Result := True;
  6167. end
  6168. else if (cs_opt_size in current_settings.optimizerswitches) and
  6169. (taicpu(p).oper[0]^.reg = NR_RDX) and
  6170. (taicpu(p).oper[1]^.reg = NR_RAX) then
  6171. begin
  6172. { Change:
  6173. movq %rdx,%rax
  6174. sarq $63,%rdx
  6175. To:
  6176. movq %rdx,%rax
  6177. cqto
  6178. Note that this creates a dependency between the two instructions,
  6179. so only perform if optimising for size.
  6180. }
  6181. DebugMsg(SPeepholeOptimization + 'MovSar2MovCqto', p);
  6182. taicpu(hp1).opcode := A_CQO;
  6183. taicpu(hp1).opsize := S_NO;
  6184. taicpu(hp1).clearop(1);
  6185. taicpu(hp1).clearop(0);
  6186. taicpu(hp1).ops:=0;
  6187. {$endif x86_64}
  6188. end;
  6189. end;
  6190. end
  6191. else if MatchInstruction(hp1, A_MOV, []) and
  6192. (taicpu(hp1).oper[1]^.typ = top_reg) then
  6193. { Though "GetNextInstruction" could be factored out, along with
  6194. the instructions that depend on hp2, it is an expensive call that
  6195. should be delayed for as long as possible, hence we do cheaper
  6196. checks first that are likely to be False. [Kit] }
  6197. begin
  6198. if (
  6199. (
  6200. MatchOperand(taicpu(p).oper[1]^, NR_EDX) and
  6201. (taicpu(hp1).oper[1]^.reg = NR_EAX) and
  6202. (
  6203. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6204. MatchOperand(taicpu(hp1).oper[0]^, NR_EDX)
  6205. )
  6206. ) or
  6207. (
  6208. MatchOperand(taicpu(p).oper[1]^, NR_EAX) and
  6209. (taicpu(hp1).oper[1]^.reg = NR_EDX) and
  6210. (
  6211. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6212. MatchOperand(taicpu(hp1).oper[0]^, NR_EAX)
  6213. )
  6214. )
  6215. ) and
  6216. GetNextInstruction(hp1, hp2) and
  6217. MatchInstruction(hp2, A_SAR, []) and
  6218. MatchOperand(taicpu(hp2).oper[0]^, 31) then
  6219. begin
  6220. if MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) then
  6221. begin
  6222. { Change:
  6223. movl r/m,%edx movl r/m,%eax movl r/m,%edx movl r/m,%eax
  6224. movl %edx,%eax or movl %eax,%edx or movl r/m,%eax or movl r/m,%edx
  6225. sarl $31,%edx sarl $31,%edx sarl $31,%edx sarl $31,%edx
  6226. To:
  6227. movl r/m,%eax <- Note the change in register
  6228. cltd
  6229. }
  6230. DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCltd', p);
  6231. AllocRegBetween(NR_EAX, p, hp1, UsedRegs);
  6232. taicpu(p).loadreg(1, NR_EAX);
  6233. taicpu(hp1).opcode := A_CDQ;
  6234. taicpu(hp1).clearop(1);
  6235. taicpu(hp1).clearop(0);
  6236. taicpu(hp1).ops:=0;
  6237. RemoveInstruction(hp2);
  6238. (*
  6239. {$ifdef x86_64}
  6240. end
  6241. else if MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) and
  6242. { This code sequence does not get generated - however it might become useful
  6243. if and when 128-bit signed integer types make an appearance, so the code
  6244. is kept here for when it is eventually needed. [Kit] }
  6245. (
  6246. (
  6247. (taicpu(hp1).oper[1]^.reg = NR_RAX) and
  6248. (
  6249. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6250. MatchOperand(taicpu(hp1).oper[0]^, NR_RDX)
  6251. )
  6252. ) or
  6253. (
  6254. (taicpu(hp1).oper[1]^.reg = NR_RDX) and
  6255. (
  6256. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6257. MatchOperand(taicpu(hp1).oper[0]^, NR_RAX)
  6258. )
  6259. )
  6260. ) and
  6261. GetNextInstruction(hp1, hp2) and
  6262. MatchInstruction(hp2, A_SAR, [S_Q]) and
  6263. MatchOperand(taicpu(hp2).oper[0]^, 63) and
  6264. MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) then
  6265. begin
  6266. { Change:
  6267. movq r/m,%rdx movq r/m,%rax movq r/m,%rdx movq r/m,%rax
  6268. movq %rdx,%rax or movq %rax,%rdx or movq r/m,%rax or movq r/m,%rdx
  6269. sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx
  6270. To:
  6271. movq r/m,%rax <- Note the change in register
  6272. cqto
  6273. }
  6274. DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCqto', p);
  6275. AllocRegBetween(NR_RAX, p, hp1, UsedRegs);
  6276. taicpu(p).loadreg(1, NR_RAX);
  6277. taicpu(hp1).opcode := A_CQO;
  6278. taicpu(hp1).clearop(1);
  6279. taicpu(hp1).clearop(0);
  6280. taicpu(hp1).ops:=0;
  6281. RemoveInstruction(hp2);
  6282. {$endif x86_64}
  6283. *)
  6284. end;
  6285. end;
  6286. {$ifdef x86_64}
  6287. end
  6288. else if (taicpu(p).opsize = S_L) and
  6289. (taicpu(p).oper[1]^.typ = top_reg) and
  6290. (
  6291. MatchInstruction(hp1, A_MOV,[]) and
  6292. (taicpu(hp1).opsize = S_L) and
  6293. (taicpu(hp1).oper[1]^.typ = top_reg)
  6294. ) and (
  6295. GetNextInstruction(hp1, hp2) and
  6296. (tai(hp2).typ=ait_instruction) and
  6297. (taicpu(hp2).opsize = S_Q) and
  6298. (
  6299. (
  6300. MatchInstruction(hp2, A_ADD,[]) and
  6301. (taicpu(hp2).opsize = S_Q) and
  6302. (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  6303. (
  6304. (
  6305. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
  6306. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  6307. ) or (
  6308. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  6309. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  6310. )
  6311. )
  6312. ) or (
  6313. MatchInstruction(hp2, A_LEA,[]) and
  6314. (taicpu(hp2).oper[0]^.ref^.offset = 0) and
  6315. (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
  6316. (
  6317. (
  6318. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
  6319. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
  6320. ) or (
  6321. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  6322. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
  6323. )
  6324. ) and (
  6325. (
  6326. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  6327. ) or (
  6328. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  6329. )
  6330. )
  6331. )
  6332. )
  6333. ) and (
  6334. GetNextInstruction(hp2, hp3) and
  6335. MatchInstruction(hp3, A_SHR,[]) and
  6336. (taicpu(hp3).opsize = S_Q) and
  6337. (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  6338. (taicpu(hp3).oper[0]^.val = 1) and
  6339. (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
  6340. ) then
  6341. begin
  6342. { Change movl x, reg1d movl x, reg1d
  6343. movl y, reg2d movl y, reg2d
  6344. addq reg2q,reg1q or leaq (reg1q,reg2q),reg1q
  6345. shrq $1, reg1q shrq $1, reg1q
  6346. ( reg1d and reg2d can be switched around in the first two instructions )
  6347. To movl x, reg1d
  6348. addl y, reg1d
  6349. rcrl $1, reg1d
  6350. This corresponds to the common expression (x + y) shr 1, where
  6351. x and y are Cardinals (replacing "shr 1" with "div 2" produces
  6352. smaller code, but won't account for x + y causing an overflow). [Kit]
  6353. }
  6354. if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
  6355. { Change first MOV command to have the same register as the final output }
  6356. taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
  6357. else
  6358. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  6359. { Change second MOV command to an ADD command. This is easier than
  6360. converting the existing command because it means we don't have to
  6361. touch 'y', which might be a complicated reference, and also the
  6362. fact that the third command might either be ADD or LEA. [Kit] }
  6363. taicpu(hp1).opcode := A_ADD;
  6364. { Delete old ADD/LEA instruction }
  6365. RemoveInstruction(hp2);
  6366. { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
  6367. taicpu(hp3).opcode := A_RCR;
  6368. taicpu(hp3).changeopsize(S_L);
  6369. setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
  6370. {$endif x86_64}
  6371. end;
  6372. end;
  6373. function TX86AsmOptimizer.OptPass2Movx(var p : tai) : boolean;
  6374. var
  6375. ThisReg: TRegister;
  6376. MinSize, MaxSize, TrySmaller, TargetSize: TOpSize;
  6377. TargetSubReg: TSubRegister;
  6378. hp1, hp2: tai;
  6379. RegInUse, RegChanged, p_removed: Boolean;
  6380. { Store list of found instructions so we don't have to call
  6381. GetNextInstructionUsingReg multiple times }
  6382. InstrList: array of taicpu;
  6383. InstrMax, Index: Integer;
  6384. UpperLimit, TrySmallerLimit: TCgInt;
  6385. PreMessage: string;
  6386. { Data flow analysis }
  6387. TestValMin, TestValMax: TCgInt;
  6388. SmallerOverflow: Boolean;
  6389. begin
  6390. Result := False;
  6391. p_removed := False;
  6392. { This is anything but quick! }
  6393. if not(cs_opt_level2 in current_settings.optimizerswitches) then
  6394. Exit;
  6395. SetLength(InstrList, 0);
  6396. InstrMax := -1;
  6397. ThisReg := taicpu(p).oper[1]^.reg;
  6398. case taicpu(p).opsize of
  6399. S_BW, S_BL:
  6400. begin
  6401. {$if defined(i386) or defined(i8086)}
  6402. { If the target size is 8-bit, make sure we can actually encode it }
  6403. if not (GetSupReg(ThisReg) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX]) then
  6404. Exit;
  6405. {$endif i386 or i8086}
  6406. UpperLimit := $FF;
  6407. MinSize := S_B;
  6408. if taicpu(p).opsize = S_BW then
  6409. MaxSize := S_W
  6410. else
  6411. MaxSize := S_L;
  6412. end;
  6413. S_WL:
  6414. begin
  6415. UpperLimit := $FFFF;
  6416. MinSize := S_W;
  6417. MaxSize := S_L;
  6418. end
  6419. else
  6420. InternalError(2020112301);
  6421. end;
  6422. TestValMin := 0;
  6423. TestValMax := UpperLimit;
  6424. TrySmallerLimit := UpperLimit;
  6425. TrySmaller := S_NO;
  6426. SmallerOverflow := False;
  6427. RegChanged := False;
  6428. hp1 := p;
  6429. while GetNextInstructionUsingReg(hp1, hp1, ThisReg) and
  6430. (hp1.typ = ait_instruction) and
  6431. (
  6432. { Under -O1 and -O2, GetNextInstructionUsingReg may return an
  6433. instruction that doesn't actually contain ThisReg }
  6434. (cs_opt_level3 in current_settings.optimizerswitches) or
  6435. RegInInstruction(ThisReg, hp1)
  6436. ) do
  6437. begin
  6438. case taicpu(hp1).opcode of
  6439. A_INC,A_DEC:
  6440. begin
  6441. { Has to be an exact match on the register }
  6442. if not MatchOperand(taicpu(hp1).oper[0]^, ThisReg) then
  6443. Break;
  6444. if taicpu(hp1).opcode = A_INC then
  6445. begin
  6446. Inc(TestValMin);
  6447. Inc(TestValMax);
  6448. end
  6449. else
  6450. begin
  6451. Dec(TestValMin);
  6452. Dec(TestValMax);
  6453. end;
  6454. end;
  6455. A_CMP:
  6456. begin
  6457. if (taicpu(hp1).oper[1]^.typ <> top_reg) or
  6458. { Has to be an exact match on the register }
  6459. (taicpu(hp1).oper[1]^.reg <> ThisReg) or
  6460. (taicpu(hp1).oper[0]^.typ <> top_const) or
  6461. { Make sure the comparison value is not smaller than the
  6462. smallest allowed signed value for the minimum size (e.g.
  6463. -128 for 8-bit) }
  6464. not (
  6465. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6466. { Is it in the negative range? }
  6467. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val))
  6468. ) then
  6469. Break;
  6470. TestValMin := TestValMin - taicpu(hp1).oper[0]^.val;
  6471. TestValMax := TestValMax - taicpu(hp1).oper[0]^.val;
  6472. if (TestValMin < TrySmallerLimit) or (TestValMax < TrySmallerLimit) or
  6473. (TestValMin > UpperLimit) or (TestValMax > UpperLimit) then
  6474. { Overflow }
  6475. Break;
  6476. { Check to see if the active register is used afterwards }
  6477. TransferUsedRegs(TmpUsedRegs);
  6478. IncludeRegInUsedRegs(ThisReg, TmpUsedRegs);
  6479. if not RegUsedAfterInstruction(ThisReg, hp1, TmpUsedRegs) then
  6480. begin
  6481. case MinSize of
  6482. S_B:
  6483. TargetSubReg := R_SUBL;
  6484. S_W:
  6485. TargetSubReg := R_SUBW;
  6486. else
  6487. InternalError(2021051002);
  6488. end;
  6489. { Update the register to its new size }
  6490. setsubreg(ThisReg, TargetSubReg);
  6491. taicpu(hp1).oper[1]^.reg := ThisReg;
  6492. taicpu(hp1).opsize := MinSize;
  6493. { Convert the input MOVZX to a MOV }
  6494. if (taicpu(p).oper[0]^.typ = top_reg) and
  6495. SuperRegistersEqual(taicpu(p).oper[0]^.reg, ThisReg) then
  6496. begin
  6497. { Or remove it completely! }
  6498. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 1a', p);
  6499. RemoveCurrentP(p);
  6500. p_removed := True;
  6501. end
  6502. else
  6503. begin
  6504. DebugMsg(SPeepholeOptimization + 'Movzx2Mov 1a', p);
  6505. taicpu(p).opcode := A_MOV;
  6506. taicpu(p).oper[1]^.reg := ThisReg;
  6507. taicpu(p).opsize := MinSize;
  6508. end;
  6509. if (InstrMax >= 0) then
  6510. begin
  6511. for Index := 0 to InstrMax do
  6512. begin
  6513. { If p_removed is true, then the original MOV/Z was removed
  6514. and removing the AND instruction may not be safe if it
  6515. appears first }
  6516. if (InstrList[Index].oper[InstrList[Index].ops - 1]^.typ <> top_reg) then
  6517. InternalError(2020112311);
  6518. if InstrList[Index].oper[0]^.typ = top_reg then
  6519. InstrList[Index].oper[0]^.reg := ThisReg;
  6520. InstrList[Index].oper[InstrList[Index].ops - 1]^.reg := ThisReg;
  6521. InstrList[Index].opsize := MinSize;
  6522. end;
  6523. end;
  6524. Result := True;
  6525. Exit;
  6526. end;
  6527. end;
  6528. { OR and XOR are not included because they can too easily fool
  6529. the data flow analysis (they can cause non-linear behaviour) }
  6530. A_ADD,A_SUB,A_AND,A_SHL,A_SHR:
  6531. begin
  6532. if
  6533. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  6534. { Has to be an exact match on the register }
  6535. (taicpu(hp1).oper[1]^.reg <> ThisReg) or not
  6536. (
  6537. (
  6538. (taicpu(hp1).oper[0]^.typ = top_const) and
  6539. (
  6540. (
  6541. (taicpu(hp1).opcode = A_SHL) and
  6542. (
  6543. ((MinSize = S_B) and (taicpu(hp1).oper[0]^.val < 8)) or
  6544. ((MinSize = S_W) and (taicpu(hp1).oper[0]^.val < 16)) or
  6545. ((MinSize = S_L) and (taicpu(hp1).oper[0]^.val < 32))
  6546. )
  6547. ) or (
  6548. (taicpu(hp1).opcode <> A_SHL) and
  6549. (
  6550. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6551. { Is it in the negative range? }
  6552. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val))
  6553. )
  6554. )
  6555. )
  6556. ) or (
  6557. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^.reg) and
  6558. ((taicpu(hp1).opcode = A_ADD) or (taicpu(hp1).opcode = A_AND) or (taicpu(hp1).opcode = A_SUB))
  6559. )
  6560. ) then
  6561. Break;
  6562. case taicpu(hp1).opcode of
  6563. A_ADD:
  6564. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  6565. begin
  6566. TestValMin := TestValMin * 2;
  6567. TestValMax := TestValMax * 2;
  6568. end
  6569. else
  6570. begin
  6571. TestValMin := TestValMin + taicpu(hp1).oper[0]^.val;
  6572. TestValMax := TestValMax + taicpu(hp1).oper[0]^.val;
  6573. end;
  6574. A_SUB:
  6575. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  6576. begin
  6577. TestValMin := 0;
  6578. TestValMax := 0;
  6579. end
  6580. else
  6581. begin
  6582. TestValMin := TestValMin - taicpu(hp1).oper[0]^.val;
  6583. TestValMax := TestValMax - taicpu(hp1).oper[0]^.val;
  6584. end;
  6585. A_AND:
  6586. if (taicpu(hp1).oper[0]^.typ = top_const) then
  6587. begin
  6588. { we might be able to go smaller if AND appears first }
  6589. if InstrMax = -1 then
  6590. case MinSize of
  6591. S_B:
  6592. ;
  6593. S_W:
  6594. if ((taicpu(hp1).oper[0]^.val and $FF) = taicpu(hp1).oper[0]^.val) or
  6595. ((not(taicpu(hp1).oper[0]^.val) and $7F) = (not taicpu(hp1).oper[0]^.val)) then
  6596. begin
  6597. TrySmaller := S_B;
  6598. TrySmallerLimit := $FF;
  6599. end;
  6600. S_L:
  6601. if ((taicpu(hp1).oper[0]^.val and $FF) = taicpu(hp1).oper[0]^.val) or
  6602. ((not(taicpu(hp1).oper[0]^.val) and $7F) = (not taicpu(hp1).oper[0]^.val)) then
  6603. begin
  6604. TrySmaller := S_B;
  6605. TrySmallerLimit := $FF;
  6606. end
  6607. else if ((taicpu(hp1).oper[0]^.val and $FFFF) = taicpu(hp1).oper[0]^.val) or
  6608. ((not(taicpu(hp1).oper[0]^.val) and $7FFF) = (not taicpu(hp1).oper[0]^.val)) then
  6609. begin
  6610. TrySmaller := S_W;
  6611. TrySmallerLimit := $FFFF;
  6612. end;
  6613. else
  6614. InternalError(2020112320);
  6615. end;
  6616. TestValMin := TestValMin and taicpu(hp1).oper[0]^.val;
  6617. TestValMax := TestValMax and taicpu(hp1).oper[0]^.val;
  6618. end;
  6619. A_SHL:
  6620. begin
  6621. TestValMin := TestValMin shl taicpu(hp1).oper[0]^.val;
  6622. TestValMax := TestValMax shl taicpu(hp1).oper[0]^.val;
  6623. end;
  6624. A_SHR:
  6625. begin
  6626. { we might be able to go smaller if SHR appears first }
  6627. if InstrMax = -1 then
  6628. case MinSize of
  6629. S_B:
  6630. ;
  6631. S_W:
  6632. if (taicpu(hp1).oper[0]^.val >= 8) then
  6633. begin
  6634. TrySmaller := S_B;
  6635. TrySmallerLimit := $FF;
  6636. end;
  6637. S_L:
  6638. if (taicpu(hp1).oper[0]^.val >= 24) then
  6639. begin
  6640. TrySmaller := S_B;
  6641. TrySmallerLimit := $FF;
  6642. end
  6643. else if (taicpu(hp1).oper[0]^.val >= 16) then
  6644. begin
  6645. TrySmaller := S_W;
  6646. TrySmallerLimit := $FFFF;
  6647. end;
  6648. else
  6649. InternalError(2020112321);
  6650. end;
  6651. TestValMin := TestValMin shr taicpu(hp1).oper[0]^.val;
  6652. TestValMax := TestValMax shr taicpu(hp1).oper[0]^.val;
  6653. end;
  6654. else
  6655. InternalError(2020112303);
  6656. end;
  6657. end;
  6658. (*
  6659. A_IMUL:
  6660. case taicpu(hp1).ops of
  6661. 2:
  6662. begin
  6663. if not MatchOpType(hp1, top_reg, top_reg) or
  6664. { Has to be an exact match on the register }
  6665. (taicpu(hp1).oper[0]^.reg <> ThisReg) or
  6666. (taicpu(hp1).oper[1]^.reg <> ThisReg) then
  6667. Break;
  6668. TestValMin := TestValMin * TestValMin;
  6669. TestValMax := TestValMax * TestValMax;
  6670. end;
  6671. 3:
  6672. begin
  6673. if not MatchOpType(hp1, top_const, top_reg, top_reg) or
  6674. { Has to be an exact match on the register }
  6675. (taicpu(hp1).oper[1]^.reg <> ThisReg) or
  6676. (taicpu(hp1).oper[2]^.reg <> ThisReg) or
  6677. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6678. { Is it in the negative range? }
  6679. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val)) then
  6680. Break;
  6681. TestValMin := TestValMin * taicpu(hp1).oper[0]^.val;
  6682. TestValMax := TestValMax * taicpu(hp1).oper[0]^.val;
  6683. end;
  6684. else
  6685. Break;
  6686. end;
  6687. A_IDIV:
  6688. case taicpu(hp1).ops of
  6689. 3:
  6690. begin
  6691. if not MatchOpType(hp1, top_const, top_reg, top_reg) or
  6692. { Has to be an exact match on the register }
  6693. (taicpu(hp1).oper[1]^.reg <> ThisReg) or
  6694. (taicpu(hp1).oper[2]^.reg <> ThisReg) or
  6695. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6696. { Is it in the negative range? }
  6697. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val)) then
  6698. Break;
  6699. TestValMin := TestValMin div taicpu(hp1).oper[0]^.val;
  6700. TestValMax := TestValMax div taicpu(hp1).oper[0]^.val;
  6701. end;
  6702. else
  6703. Break;
  6704. end;
  6705. *)
  6706. A_MOVZX:
  6707. begin
  6708. if not MatchOpType(taicpu(hp1), top_reg, top_reg) then
  6709. Break;
  6710. if not SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, ThisReg) then
  6711. begin
  6712. { Because hp1 was obtained via GetNextInstructionUsingReg
  6713. and ThisReg doesn't appear in the first operand, it
  6714. must appear in the second operand and hence gets
  6715. overwritten }
  6716. if (InstrMax = -1) and
  6717. Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, ThisReg) then
  6718. begin
  6719. { The two MOVZX instructions are adjacent, so remove the first one }
  6720. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 5', p);
  6721. RemoveCurrentP(p);
  6722. Result := True;
  6723. Exit;
  6724. end;
  6725. Break;
  6726. end;
  6727. { The objective here is to try to find a combination that
  6728. removes one of the MOV/Z instructions. }
  6729. case taicpu(hp1).opsize of
  6730. S_WL:
  6731. if (MinSize in [S_B, S_W]) then
  6732. begin
  6733. TargetSize := S_L;
  6734. TargetSubReg := R_SUBD;
  6735. end
  6736. else if ((TrySmaller in [S_B, S_W]) and not SmallerOverflow) then
  6737. begin
  6738. TargetSize := TrySmaller;
  6739. if TrySmaller = S_B then
  6740. TargetSubReg := R_SUBL
  6741. else
  6742. TargetSubReg := R_SUBW;
  6743. end
  6744. else
  6745. Break;
  6746. S_BW:
  6747. if (MinSize in [S_B, S_W]) then
  6748. begin
  6749. TargetSize := S_W;
  6750. TargetSubReg := R_SUBW;
  6751. end
  6752. else if ((TrySmaller = S_B) and not SmallerOverflow) then
  6753. begin
  6754. TargetSize := S_B;
  6755. TargetSubReg := R_SUBL;
  6756. end
  6757. else
  6758. Break;
  6759. S_BL:
  6760. if (MinSize in [S_B, S_W]) then
  6761. begin
  6762. TargetSize := S_L;
  6763. TargetSubReg := R_SUBD;
  6764. end
  6765. else if ((TrySmaller = S_B) and not SmallerOverflow) then
  6766. begin
  6767. TargetSize := S_B;
  6768. TargetSubReg := R_SUBL;
  6769. end
  6770. else
  6771. Break;
  6772. else
  6773. InternalError(2020112302);
  6774. end;
  6775. { Update the register to its new size }
  6776. setsubreg(ThisReg, TargetSubReg);
  6777. if TargetSize = MinSize then
  6778. begin
  6779. { Convert the input MOVZX to a MOV }
  6780. if (taicpu(p).oper[0]^.typ = top_reg) and
  6781. SuperRegistersEqual(taicpu(p).oper[0]^.reg, ThisReg) then
  6782. begin
  6783. { Or remove it completely! }
  6784. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 1', p);
  6785. RemoveCurrentP(p);
  6786. p_removed := True;
  6787. end
  6788. else
  6789. begin
  6790. DebugMsg(SPeepholeOptimization + 'Movzx2Mov 1', p);
  6791. taicpu(p).opcode := A_MOV;
  6792. taicpu(p).oper[1]^.reg := ThisReg;
  6793. taicpu(p).opsize := TargetSize;
  6794. end;
  6795. Result := True;
  6796. end
  6797. else if TargetSize <> MaxSize then
  6798. begin
  6799. case MaxSize of
  6800. S_L:
  6801. if TargetSize = S_W then
  6802. begin
  6803. DebugMsg(SPeepholeOptimization + 'movzbl2movzbw', p);
  6804. taicpu(p).opsize := S_BW;
  6805. taicpu(p).oper[1]^.reg := ThisReg;
  6806. Result := True;
  6807. end
  6808. else
  6809. InternalError(2020112341);
  6810. S_W:
  6811. if TargetSize = S_L then
  6812. begin
  6813. DebugMsg(SPeepholeOptimization + 'movzbw2movzbl', p);
  6814. taicpu(p).opsize := S_BL;
  6815. taicpu(p).oper[1]^.reg := ThisReg;
  6816. Result := True;
  6817. end
  6818. else
  6819. InternalError(2020112342);
  6820. else
  6821. ;
  6822. end;
  6823. end;
  6824. if (MaxSize = TargetSize) or
  6825. ((TargetSize = S_L) and (taicpu(hp1).opsize in [S_L, S_BL, S_WL])) or
  6826. ((TargetSize = S_W) and (taicpu(hp1).opsize in [S_W, S_BW])) then
  6827. begin
  6828. { Convert the output MOVZX to a MOV }
  6829. if SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, ThisReg) then
  6830. begin
  6831. { Or remove it completely! }
  6832. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 2', hp1);
  6833. { Be careful; if p = hp1 and p was also removed, p
  6834. will become a dangling pointer }
  6835. if p = hp1 then
  6836. RemoveCurrentp(p) { p = hp1 and will then become the next instruction }
  6837. else
  6838. RemoveInstruction(hp1);
  6839. end
  6840. else
  6841. begin
  6842. taicpu(hp1).opcode := A_MOV;
  6843. taicpu(hp1).oper[0]^.reg := ThisReg;
  6844. taicpu(hp1).opsize := TargetSize;
  6845. { Check to see if the active register is used afterwards;
  6846. if not, we can change it and make a saving. }
  6847. RegInUse := False;
  6848. TransferUsedRegs(TmpUsedRegs);
  6849. { The target register may be marked as in use to cross
  6850. a jump to a distant label, so exclude it }
  6851. ExcludeRegFromUsedRegs(taicpu(hp1).oper[1]^.reg, TmpUsedRegs);
  6852. hp2 := p;
  6853. repeat
  6854. UpdateUsedRegs(TmpUsedRegs, tai(hp2.next));
  6855. { Explicitly check for the excluded register (don't include the first
  6856. instruction as it may be reading from here }
  6857. if ((p <> hp2) and (RegInInstruction(taicpu(hp1).oper[1]^.reg, hp2))) or
  6858. RegInUsedRegs(taicpu(hp1).oper[1]^.reg, TmpUsedRegs) then
  6859. begin
  6860. RegInUse := True;
  6861. Break;
  6862. end;
  6863. if not GetNextInstruction(hp2, hp2) then
  6864. InternalError(2020112340);
  6865. until (hp2 = hp1);
  6866. if not RegInUse and not RegUsedAfterInstruction(ThisReg, hp1, TmpUsedRegs) then
  6867. begin
  6868. DebugMsg(SPeepholeOptimization + 'Simplified register usage so ' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' = ' + debug_regname(taicpu(p).oper[1]^.reg), p);
  6869. ThisReg := taicpu(hp1).oper[1]^.reg;
  6870. RegChanged := True;
  6871. TransferUsedRegs(TmpUsedRegs);
  6872. AllocRegBetween(ThisReg, p, hp1, TmpUsedRegs);
  6873. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 3', hp1);
  6874. if p = hp1 then
  6875. RemoveCurrentp(p) { p = hp1 and will then become the next instruction }
  6876. else
  6877. RemoveInstruction(hp1);
  6878. { Instruction will become "mov %reg,%reg" }
  6879. if not p_removed and (taicpu(p).opcode = A_MOV) and
  6880. MatchOperand(taicpu(p).oper[0]^, ThisReg) then
  6881. begin
  6882. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 6', p);
  6883. RemoveCurrentP(p);
  6884. p_removed := True;
  6885. end
  6886. else
  6887. taicpu(p).oper[1]^.reg := ThisReg;
  6888. Result := True;
  6889. end
  6890. else
  6891. DebugMsg(SPeepholeOptimization + 'Movzx2Mov 2', hp1);
  6892. end;
  6893. end
  6894. else
  6895. InternalError(2020112330);
  6896. { Now go through every instruction we found and change the
  6897. size. If TargetSize = MaxSize, then almost no changes are
  6898. needed and Result can remain False if it hasn't been set
  6899. yet.
  6900. If RegChanged is True, then the register requires changing
  6901. and so the point about TargetSize = MaxSize doesn't apply. }
  6902. if ((TargetSize <> MaxSize) or RegChanged) and (InstrMax >= 0) then
  6903. begin
  6904. for Index := 0 to InstrMax do
  6905. begin
  6906. { If p_removed is true, then the original MOV/Z was removed
  6907. and removing the AND instruction may not be safe if it
  6908. appears first }
  6909. if (InstrList[Index].oper[InstrList[Index].ops - 1]^.typ <> top_reg) then
  6910. InternalError(2020112310);
  6911. if InstrList[Index].oper[0]^.typ = top_reg then
  6912. InstrList[Index].oper[0]^.reg := ThisReg;
  6913. InstrList[Index].oper[InstrList[Index].ops - 1]^.reg := ThisReg;
  6914. InstrList[Index].opsize := TargetSize;
  6915. end;
  6916. Result := True;
  6917. end;
  6918. Exit;
  6919. end;
  6920. else
  6921. { This includes ADC, SBB, IDIV and SAR }
  6922. Break;
  6923. end;
  6924. if (TestValMin < 0) or (TestValMax < 0) or
  6925. (TestValMin > UpperLimit) or (TestValMax > UpperLimit) then
  6926. { Overflow }
  6927. Break
  6928. else if not SmallerOverflow and (TrySmaller <> S_NO) and
  6929. ((TestValMin > TrySmallerLimit) or (TestValMax > TrySmallerLimit)) then
  6930. SmallerOverflow := True;
  6931. { Contains highest index (so instruction count - 1) }
  6932. Inc(InstrMax);
  6933. if InstrMax > High(InstrList) then
  6934. SetLength(InstrList, InstrMax + LIST_STEP_SIZE);
  6935. InstrList[InstrMax] := taicpu(hp1);
  6936. end;
  6937. end;
{ Tries to fold a preceding "mov reg1,reg2" into an IMUL that targets reg2,
  producing the three-operand form "imul y,reg1,reg2" so the MOV can be
  removed:
      mov  reg1,reg2
      imul y,reg2       ->   imul y,reg1,reg2

  Conditions checked:
    * p is an IMUL whose first operand is a constant (or a full symbolic
      reference, refaddr = addr_full) and whose destination is a register;
    * if p is already in three-operand form, its source and destination
      registers must match (so replacing the source is safe);
    * the instruction directly before p is a reg,reg MOV writing the same
      register that the IMUL uses as its destination.

  Returns True (and removes the MOV) if the transformation was made. }
function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  var
    hp1 : tai;  { the MOV instruction preceding p, if matched }
  begin
    Result:=false;
    if (taicpu(p).ops >= 2) and
       ((taicpu(p).oper[0]^.typ = top_const) or
        ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
       (taicpu(p).oper[1]^.typ = top_reg) and
       ((taicpu(p).ops = 2) or
        ((taicpu(p).oper[2]^.typ = top_reg) and
         (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
       GetLastInstruction(p,hp1) and
       MatchInstruction(hp1,A_MOV,[]) and
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        { Only fold if reg2 is not live after p, or if the three-operand
          IMUL writes to the same register anyway }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) or
          ((taicpu(p).ops = 3) and (taicpu(p).oper[1]^.reg=taicpu(p).oper[2]^.reg)) then
          { change
              mov reg1,reg2
              imul y,reg2 to imul y,reg1,reg2 }
          begin
            taicpu(p).ops := 3;
            { NOTE: order matters here - oper[1] must be copied into
              oper[2] before oper[1] is overwritten with the MOV's
              source register }
            taicpu(p).loadreg(2,taicpu(p).oper[1]^.reg);
            taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
            DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
            RemoveInstruction(hp1);
            result:=true;
          end;
      end;
  end;
  6971. procedure TX86AsmOptimizer.ConvertJumpToRET(const p: tai; const ret_p: tai);
  6972. var
  6973. ThisLabel: TAsmLabel;
  6974. begin
  6975. ThisLabel := tasmlabel(taicpu(p).oper[0]^.ref^.symbol);
  6976. ThisLabel.decrefs;
  6977. taicpu(p).opcode := A_RET;
  6978. taicpu(p).is_jmp := false;
  6979. taicpu(p).ops := taicpu(ret_p).ops;
  6980. case taicpu(ret_p).ops of
  6981. 0:
  6982. taicpu(p).clearop(0);
  6983. 1:
  6984. taicpu(p).loadconst(0,taicpu(ret_p).oper[0]^.val);
  6985. else
  6986. internalerror(2016041301);
  6987. end;
  6988. { If the original label is now dead, it might turn out that the label
  6989. immediately follows p. As a result, everything beyond it, which will
  6990. be just some final register configuration and a RET instruction, is
  6991. now dead code. [Kit] }
  6992. { NOTE: This is much faster than introducing a OptPass2RET routine and
  6993. running RemoveDeadCodeAfterJump for each RET instruction, because
  6994. this optimisation rarely happens and most RETs appear at the end of
  6995. routines where there is nothing that can be stripped. [Kit] }
  6996. if not ThisLabel.is_used then
  6997. RemoveDeadCodeAfterJump(p);
  6998. end;
  6999. function TX86AsmOptimizer.OptPass2SETcc(var p: tai): boolean;
  7000. var
  7001. hp1,hp2,next: tai; SetC, JumpC: TAsmCond;
  7002. Unconditional, PotentialModified: Boolean;
  7003. OperPtr: POper;
  7004. NewRef: TReference;
  7005. InstrList: array of taicpu;
  7006. InstrMax, Index: Integer;
  7007. const
  7008. {$ifdef DEBUG_AOPTCPU}
  7009. SNoFlags: shortstring = ' so the flags aren''t modified';
  7010. {$else DEBUG_AOPTCPU}
  7011. SNoFlags = '';
  7012. {$endif DEBUG_AOPTCPU}
  7013. begin
  7014. Result:=false;
  7015. if MatchOpType(taicpu(p),top_reg) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  7016. begin
  7017. if MatchInstruction(hp1, A_TEST, [S_B]) and
  7018. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  7019. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  7020. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  7021. GetNextInstruction(hp1, hp2) and
  7022. MatchInstruction(hp2, A_Jcc, A_SETcc, []) then
  7023. { Change from: To:
  7024. set(C) %reg j(~C) label
  7025. test %reg,%reg/cmp $0,%reg
  7026. je label
  7027. set(C) %reg j(C) label
  7028. test %reg,%reg/cmp $0,%reg
  7029. jne label
  7030. (Also do something similar with sete/setne instead of je/jne)
  7031. }
  7032. begin
  7033. { Before we do anything else, we need to check the instructions
  7034. in between SETcc and TEST to make sure they don't modify the
  7035. FLAGS register - if -O2 or under, there won't be any
  7036. instructions between SET and TEST }
  7037. TransferUsedRegs(TmpUsedRegs);
  7038. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  7039. if (cs_opt_level3 in current_settings.optimizerswitches) then
  7040. begin
  7041. next := p;
  7042. SetLength(InstrList, 0);
  7043. InstrMax := -1;
  7044. PotentialModified := False;
  7045. { Make a note of every instruction that modifies the FLAGS
  7046. register }
  7047. while GetNextInstruction(next, next) and (next <> hp1) do
  7048. begin
  7049. if next.typ <> ait_instruction then
  7050. { GetNextInstructionUsingReg should have returned False }
  7051. InternalError(2021051701);
  7052. if RegModifiedByInstruction(NR_DEFAULTFLAGS, next) then
  7053. begin
  7054. case taicpu(next).opcode of
  7055. A_SETcc,
  7056. A_CMOVcc,
  7057. A_Jcc:
  7058. begin
  7059. if PotentialModified then
  7060. { Not safe because the flags were modified earlier }
  7061. Exit
  7062. else
  7063. { Condition is the same as the initial SETcc, so this is safe
  7064. (don't add to instruction list though) }
  7065. Continue;
  7066. end;
  7067. A_ADD:
  7068. begin
  7069. if (taicpu(next).opsize = S_B) or
  7070. { LEA doesn't support 8-bit operands }
  7071. (taicpu(next).oper[1]^.typ <> top_reg) or
  7072. { Must write to a register }
  7073. (taicpu(next).oper[0]^.typ = top_ref) then
  7074. { Require a constant or a register }
  7075. Exit;
  7076. PotentialModified := True;
  7077. end;
  7078. A_SUB:
  7079. begin
  7080. if (taicpu(next).opsize = S_B) or
  7081. { LEA doesn't support 8-bit operands }
  7082. (taicpu(next).oper[1]^.typ <> top_reg) or
  7083. { Must write to a register }
  7084. (taicpu(next).oper[0]^.typ <> top_const) or
  7085. (taicpu(next).oper[0]^.val = $80000000) then
  7086. { Can't subtract a register with LEA - also
  7087. check that the value isn't -2^31, as this
  7088. can't be negated }
  7089. Exit;
  7090. PotentialModified := True;
  7091. end;
  7092. A_SAL,
  7093. A_SHL:
  7094. begin
  7095. if (taicpu(next).opsize = S_B) or
  7096. { LEA doesn't support 8-bit operands }
  7097. (taicpu(next).oper[1]^.typ <> top_reg) or
  7098. { Must write to a register }
  7099. (taicpu(next).oper[0]^.typ <> top_const) or
  7100. (taicpu(next).oper[0]^.val < 0) or
  7101. (taicpu(next).oper[0]^.val > 3) then
  7102. Exit;
  7103. PotentialModified := True;
  7104. end;
  7105. A_IMUL:
  7106. begin
  7107. if (taicpu(next).ops <> 3) or
  7108. (taicpu(next).oper[1]^.typ <> top_reg) or
  7109. { Must write to a register }
  7110. (taicpu(next).oper[2]^.val in [2,3,4,5,8,9]) then
  7111. { We can convert "imul x,%reg1,%reg2" (where x = 2, 4 or 8)
  7112. to "lea (%reg1,x),%reg2". If x = 3, 5 or 9, we can
  7113. change this to "lea (%reg1,%reg1,(x-1)),%reg2" }
  7114. Exit
  7115. else
  7116. PotentialModified := True;
  7117. end;
  7118. else
  7119. { Don't know how to change this, so abort }
  7120. Exit;
  7121. end;
  7122. { Contains highest index (so instruction count - 1) }
  7123. Inc(InstrMax);
  7124. if InstrMax > High(InstrList) then
  7125. SetLength(InstrList, InstrMax + LIST_STEP_SIZE);
  7126. InstrList[InstrMax] := taicpu(next);
  7127. end;
  7128. UpdateUsedRegs(TmpUsedRegs, tai(next.next));
  7129. end;
  7130. if not Assigned(next) or (next <> hp1) then
  7131. { It should be equal to hp1 }
  7132. InternalError(2021051702);
  7133. { Cycle through each instruction and check to see if we can
  7134. change them to versions that don't modify the flags }
  7135. if (InstrMax >= 0) then
  7136. begin
  7137. for Index := 0 to InstrMax do
  7138. case InstrList[Index].opcode of
  7139. A_ADD:
  7140. begin
  7141. DebugMsg(SPeepholeOptimization + 'ADD -> LEA' + SNoFlags, InstrList[Index]);
  7142. InstrList[Index].opcode := A_LEA;
  7143. reference_reset(NewRef, 1, []);
  7144. NewRef.base := InstrList[Index].oper[1]^.reg;
  7145. if InstrList[Index].oper[0]^.typ = top_reg then
  7146. begin
  7147. NewRef.index := InstrList[Index].oper[0]^.reg;
  7148. NewRef.scalefactor := 1;
  7149. end
  7150. else
  7151. NewRef.offset := InstrList[Index].oper[0]^.val;
  7152. InstrList[Index].loadref(0, NewRef);
  7153. end;
  7154. A_SUB:
  7155. begin
  7156. DebugMsg(SPeepholeOptimization + 'SUB -> LEA' + SNoFlags, InstrList[Index]);
  7157. InstrList[Index].opcode := A_LEA;
  7158. reference_reset(NewRef, 1, []);
  7159. NewRef.base := InstrList[Index].oper[1]^.reg;
  7160. NewRef.offset := -InstrList[Index].oper[0]^.val;
  7161. InstrList[Index].loadref(0, NewRef);
  7162. end;
  7163. A_SHL,
  7164. A_SAL:
  7165. begin
  7166. DebugMsg(SPeepholeOptimization + 'SHL -> LEA' + SNoFlags, InstrList[Index]);
  7167. InstrList[Index].opcode := A_LEA;
  7168. reference_reset(NewRef, 1, []);
  7169. NewRef.index := InstrList[Index].oper[1]^.reg;
  7170. NewRef.scalefactor := 1 shl (InstrList[Index].oper[0]^.val);
  7171. InstrList[Index].loadref(0, NewRef);
  7172. end;
  7173. A_IMUL:
  7174. begin
  7175. DebugMsg(SPeepholeOptimization + 'IMUL -> LEA' + SNoFlags, InstrList[Index]);
  7176. InstrList[Index].opcode := A_LEA;
  7177. reference_reset(NewRef, 1, []);
  7178. NewRef.index := InstrList[Index].oper[1]^.reg;
  7179. case InstrList[Index].oper[0]^.val of
  7180. 2, 4, 8:
  7181. NewRef.scalefactor := InstrList[Index].oper[0]^.val;
  7182. else {3, 5 and 9}
  7183. begin
  7184. NewRef.scalefactor := InstrList[Index].oper[0]^.val - 1;
  7185. NewRef.base := InstrList[Index].oper[1]^.reg;
  7186. end;
  7187. end;
  7188. InstrList[Index].loadref(0, NewRef);
  7189. end;
  7190. else
  7191. InternalError(2021051710);
  7192. end;
  7193. end;
  7194. { Mark the FLAGS register as used across this whole block }
  7195. AllocRegBetween(NR_DEFAULTFLAGS, p, hp1, UsedRegs);
  7196. end;
  7197. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  7198. JumpC := taicpu(hp2).condition;
  7199. Unconditional := False;
  7200. if conditions_equal(JumpC, C_E) then
  7201. SetC := inverse_cond(taicpu(p).condition)
  7202. else if conditions_equal(JumpC, C_NE) then
  7203. SetC := taicpu(p).condition
  7204. else
  7205. { We've got something weird here (and inefficent) }
  7206. begin
  7207. DebugMsg('DEBUG: Inefficient jump - check code generation', p);
  7208. SetC := C_NONE;
  7209. { JAE/JNB will always branch (use 'condition_in', since C_AE <> C_NB normally) }
  7210. if condition_in(C_AE, JumpC) then
  7211. Unconditional := True
  7212. else
  7213. { Not sure what to do with this jump - drop out }
  7214. Exit;
  7215. end;
  7216. RemoveInstruction(hp1);
  7217. if Unconditional then
  7218. MakeUnconditional(taicpu(hp2))
  7219. else
  7220. begin
  7221. if SetC = C_NONE then
  7222. InternalError(2018061402);
  7223. taicpu(hp2).SetCondition(SetC);
  7224. end;
  7225. { as hp2 is a jump, we cannot use RegUsedAfterInstruction but we have to check if it is included in
  7226. TmpUsedRegs }
  7227. if not TmpUsedRegs[getregtype(taicpu(p).oper[0]^.reg)].IsUsed(taicpu(p).oper[0]^.reg) then
  7228. begin
  7229. RemoveCurrentp(p, hp2);
  7230. if taicpu(hp2).opcode = A_SETcc then
  7231. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/SETcc -> SETcc',p)
  7232. else
  7233. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> Jcc',p);
  7234. end
  7235. else
  7236. if taicpu(hp2).opcode = A_SETcc then
  7237. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/SETcc -> SETcc/SETcc',p)
  7238. else
  7239. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> SETcc/Jcc',p);
  7240. Result := True;
  7241. end
  7242. else if
  7243. { Make sure the instructions are adjacent }
  7244. (
  7245. not (cs_opt_level3 in current_settings.optimizerswitches) or
  7246. GetNextInstruction(p, hp1)
  7247. ) and
  7248. MatchInstruction(hp1, A_MOV, [S_B]) and
  7249. { Writing to memory is allowed }
  7250. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) then
  7251. begin
  7252. {
  7253. Watch out for sequences such as:
  7254. set(c)b %regb
  7255. movb %regb,(ref)
  7256. movb $0,1(ref)
  7257. movb $0,2(ref)
  7258. movb $0,3(ref)
  7259. Much more efficient to turn it into:
  7260. movl $0,%regl
  7261. set(c)b %regb
  7262. movl %regl,(ref)
  7263. Or:
  7264. set(c)b %regb
  7265. movzbl %regb,%regl
  7266. movl %regl,(ref)
  7267. }
  7268. if (taicpu(hp1).oper[1]^.typ = top_ref) and
  7269. GetNextInstruction(hp1, hp2) and
  7270. MatchInstruction(hp2, A_MOV, [S_B]) and
  7271. (taicpu(hp2).oper[1]^.typ = top_ref) and
  7272. CheckMemoryWrite(taicpu(hp1), taicpu(hp2)) then
  7273. begin
  7274. { Don't do anything else except set Result to True }
  7275. end
  7276. else
  7277. begin
  7278. if taicpu(p).oper[0]^.typ = top_reg then
  7279. begin
  7280. TransferUsedRegs(TmpUsedRegs);
  7281. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  7282. end;
  7283. { If it's not a register, it's a memory address }
  7284. if (taicpu(p).oper[0]^.typ <> top_reg) or RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp1, TmpUsedRegs) then
  7285. begin
  7286. { Even if the register is still in use, we can minimise the
  7287. pipeline stall by changing the MOV into another SETcc. }
  7288. taicpu(hp1).opcode := A_SETcc;
  7289. taicpu(hp1).condition := taicpu(p).condition;
  7290. if taicpu(hp1).oper[1]^.typ = top_ref then
  7291. begin
  7292. { Swapping the operand pointers like this is probably a
  7293. bit naughty, but it is far faster than using loadoper
  7294. to transfer the reference from oper[1] to oper[0] if
  7295. you take into account the extra procedure calls and
  7296. the memory allocation and deallocation required }
  7297. OperPtr := taicpu(hp1).oper[1];
  7298. taicpu(hp1).oper[1] := taicpu(hp1).oper[0];
  7299. taicpu(hp1).oper[0] := OperPtr;
  7300. end
  7301. else
  7302. taicpu(hp1).oper[0]^.reg := taicpu(hp1).oper[1]^.reg;
  7303. taicpu(hp1).clearop(1);
  7304. taicpu(hp1).ops := 1;
  7305. DebugMsg(SPeepholeOptimization + 'SETcc/Mov -> SETcc/SETcc',p);
  7306. end
  7307. else
  7308. begin
  7309. if taicpu(hp1).oper[1]^.typ = top_reg then
  7310. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
  7311. taicpu(p).loadoper(0, taicpu(hp1).oper[1]^);
  7312. RemoveInstruction(hp1);
  7313. DebugMsg(SPeepholeOptimization + 'SETcc/Mov -> SETcc',p);
  7314. end
  7315. end;
  7316. Result := True;
  7317. end;
  7318. end;
  7319. end;
  7320. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  7321. var
  7322. hp1: tai;
  7323. Count: Integer;
  7324. OrigLabel: TAsmLabel;
  7325. begin
  7326. result := False;
  7327. { Sometimes, the optimisations below can permit this }
  7328. RemoveDeadCodeAfterJump(p);
  7329. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  7330. (taicpu(p).oper[0]^.ref^.index=NR_NO) and (taicpu(p).oper[0]^.ref^.symbol is tasmlabel) then
  7331. begin
  7332. OrigLabel := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  7333. { Also a side-effect of optimisations }
  7334. if CollapseZeroDistJump(p, OrigLabel) then
  7335. begin
  7336. Result := True;
  7337. Exit;
  7338. end;
  7339. hp1 := GetLabelWithSym(OrigLabel);
  7340. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ = ait_instruction) then
  7341. begin
  7342. case taicpu(hp1).opcode of
  7343. A_RET:
  7344. {
  7345. change
  7346. jmp .L1
  7347. ...
  7348. .L1:
  7349. ret
  7350. into
  7351. ret
  7352. }
  7353. begin
  7354. ConvertJumpToRET(p, hp1);
  7355. result:=true;
  7356. end;
  7357. { Check any kind of direct assignment instruction }
  7358. A_MOV,
  7359. A_MOVD,
  7360. A_MOVQ,
  7361. A_MOVSX,
  7362. {$ifdef x86_64}
  7363. A_MOVSXD,
  7364. {$endif x86_64}
  7365. A_MOVZX,
  7366. A_MOVAPS,
  7367. A_MOVUPS,
  7368. A_MOVSD,
  7369. A_MOVAPD,
  7370. A_MOVUPD,
  7371. A_MOVDQA,
  7372. A_MOVDQU,
  7373. A_VMOVSS,
  7374. A_VMOVAPS,
  7375. A_VMOVUPS,
  7376. A_VMOVSD,
  7377. A_VMOVAPD,
  7378. A_VMOVUPD,
  7379. A_VMOVDQA,
  7380. A_VMOVDQU:
  7381. if ((current_settings.optimizerswitches * [cs_opt_level3, cs_opt_size]) <> [cs_opt_size]) and
  7382. CheckJumpMovTransferOpt(p, hp1, 0, Count) then
  7383. begin
  7384. Result := True;
  7385. Exit;
  7386. end;
  7387. else
  7388. ;
  7389. end;
  7390. end;
  7391. end;
  7392. end;
  7393. class function TX86AsmOptimizer.CanBeCMOV(p : tai) : boolean;
  7394. begin
  7395. CanBeCMOV:=assigned(p) and
  7396. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  7397. { we can't use cmov ref,reg because
  7398. ref could be nil and cmov still throws an exception
  7399. if ref=nil but the mov isn't done (FK)
  7400. or ((taicpu(p).oper[0]^.typ = top_ref) and
  7401. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  7402. }
  7403. (taicpu(p).oper[1]^.typ = top_reg) and
  7404. (
  7405. (taicpu(p).oper[0]^.typ = top_reg) or
  7406. { allow references, but only pure symbols or got rel. addressing with RIP as based,
  7407. it is not expected that this can cause a seg. violation }
  7408. (
  7409. (taicpu(p).oper[0]^.typ = top_ref) and
  7410. IsRefSafe(taicpu(p).oper[0]^.ref)
  7411. )
  7412. );
  7413. end;
  7414. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  7415. var
  7416. hp1,hp2: tai;
  7417. {$ifndef i8086}
  7418. hp3,hp4,hpmov2, hp5: tai;
  7419. l : Longint;
  7420. condition : TAsmCond;
  7421. {$endif i8086}
  7422. carryadd_opcode : TAsmOp;
  7423. symbol: TAsmSymbol;
  7424. reg: tsuperregister;
  7425. increg, tmpreg: TRegister;
  7426. begin
  7427. result:=false;
  7428. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) then
  7429. begin
  7430. symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  7431. if (
  7432. (
  7433. ((Taicpu(hp1).opcode=A_ADD) or (Taicpu(hp1).opcode=A_SUB)) and
  7434. MatchOptype(Taicpu(hp1),top_const,top_reg) and
  7435. (Taicpu(hp1).oper[0]^.val=1)
  7436. ) or
  7437. ((Taicpu(hp1).opcode=A_INC) or (Taicpu(hp1).opcode=A_DEC))
  7438. ) and
  7439. GetNextInstruction(hp1,hp2) and
  7440. SkipAligns(hp2, hp2) and
  7441. (hp2.typ = ait_label) and
  7442. (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
  7443. { jb @@1 cmc
  7444. inc/dec operand --> adc/sbb operand,0
  7445. @@1:
  7446. ... and ...
  7447. jnb @@1
  7448. inc/dec operand --> adc/sbb operand,0
  7449. @@1: }
  7450. begin
  7451. if Taicpu(p).condition in [C_NAE,C_B,C_C] then
  7452. begin
  7453. case taicpu(hp1).opcode of
  7454. A_INC,
  7455. A_ADD:
  7456. carryadd_opcode:=A_ADC;
  7457. A_DEC,
  7458. A_SUB:
  7459. carryadd_opcode:=A_SBB;
  7460. else
  7461. InternalError(2021011001);
  7462. end;
  7463. Taicpu(p).clearop(0);
  7464. Taicpu(p).ops:=0;
  7465. Taicpu(p).is_jmp:=false;
  7466. Taicpu(p).opcode:=A_CMC;
  7467. Taicpu(p).condition:=C_NONE;
  7468. DebugMsg(SPeepholeOptimization+'JccAdd/Inc/Dec2CmcAdc/Sbb',p);
  7469. Taicpu(hp1).ops:=2;
  7470. if (Taicpu(hp1).opcode=A_ADD) or (Taicpu(hp1).opcode=A_SUB) then
  7471. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[1]^)
  7472. else
  7473. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  7474. Taicpu(hp1).loadconst(0,0);
  7475. Taicpu(hp1).opcode:=carryadd_opcode;
  7476. result:=true;
  7477. exit;
  7478. end
  7479. else if Taicpu(p).condition in [C_AE,C_NB,C_NC] then
  7480. begin
  7481. case taicpu(hp1).opcode of
  7482. A_INC,
  7483. A_ADD:
  7484. carryadd_opcode:=A_ADC;
  7485. A_DEC,
  7486. A_SUB:
  7487. carryadd_opcode:=A_SBB;
  7488. else
  7489. InternalError(2021011002);
  7490. end;
  7491. Taicpu(hp1).ops:=2;
  7492. DebugMsg(SPeepholeOptimization+'JccAdd/Inc/Dec2Adc/Sbb',p);
  7493. if (Taicpu(hp1).opcode=A_ADD) or (Taicpu(hp1).opcode=A_SUB) then
  7494. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[1]^)
  7495. else
  7496. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  7497. Taicpu(hp1).loadconst(0,0);
  7498. Taicpu(hp1).opcode:=carryadd_opcode;
  7499. RemoveCurrentP(p, hp1);
  7500. result:=true;
  7501. exit;
  7502. end
  7503. {
  7504. jcc @@1 setcc tmpreg
  7505. inc/dec/add/sub operand -> (movzx tmpreg)
  7506. @@1: add/sub tmpreg,operand
  7507. While this increases code size slightly, it makes the code much faster if the
  7508. jump is unpredictable
  7509. }
  7510. else if not(cs_opt_size in current_settings.optimizerswitches) then
  7511. begin
  7512. { search for an available register which is volatile }
  7513. for reg in tcpuregisterset do
  7514. begin
  7515. if
  7516. {$if defined(i386) or defined(i8086)}
  7517. { Only use registers whose lowest 8-bits can Be accessed }
  7518. (reg in [RS_EAX,RS_EBX,RS_ECX,RS_EDX]) and
  7519. {$endif i386 or i8086}
  7520. (reg in paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption)) and
  7521. not(reg in UsedRegs[R_INTREGISTER].GetUsedRegs)
  7522. { We don't need to check if tmpreg is in hp1 or not, because
  7523. it will be marked as in use at p (if not, this is
  7524. indictive of a compiler bug). }
  7525. then
  7526. begin
  7527. TAsmLabel(symbol).decrefs;
  7528. increg := newreg(R_INTREGISTER,reg,R_SUBL);
  7529. Taicpu(p).clearop(0);
  7530. Taicpu(p).ops:=1;
  7531. Taicpu(p).is_jmp:=false;
  7532. Taicpu(p).opcode:=A_SETcc;
  7533. DebugMsg(SPeepholeOptimization+'JccAdd2SetccAdd',p);
  7534. Taicpu(p).condition:=inverse_cond(Taicpu(p).condition);
  7535. Taicpu(p).loadreg(0,increg);
  7536. if getsubreg(Taicpu(hp1).oper[1]^.reg)<>R_SUBL then
  7537. begin
  7538. case getsubreg(Taicpu(hp1).oper[1]^.reg) of
  7539. R_SUBW:
  7540. begin
  7541. tmpreg := newreg(R_INTREGISTER,reg,R_SUBW);
  7542. hp2:=Taicpu.op_reg_reg(A_MOVZX,S_BW,increg,tmpreg);
  7543. end;
  7544. R_SUBD:
  7545. begin
  7546. tmpreg := newreg(R_INTREGISTER,reg,R_SUBD);
  7547. hp2:=Taicpu.op_reg_reg(A_MOVZX,S_BL,increg,tmpreg);
  7548. end;
  7549. {$ifdef x86_64}
  7550. R_SUBQ:
  7551. begin
  7552. { MOVZX doesn't have a 64-bit variant, because
  7553. the 32-bit version implicitly zeroes the
  7554. upper 32-bits of the destination register }
  7555. hp2:=Taicpu.op_reg_reg(A_MOVZX,S_BL,increg,
  7556. newreg(R_INTREGISTER,reg,R_SUBD));
  7557. tmpreg := newreg(R_INTREGISTER,reg,R_SUBQ);
  7558. end;
  7559. {$endif x86_64}
  7560. else
  7561. Internalerror(2020030601);
  7562. end;
  7563. taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
  7564. asml.InsertAfter(hp2,p);
  7565. end
  7566. else
  7567. tmpreg := increg;
  7568. if (Taicpu(hp1).opcode=A_INC) or (Taicpu(hp1).opcode=A_DEC) then
  7569. begin
  7570. Taicpu(hp1).ops:=2;
  7571. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^)
  7572. end;
  7573. Taicpu(hp1).loadreg(0,tmpreg);
  7574. AllocRegBetween(tmpreg,p,hp1,UsedRegs);
  7575. Result := True;
  7576. { p is no longer a Jcc instruction, so exit }
  7577. Exit;
  7578. end;
  7579. end;
  7580. end;
  7581. end;
  7582. { Detect the following:
  7583. jmp<cond> @Lbl1
  7584. jmp @Lbl2
  7585. ...
  7586. @Lbl1:
  7587. ret
  7588. Change to:
  7589. jmp<inv_cond> @Lbl2
  7590. ret
  7591. }
  7592. if MatchInstruction(hp1,A_JMP,[]) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
  7593. begin
  7594. hp2:=getlabelwithsym(TAsmLabel(symbol));
  7595. if Assigned(hp2) and SkipLabels(hp2,hp2) and
  7596. MatchInstruction(hp2,A_RET,[S_NO]) then
  7597. begin
  7598. taicpu(p).condition := inverse_cond(taicpu(p).condition);
  7599. { Change label address to that of the unconditional jump }
  7600. taicpu(p).loadoper(0, taicpu(hp1).oper[0]^);
  7601. TAsmLabel(symbol).DecRefs;
  7602. taicpu(hp1).opcode := A_RET;
  7603. taicpu(hp1).is_jmp := false;
  7604. taicpu(hp1).ops := taicpu(hp2).ops;
  7605. DebugMsg(SPeepholeOptimization+'JccJmpRet2J!ccRet',p);
  7606. case taicpu(hp2).ops of
  7607. 0:
  7608. taicpu(hp1).clearop(0);
  7609. 1:
  7610. taicpu(hp1).loadconst(0,taicpu(hp2).oper[0]^.val);
  7611. else
  7612. internalerror(2016041302);
  7613. end;
  7614. end;
  7615. {$ifndef i8086}
  7616. end
  7617. {
  7618. convert
  7619. j<c> .L1
  7620. mov 1,reg
  7621. jmp .L2
  7622. .L1
  7623. mov 0,reg
  7624. .L2
  7625. into
  7626. mov 0,reg
  7627. set<not(c)> reg
  7628. take care of alignment and that the mov 0,reg is not converted into a xor as this
  7629. would destroy the flag contents
  7630. }
  7631. else if MatchInstruction(hp1,A_MOV,[]) and
  7632. MatchOpType(taicpu(hp1),top_const,top_reg) and
  7633. {$ifdef i386}
  7634. (
  7635. { Under i386, ESI, EDI, EBP and ESP
  7636. don't have an 8-bit representation }
  7637. not (getsupreg(taicpu(hp1).oper[1]^.reg) in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
  7638. ) and
  7639. {$endif i386}
  7640. (taicpu(hp1).oper[0]^.val=1) and
  7641. GetNextInstruction(hp1,hp2) and
  7642. MatchInstruction(hp2,A_JMP,[]) and (taicpu(hp2).oper[0]^.ref^.refaddr=addr_full) and
  7643. GetNextInstruction(hp2,hp3) and
  7644. { skip align }
  7645. ((hp3.typ<>ait_align) or GetNextInstruction(hp3,hp3)) and
  7646. (hp3.typ=ait_label) and
  7647. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol)=tai_label(hp3).labsym) and
  7648. (tai_label(hp3).labsym.getrefs=1) and
  7649. GetNextInstruction(hp3,hp4) and
  7650. MatchInstruction(hp4,A_MOV,[]) and
  7651. MatchOpType(taicpu(hp4),top_const,top_reg) and
  7652. (taicpu(hp4).oper[0]^.val=0) and
  7653. MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp4).oper[1]^) and
  7654. GetNextInstruction(hp4,hp5) and
  7655. (hp5.typ=ait_label) and
  7656. (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol)=tai_label(hp5).labsym) and
  7657. (tai_label(hp5).labsym.getrefs=1) then
  7658. begin
  7659. AllocRegBetween(NR_FLAGS,p,hp4,UsedRegs);
  7660. DebugMsg(SPeepholeOptimization+'JccMovJmpMov2MovSetcc',p);
  7661. { remove last label }
  7662. RemoveInstruction(hp5);
  7663. { remove second label }
  7664. RemoveInstruction(hp3);
  7665. { if align is present remove it }
  7666. if GetNextInstruction(hp2,hp3) and (hp3.typ=ait_align) then
  7667. RemoveInstruction(hp3);
  7668. { remove jmp }
  7669. RemoveInstruction(hp2);
  7670. if taicpu(hp1).opsize=S_B then
  7671. RemoveInstruction(hp1)
  7672. else
  7673. taicpu(hp1).loadconst(0,0);
  7674. taicpu(hp4).opcode:=A_SETcc;
  7675. taicpu(hp4).opsize:=S_B;
  7676. taicpu(hp4).condition:=inverse_cond(taicpu(p).condition);
  7677. taicpu(hp4).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(hp4).oper[1]^.reg),R_SUBL));
  7678. taicpu(hp4).opercnt:=1;
  7679. taicpu(hp4).ops:=1;
  7680. taicpu(hp4).freeop(1);
  7681. RemoveCurrentP(p);
  7682. Result:=true;
  7683. exit;
  7684. end
  7685. else if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  7686. begin
  7687. { check for
  7688. jCC xxx
  7689. <several movs>
  7690. xxx:
  7691. }
  7692. l:=0;
  7693. while assigned(hp1) and
  7694. CanBeCMOV(hp1) and
  7695. { stop on labels }
  7696. not(hp1.typ=ait_label) do
  7697. begin
  7698. inc(l);
  7699. GetNextInstruction(hp1,hp1);
  7700. end;
  7701. if assigned(hp1) then
  7702. begin
  7703. if FindLabel(tasmlabel(symbol),hp1) then
  7704. begin
  7705. if (l<=4) and (l>0) then
  7706. begin
  7707. condition:=inverse_cond(taicpu(p).condition);
  7708. GetNextInstruction(p,hp1);
  7709. repeat
  7710. if not Assigned(hp1) then
  7711. InternalError(2018062900);
  7712. taicpu(hp1).opcode:=A_CMOVcc;
  7713. taicpu(hp1).condition:=condition;
  7714. UpdateUsedRegs(hp1);
  7715. GetNextInstruction(hp1,hp1);
  7716. until not(CanBeCMOV(hp1));
  7717. { Remember what hp1 is in case there's multiple aligns to get rid of }
  7718. hp2 := hp1;
  7719. repeat
  7720. if not Assigned(hp2) then
  7721. InternalError(2018062910);
  7722. case hp2.typ of
  7723. ait_label:
  7724. { What we expected - break out of the loop (it won't be a dead label at the top of
  7725. a cluster because that was optimised at an earlier stage) }
  7726. Break;
  7727. ait_align:
  7728. { Go to the next entry until a label is found (may be multiple aligns before it) }
  7729. begin
  7730. hp2 := tai(hp2.Next);
  7731. Continue;
  7732. end;
  7733. else
  7734. begin
  7735. { Might be a comment or temporary allocation entry }
  7736. if not (hp2.typ in SkipInstr) then
  7737. InternalError(2018062911);
  7738. hp2 := tai(hp2.Next);
  7739. Continue;
  7740. end;
  7741. end;
  7742. until False;
  7743. { Now we can safely decrement the reference count }
  7744. tasmlabel(symbol).decrefs;
  7745. DebugMsg(SPeepholeOptimization+'JccMov2CMov',p);
  7746. { Remove the original jump }
  7747. RemoveInstruction(p); { Note, the choice to not use RemoveCurrentp is deliberate }
  7748. GetNextInstruction(hp2, p); { Instruction after the label }
  7749. { Remove the label if this is its final reference }
  7750. if (tasmlabel(symbol).getrefs=0) then
  7751. StripLabelFast(hp1);
  7752. if Assigned(p) then
  7753. begin
  7754. UpdateUsedRegs(p);
  7755. result:=true;
  7756. end;
  7757. exit;
  7758. end;
  7759. end
  7760. else
  7761. begin
  7762. { check further for
  7763. jCC xxx
  7764. <several movs 1>
  7765. jmp yyy
  7766. xxx:
  7767. <several movs 2>
  7768. yyy:
  7769. }
  7770. { hp2 points to jmp yyy }
  7771. hp2:=hp1;
  7772. { skip hp1 to xxx (or an align right before it) }
  7773. GetNextInstruction(hp1, hp1);
  7774. if assigned(hp2) and
  7775. assigned(hp1) and
  7776. (l<=3) and
  7777. (hp2.typ=ait_instruction) and
  7778. (taicpu(hp2).is_jmp) and
  7779. (taicpu(hp2).condition=C_None) and
  7780. { real label and jump, no further references to the
  7781. label are allowed }
  7782. (tasmlabel(symbol).getrefs=1) and
  7783. FindLabel(tasmlabel(symbol),hp1) then
  7784. begin
  7785. l:=0;
  7786. { skip hp1 to <several moves 2> }
  7787. if (hp1.typ = ait_align) then
  7788. GetNextInstruction(hp1, hp1);
  7789. GetNextInstruction(hp1, hpmov2);
  7790. hp1 := hpmov2;
  7791. while assigned(hp1) and
  7792. CanBeCMOV(hp1) do
  7793. begin
  7794. inc(l);
  7795. GetNextInstruction(hp1, hp1);
  7796. end;
  7797. { hp1 points to yyy (or an align right before it) }
  7798. hp3 := hp1;
  7799. if assigned(hp1) and
  7800. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  7801. begin
  7802. condition:=inverse_cond(taicpu(p).condition);
  7803. GetNextInstruction(p,hp1);
  7804. repeat
  7805. taicpu(hp1).opcode:=A_CMOVcc;
  7806. taicpu(hp1).condition:=condition;
  7807. UpdateUsedRegs(hp1);
  7808. GetNextInstruction(hp1,hp1);
  7809. until not(assigned(hp1)) or
  7810. not(CanBeCMOV(hp1));
  7811. condition:=inverse_cond(condition);
  7812. hp1 := hpmov2;
  7813. { hp1 is now at <several movs 2> }
  7814. while Assigned(hp1) and CanBeCMOV(hp1) do
  7815. begin
  7816. taicpu(hp1).opcode:=A_CMOVcc;
  7817. taicpu(hp1).condition:=condition;
  7818. UpdateUsedRegs(hp1);
  7819. GetNextInstruction(hp1,hp1);
  7820. end;
  7821. hp1 := p;
  7822. { Get first instruction after label }
  7823. GetNextInstruction(hp3, p);
  7824. if assigned(p) and (hp3.typ = ait_align) then
  7825. GetNextInstruction(p, p);
  7826. { Don't dereference yet, as doing so will cause
  7827. GetNextInstruction to skip the label and
  7828. optional align marker. [Kit] }
  7829. GetNextInstruction(hp2, hp4);
  7830. DebugMsg(SPeepholeOptimization+'JccMovJmpMov2CMovCMov',hp1);
  7831. { remove jCC }
  7832. RemoveInstruction(hp1);
  7833. { Now we can safely decrement it }
  7834. tasmlabel(symbol).decrefs;
  7835. { Remove label xxx (it will have a ref of zero due to the initial check }
  7836. StripLabelFast(hp4);
  7837. { remove jmp }
  7838. symbol := taicpu(hp2).oper[0]^.ref^.symbol;
  7839. RemoveInstruction(hp2);
  7840. { As before, now we can safely decrement it }
  7841. tasmlabel(symbol).decrefs;
  7842. { Remove label yyy (and the optional alignment) if its reference falls to zero }
  7843. if tasmlabel(symbol).getrefs = 0 then
  7844. StripLabelFast(hp3);
  7845. if Assigned(p) then
  7846. begin
  7847. UpdateUsedRegs(p);
  7848. result:=true;
  7849. end;
  7850. exit;
  7851. end;
  7852. end;
  7853. end;
  7854. end;
  7855. {$endif i8086}
  7856. end;
  7857. end;
  7858. end;
{ Pass-1 peephole optimisations for the zero/sign-extension instructions
  (MOVZX/MOVSX, plus MOVSXD on x86-64).  p is the movx instruction under
  consideration.  Returns True when p itself was removed or replaced so
  that the optimiser restarts analysis from the new p.
  NOTE(review): a few branches below modify or remove the *following*
  instruction without setting Result (e.g. the "var3" and "MovxMov2Movx"
  folds) - presumably intentional, but verify against the optimiser's
  contract for when Result must be True. }
function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  var
    { the one or two instructions following p }
    hp1,hp2: tai;
    { shared precondition: p writes to a register and is immediately
      followed by another instruction (hp1) }
    reg_and_hp1_is_instr: Boolean;
  begin
    result:=false;
    reg_and_hp1_is_instr:=(taicpu(p).oper[1]^.typ = top_reg) and
      GetNextInstruction(p,hp1) and
      (hp1.typ = ait_instruction);
    { Pattern "var3":
        movsX/movzX reg/ref,reg2 ; arith op on reg2 ; mov reg2,reg/ref
      -> single arith op applied directly to reg/ref, when reg2 dies
      after the store back. }
    if reg_and_hp1_is_instr and
      (
        (taicpu(hp1).opcode <> A_LEA) or
        { If the LEA instruction can be converted into an arithmetic instruction,
          it may be possible to then fold it. }
        (
          { If the flags register is in use, don't change the instruction
            to an ADD otherwise this will scramble the flags. [Kit] }
          not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
          ConvertLEA(taicpu(hp1))
        )
      ) and
      IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_MOV,[]) and
      (taicpu(hp2).oper[0]^.typ = top_reg) and
      OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
      { the store back must write exactly the original (pre-extension) width }
      ((taicpu(p).opsize in [S_BW,S_BL]) and (taicpu(hp2).opsize=S_B) or
       (taicpu(p).opsize in [S_WL]) and (taicpu(hp2).opsize=S_W)) and
{$ifdef i386}
      { not all registers have byte size sub registers on i386 }
      ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
      { the arith op's destination must be the register stored back by hp2 }
      (((taicpu(hp1).ops=2) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
       ((taicpu(hp1).ops=1) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
      not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
      begin
        { change movsX/movzX reg/ref, reg2
                 add/sub/or/... reg3/$const, reg2
                 mov reg2 reg/ref
          to add/sub/or/... reg3/$const, reg/ref }
        { by example:
            movswl %si,%eax     movswl %si,%eax     p
            decl %eax           addl %edx,%eax      hp1
            movw %ax,%si        movw %ax,%si        hp2
          ->
            movswl %si,%eax     movswl %si,%eax     p
            decw %eax           addw %edx,%eax      hp1
            movw %ax,%si        movw %ax,%si        hp2
        }
        taicpu(hp1).changeopsize(taicpu(hp2).opsize);
        {
          ->
            movswl %si,%eax     movswl %si,%eax     p
            decw %si            addw %dx,%si        hp1
            movw %ax,%si        movw %ax,%si        hp2
        }
        case taicpu(hp1).ops of
          1:
            taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
          2:
            begin
              taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
              { shrink the source register to the new operation size too }
              if (taicpu(hp1).oper[0]^.typ = top_reg) then
                setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
            end;
          else
            internalerror(2008042702);
        end;
        {
          ->
            decw %si            addw %dx,%si        p
        }
        DebugMsg(SPeepholeOptimization + 'var3',p);
        RemoveCurrentP(p, hp1);
        RemoveInstruction(hp2);
      end
    { Pattern "MovxMov2Movx": retarget the movx destination through a
      following register-to-register copy when the intermediate dies. }
    else if reg_and_hp1_is_instr and
      (taicpu(hp1).opcode = A_MOV) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      (MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^)
{$ifdef x86_64}
       { check for implicit extension to 64 bit }
       or
       ((taicpu(p).opsize in [S_BL,S_WL]) and
        (taicpu(hp1).opsize=S_Q) and
        SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.reg)
       )
{$endif x86_64}
      )
      then
      begin
        { change
            movx %reg1,%reg2
            mov %reg2,%reg3
            dealloc %reg2
          into
            movx %reg,%reg3
        }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            DebugMsg(SPeepholeOptimization + 'MovxMov2Movx',p);
{$ifdef x86_64}
            { writing a 32-bit destination implicitly zeroes the upper
              32 bits, so the 32-bit subregister of the 64-bit mov
              destination is the correct target here }
            if (taicpu(p).opsize in [S_BL,S_WL]) and
              (taicpu(hp1).opsize=S_Q) then
              taicpu(p).loadreg(1,newreg(R_INTREGISTER,getsupreg(taicpu(hp1).oper[1]^.reg),R_SUBD))
            else
{$endif x86_64}
              taicpu(p).loadreg(1,taicpu(hp1).oper[1]^.reg);
            RemoveInstruction(hp1);
          end;
      end
    { Pattern "MovxMov2Mov": the extension is dead because the following
      mov only reads the low bits that came straight from the source. }
    else if reg_and_hp1_is_instr and
      (taicpu(hp1).opcode = A_MOV) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      { hp1 must read no more bits than the movx source provided }
      (((taicpu(p).opsize in [S_BW,S_BL,S_WL{$ifdef x86_64},S_BQ,S_WQ,S_LQ{$endif x86_64}]) and
        (taicpu(hp1).opsize=S_B)) or
       ((taicpu(p).opsize in [S_WL{$ifdef x86_64},S_WQ,S_LQ{$endif x86_64}]) and
        (taicpu(hp1).opsize=S_W))
{$ifdef x86_64}
       or ((taicpu(p).opsize=S_LQ) and
        (taicpu(hp1).opsize=S_L))
{$endif x86_64}
      ) and
      SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.reg) then
      begin
        { change
            movx %reg1,%reg2
            mov %reg2,%reg3
            dealloc %reg2
          into
            mov %reg1,%reg3
          if the second mov accesses only the bits stored in reg1
        }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            DebugMsg(SPeepholeOptimization + 'MovxMov2Mov',p);
            if taicpu(p).oper[0]^.typ=top_reg then
              begin
                { pick the subregister of the movx source that matches
                  the size hp1 actually reads }
                case taicpu(hp1).opsize of
                  S_B:
                    taicpu(hp1).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[0]^.reg),R_SUBL));
                  S_W:
                    taicpu(hp1).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[0]^.reg),R_SUBW));
                  S_L:
                    taicpu(hp1).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[0]^.reg),R_SUBD));
                  else
                    Internalerror(2020102301);
                end;
                AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
              end
            else
              { memory source: read straight from the original reference }
              taicpu(hp1).loadref(0,taicpu(p).oper[0]^.ref^);
            RemoveCurrentP(p);
            result:=true;
            exit;
          end;
      end
    { Pattern "MovxShl2Shl": drop a same-register extension whose extended
      bits are immediately shifted out anyway. }
    else if reg_and_hp1_is_instr and
      (taicpu(p).oper[0]^.typ = top_reg) and
      (
        (taicpu(hp1).opcode = A_SHL) or (taicpu(hp1).opcode = A_SAL)
      ) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
      MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
      { Minimum shift value allowed is the bit difference between the sizes }
      (taicpu(hp1).oper[0]^.val >=
        { Multiply by 8 because tcgsize2size returns bytes, not bits }
        8 * (
          tcgsize2size[reg_cgsize(taicpu(p).oper[1]^.reg)] -
          tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)]
        )
      ) then
      begin
        { For:
            movsx/movzx %reg1,%reg1 (same register, just different sizes)
            shl/sal ##, %reg1
          Remove the movsx/movzx instruction if the shift overwrites the
          extended bits of the register (e.g. movslq %eax,%rax; shlq $32,%rax
        }
        DebugMsg(SPeepholeOptimization + 'MovxShl2Shl',p);
        RemoveCurrentP(p, hp1);
        Result := True;
        Exit;
      end
    { Patterns "MovzShr2ShrMovz" / "MovsSar2SarMovs": hoist a small right
      shift before the extension (SHR pairs with zero-extension, SAR with
      sign-extension) so the shift runs at the narrower size. }
    else if reg_and_hp1_is_instr and
      (taicpu(p).oper[0]^.typ = top_reg) and
      (
        ((taicpu(hp1).opcode = A_SHR) and (taicpu(p).opcode = A_MOVZX)) or
        ((taicpu(hp1).opcode = A_SAR) and (taicpu(p).opcode <> A_MOVZX))
      ) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
      MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
      { Minimum shift value allowed is the bit size of the smallest register - 1 }
      (taicpu(hp1).oper[0]^.val <
        { Multiply by 8 because tcgsize2size returns bytes, not bits }
        8 * (
          tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)]
        )
      ) then
      begin
        { For:
            movsx %reg1,%reg1           movzx %reg1,%reg1 (same register, just different sizes)
            sar ##, %reg1               shr ##, %reg1
          Move the shift to before the movx instruction if the shift value
          is not too large.
        }
        asml.Remove(hp1);
        asml.InsertBefore(hp1, p);
        { the hoisted shift now operates on the (narrower) movx source }
        taicpu(hp1).oper[1]^.reg := taicpu(p).oper[0]^.reg;
        case taicpu(p).opsize of
          s_BW, S_BL{$ifdef x86_64}, S_BQ{$endif}:
            taicpu(hp1).opsize := S_B;
          S_WL{$ifdef x86_64}, S_WQ{$endif}:
            taicpu(hp1).opsize := S_W;
{$ifdef x86_64}
          S_LQ:
            taicpu(hp1).opsize := S_L;
{$endif}
          else
            InternalError(2020112401);
        end;
        if (taicpu(hp1).opcode = A_SHR) then
          DebugMsg(SPeepholeOptimization + 'MovzShr2ShrMovz', hp1)
        else
          DebugMsg(SPeepholeOptimization + 'MovsSar2SarMovs', hp1);
        Result := True;
      end
    { Remaining patterns apply only to zero-extension }
    else if taicpu(p).opcode=A_MOVZX then
      begin
        { removes superfluous And's after movzx's }
        if reg_and_hp1_is_instr and
          (taicpu(hp1).opcode = A_AND) and
          MatchOpType(taicpu(hp1),top_const,top_reg) and
          ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)
{$ifdef x86_64}
           { check for implicit extension to 64 bit }
           or
           ((taicpu(p).opsize in [S_BL,S_WL]) and
            (taicpu(hp1).opsize=S_Q) and
            SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg)
           )
{$endif x86_64}
          )
          then
          begin
            { the AND is redundant when its mask covers at least every
              bit the movzx already zeroed }
            case taicpu(p).opsize Of
              S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'MovzAnd2Movz1',p);
                    RemoveInstruction(hp1);
                    Result:=true;
                    exit;
                  end;
              S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ffff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'MovzAnd2Movz2',p);
                    RemoveInstruction(hp1);
                    Result:=true;
                    exit;
                  end;
{$ifdef x86_64}
              S_LQ:
                if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'MovzAnd2Movz3',p);
                    RemoveInstruction(hp1);
                    Result:=true;
                    exit;
                  end;
{$endif x86_64}
              else
                ;
            end;
            { we cannot get rid of the and, but can we get rid of the movz ?}
            if SuperRegistersEqual(taicpu(p).oper[0]^.reg,taicpu(p).oper[1]^.reg) then
              begin
                { the movzx is redundant when the AND mask fits entirely
                  within the source width (the AND zeroes the rest) }
                case taicpu(p).opsize Of
                  S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val and $ff)=taicpu(hp1).oper[0]^.val then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovzAnd2And1',p);
                        RemoveCurrentP(p,hp1);
                        Result:=true;
                        exit;
                      end;
                  S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val and $ffff)=taicpu(hp1).oper[0]^.val then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovzAnd2And2',p);
                        RemoveCurrentP(p,hp1);
                        Result:=true;
                        exit;
                      end;
{$ifdef x86_64}
                  S_LQ:
                    if (taicpu(hp1).oper[0]^.val and $ffffffff)=taicpu(hp1).oper[0]^.val then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovzAnd2And3',p);
                        RemoveCurrentP(p,hp1);
                        Result:=true;
                        exit;
                      end;
{$endif x86_64}
                  else
                    ;
                end;
              end;
          end;
        { changes some movzx constructs to faster synonyms (all examples
          are given with eax/ax, but are also valid for other registers)}
        if MatchOpType(taicpu(p),top_reg,top_reg) then
          begin
            case taicpu(p).opsize of
              { Technically, movzbw %al,%ax cannot be encoded in 32/64-bit mode
                (the machine code is equivalent to movzbl %al,%eax), but the
                code generator still generates that assembler instruction and
                it is silently converted. This should probably be checked.
                [Kit] }
              S_BW:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    (
                      not IsMOVZXAcceptable
                      { and $0xff,%ax has a smaller encoding but risks a partial write penalty }
                      or (
                        (cs_opt_size in current_settings.optimizerswitches) and
                        (taicpu(p).oper[1]^.reg = NR_AX)
                      )
                    ) then
                    {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var7',p);
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_W);
                      taicpu(p).loadConst(0,$ff);
                      Result := True;
                    end
                  else if not IsMOVZXAcceptable and
                    GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    MatchOpType(taicpu(hp1),top_const,top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                      to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var8',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_W);
                      setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      Result := True;
                    end;
                end;
{$ifndef i8086} { movzbl %al,%eax cannot be encoded in 16-bit mode (the machine code is equivalent to movzbw %al,%ax }
              S_BL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    (
                      not IsMOVZXAcceptable
                      { and $0xff,%eax has a smaller encoding but risks a partial write penalty }
                      or (
                        (cs_opt_size in current_settings.optimizerswitches) and
                        (taicpu(p).oper[1]^.reg = NR_EAX)
                      )
                    ) then
                    { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                    begin
                      DebugMsg(SPeepholeOptimization + 'var9',p);
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ff);
                      Result := True;
                    end
                  else if not IsMOVZXAcceptable and
                    GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    MatchOpType(taicpu(hp1),top_const,top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var10',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      Result := True;
                    end;
                end;
{$endif i8086}
              S_WL:
                if not IsMOVZXAcceptable then
                  begin
                    if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) then
                      { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                      begin
                        DebugMsg(SPeepholeOptimization + 'var11',p);
                        taicpu(p).opcode := A_AND;
                        taicpu(p).changeopsize(S_L);
                        taicpu(p).loadConst(0,$ffff);
                        Result := True;
                      end
                    else if GetNextInstruction(p, hp1) and
                      (tai(hp1).typ = ait_instruction) and
                      (taicpu(hp1).opcode = A_AND) and
                      (taicpu(hp1).oper[0]^.typ = top_const) and
                      (taicpu(hp1).oper[1]^.typ = top_reg) and
                      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                      { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                        to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                      begin
                        DebugMsg(SPeepholeOptimization + 'var12',p);
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).changeopsize(S_L);
                        { do not use R_SUBWHOLE
                          as movl %rdx,%eax
                          is invalid in assembler PM }
                        setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                        Result := True;
                      end;
                  end;
              else
                InternalError(2017050705);
            end;
          end
        { movzx from memory followed by AND: shrink the AND mask to the
          loaded width instead (the load will be widened elsewhere) }
        else if not IsMOVZXAcceptable and (taicpu(p).oper[0]^.typ = top_ref) then
          begin
            if GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              (taicpu(hp1).opcode = A_AND) and
              MatchOpType(taicpu(hp1),top_const,top_reg) and
              (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
              begin
                //taicpu(p).opcode := A_MOV;
                case taicpu(p).opsize Of
                  S_BL:
                    begin
                      DebugMsg(SPeepholeOptimization + 'var13',p);
                      taicpu(hp1).changeopsize(S_L);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end;
                  S_WL:
                    begin
                      DebugMsg(SPeepholeOptimization + 'var14',p);
                      taicpu(hp1).changeopsize(S_L);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                    end;
                  S_BW:
                    begin
                      DebugMsg(SPeepholeOptimization + 'var15',p);
                      taicpu(hp1).changeopsize(S_W);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end;
                  else
                    Internalerror(2017050704)
                end;
                Result := True;
              end;
          end;
      end;
  end;
  8336. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  8337. var
  8338. hp1, hp2 : tai;
  8339. MaskLength : Cardinal;
  8340. MaskedBits : TCgInt;
  8341. begin
  8342. Result:=false;
  8343. { There are no optimisations for reference targets }
  8344. if (taicpu(p).oper[1]^.typ <> top_reg) then
  8345. Exit;
  8346. while GetNextInstruction(p, hp1) and
  8347. (hp1.typ = ait_instruction) do
  8348. begin
  8349. if (taicpu(p).oper[0]^.typ = top_const) then
  8350. begin
  8351. case taicpu(hp1).opcode of
  8352. A_AND:
  8353. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  8354. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  8355. { the second register must contain the first one, so compare their subreg types }
  8356. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  8357. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  8358. { change
  8359. and const1, reg
  8360. and const2, reg
  8361. to
  8362. and (const1 and const2), reg
  8363. }
  8364. begin
  8365. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  8366. DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
  8367. RemoveCurrentP(p, hp1);
  8368. Result:=true;
  8369. exit;
  8370. end;
  8371. A_CMP:
  8372. if (PopCnt(DWord(taicpu(p).oper[0]^.val)) = 1) and { Only 1 bit set }
  8373. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.val) and
  8374. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
  8375. { Just check that the condition on the next instruction is compatible }
  8376. GetNextInstruction(hp1, hp2) and
  8377. (hp2.typ = ait_instruction) and
  8378. (taicpu(hp2).condition in [C_Z, C_E, C_NZ, C_NE])
  8379. then
  8380. { change
  8381. and 2^n, reg
  8382. cmp 2^n, reg
  8383. j(c) / set(c) / cmov(c) (c is equal or not equal)
  8384. to
  8385. and 2^n, reg
  8386. test reg, reg
  8387. j(~c) / set(~c) / cmov(~c)
  8388. }
  8389. begin
  8390. { Keep TEST instruction in, rather than remove it, because
  8391. it may trigger other optimisations such as MovAndTest2Test }
  8392. taicpu(hp1).loadreg(0, taicpu(hp1).oper[1]^.reg);
  8393. taicpu(hp1).opcode := A_TEST;
  8394. DebugMsg(SPeepholeOptimization + 'AND/CMP/J(c) -> AND/J(~c) with power of 2 constant', p);
  8395. taicpu(hp2).condition := inverse_cond(taicpu(hp2).condition);
  8396. Result := True;
  8397. Exit;
  8398. end;
  8399. A_MOVZX:
  8400. if MatchOpType(taicpu(hp1),top_reg,top_reg) and
  8401. SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
  8402. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  8403. (
  8404. (
  8405. (taicpu(p).opsize=S_W) and
  8406. (taicpu(hp1).opsize=S_BW)
  8407. ) or
  8408. (
  8409. (taicpu(p).opsize=S_L) and
  8410. (taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}])
  8411. )
  8412. {$ifdef x86_64}
  8413. or
  8414. (
  8415. (taicpu(p).opsize=S_Q) and
  8416. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL])
  8417. )
  8418. {$endif x86_64}
  8419. ) then
  8420. begin
  8421. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  8422. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  8423. ) or
  8424. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  8425. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  8426. then
  8427. begin
  8428. { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
  8429. 32-bit register to a 64-bit register, or even a version called MOVZXD, so
  8430. code that tests for the presence of AND 0xffffffff followed by MOVZX is
  8431. wasted, and is indictive of a compiler bug if it were triggered. [Kit]
  8432. NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
  8433. }
  8434. DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
  8435. RemoveInstruction(hp1);
  8436. { See if there are other optimisations possible }
  8437. Continue;
  8438. end;
  8439. end;
  8440. A_SHL:
  8441. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  8442. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
  8443. begin
  8444. {$ifopt R+}
  8445. {$define RANGE_WAS_ON}
  8446. {$R-}
  8447. {$endif}
  8448. { get length of potential and mask }
  8449. MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
  8450. { really a mask? }
  8451. {$ifdef RANGE_WAS_ON}
  8452. {$R+}
  8453. {$endif}
  8454. if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
  8455. { unmasked part shifted out? }
  8456. ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
  8457. begin
  8458. DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
  8459. RemoveCurrentP(p, hp1);
  8460. Result:=true;
  8461. exit;
  8462. end;
  8463. end;
  8464. A_SHR:
  8465. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  8466. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
  8467. (taicpu(hp1).oper[0]^.val <= 63) then
  8468. begin
  8469. { Does SHR combined with the AND cover all the bits?
  8470. e.g. for "andb $252,%reg; shrb $2,%reg" - the "and" can be removed }
  8471. MaskedBits := taicpu(p).oper[0]^.val or ((TCgInt(1) shl taicpu(hp1).oper[0]^.val) - 1);
  8472. if ((taicpu(p).opsize = S_B) and ((MaskedBits and $FF) = $FF)) or
  8473. ((taicpu(p).opsize = S_W) and ((MaskedBits and $FFFF) = $FFFF)) or
  8474. ((taicpu(p).opsize = S_L) and ((MaskedBits and $FFFFFFFF) = $FFFFFFFF)) then
  8475. begin
  8476. DebugMsg(SPeepholeOptimization + 'AndShrToShr done', p);
  8477. RemoveCurrentP(p, hp1);
  8478. Result := True;
  8479. Exit;
  8480. end;
  8481. end;
  8482. A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
  8483. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  8484. SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
  8485. begin
  8486. if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
  8487. (
  8488. (
  8489. (taicpu(hp1).opsize in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  8490. ((taicpu(p).oper[0]^.val and $7F) = taicpu(p).oper[0]^.val)
  8491. ) or (
  8492. (taicpu(hp1).opsize in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  8493. ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val)
  8494. {$ifdef x86_64}
  8495. ) or (
  8496. (taicpu(hp1).opsize = S_LQ) and
  8497. ((taicpu(p).oper[0]^.val and $7fffffff) = taicpu(p).oper[0]^.val)
  8498. {$endif x86_64}
  8499. )
  8500. ) then
  8501. begin
  8502. if (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg){$ifdef x86_64} or (taicpu(hp1).opsize = S_LQ){$endif x86_64} then
  8503. begin
  8504. DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
  8505. RemoveInstruction(hp1);
  8506. { See if there are other optimisations possible }
  8507. Continue;
  8508. end;
  8509. { The super-registers are the same though.
  8510. Note that this change by itself doesn't improve
  8511. code speed, but it opens up other optimisations. }
  8512. {$ifdef x86_64}
  8513. { Convert 64-bit register to 32-bit }
  8514. case taicpu(hp1).opsize of
  8515. S_BQ:
  8516. begin
  8517. taicpu(hp1).opsize := S_BL;
  8518. taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
  8519. end;
  8520. S_WQ:
  8521. begin
  8522. taicpu(hp1).opsize := S_WL;
  8523. taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
  8524. end
  8525. else
  8526. ;
  8527. end;
  8528. {$endif x86_64}
  8529. DebugMsg(SPeepholeOptimization + 'AndMovsxToAndMovzx', hp1);
  8530. taicpu(hp1).opcode := A_MOVZX;
  8531. { See if there are other optimisations possible }
  8532. Continue;
  8533. end;
  8534. end;
  8535. else
  8536. ;
  8537. end;
  8538. end;
  8539. if (taicpu(hp1).is_jmp) and
  8540. (taicpu(hp1).opcode<>A_JMP) and
  8541. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  8542. begin
  8543. { change
  8544. and x, reg
  8545. jxx
  8546. to
  8547. test x, reg
  8548. jxx
  8549. if reg is deallocated before the
  8550. jump, but only if it's a conditional jump (PFV)
  8551. }
  8552. taicpu(p).opcode := A_TEST;
  8553. Exit;
  8554. end;
  8555. Break;
  8556. end;
  8557. { Lone AND tests }
  8558. if (taicpu(p).oper[0]^.typ = top_const) then
  8559. begin
  8560. {
  8561. - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
  8562. - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
  8563. - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
  8564. }
  8565. if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
  8566. ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
  8567. ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
  8568. begin
  8569. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
  8570. if taicpu(p).opsize = S_L then
  8571. begin
  8572. Include(OptsToCheck,aoc_MovAnd2Mov_3);
  8573. Result := True;
  8574. end;
  8575. end;
  8576. end;
  8577. { Backward check to determine necessity of and %reg,%reg }
  8578. if (taicpu(p).oper[0]^.typ = top_reg) and
  8579. (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  8580. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
  8581. GetLastInstruction(p, hp2) and
  8582. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp2) and
  8583. { Check size of adjacent instruction to determine if the AND is
  8584. effectively a null operation }
  8585. (
  8586. (taicpu(p).opsize = taicpu(hp2).opsize) or
  8587. { Note: Don't include S_Q }
  8588. ((taicpu(p).opsize = S_L) and (taicpu(hp2).opsize in [S_BL, S_WL])) or
  8589. ((taicpu(p).opsize = S_W) and (taicpu(hp2).opsize in [S_BW, S_BL, S_WL, S_L])) or
  8590. ((taicpu(p).opsize = S_B) and (taicpu(hp2).opsize in [S_BW, S_BL, S_WL, S_W, S_L]))
  8591. ) then
  8592. begin
  8593. DebugMsg(SPeepholeOptimization + 'And2Nop', p);
  8594. { If GetNextInstruction returned False, hp1 will be nil }
  8595. RemoveCurrentP(p, hp1);
  8596. Result := True;
  8597. Exit;
  8598. end;
  8599. end;
{ Pass-2 peephole optimisations for ADD:
    * AddMov2Mov:    fold "add %reg2,%reg1 / mov/s/z (%reg1),%reg1" into a
                     single mov/s/z with a base+index reference
    * AddMov2LeaAdd / AddMov2Lea:
                     break the dependency chain of "add $x,%reg1 /
                     mov %reg1,%reg2" by computing the copy with LEA
  Returns True when a change was made; p may be left pointing at a
  different instruction for rescanning. }
function TX86AsmOptimizer.OptPass2ADD(var p : tai) : boolean;
  var
    hp1: tai; NewRef: TReference;

    { This entire nested function is used in an if-statement below, but we
      want to avoid all the used reg transfers and GetNextInstruction calls
      until we really have to check }
    function MemRegisterNotUsedLater: Boolean; inline;
      var
        hp2: tai;
      begin
        { Walk forward from p to hp1 updating the register tracking, then
          test whether the ADD's destination register is dead after hp1 }
        TransferUsedRegs(TmpUsedRegs);
        hp2 := p;
        repeat
          UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
        until not GetNextInstruction(hp2, hp2) or (hp2 = hp1);
        Result := not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs);
      end;

  begin
    Result := False;
    { every transformation below needs a following instruction }
    if not GetNextInstruction(p, hp1) or (hp1.typ <> ait_instruction) then
      Exit;

    if (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif}]) then
      begin
        { Change:
            add     %reg2,%reg1
            mov/s/z #(%reg1),%reg1  (%reg1 superregisters must be the same)
          To:
            mov/s/z #(%reg1,%reg2),%reg1
        }
        if MatchOpType(taicpu(p), top_reg, top_reg) and
          MatchInstruction(hp1, [A_MOV, A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif}], []) and
          MatchOpType(taicpu(hp1), top_ref, top_reg) and
          { a scaled index cannot absorb the extra register }
          (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
          (
            (
              (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
              (taicpu(hp1).oper[0]^.ref^.index = NR_NO) and
              { r/esp cannot be an index }
              (taicpu(p).oper[0]^.reg<>NR_STACK_POINTER_REG)
            ) or (
              (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
              (taicpu(hp1).oper[0]^.ref^.base = NR_NO)
            )
          ) and (
            Reg1WriteOverwritesReg2Entirely(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) or
            (
              { If the super registers ARE equal, then this MOV/S/Z does a partial write }
              not SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
              MemRegisterNotUsedLater
            )
          ) then
          begin
            { merge the addition into the memory operand of the mov }
            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[1]^.reg;
            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
            DebugMsg(SPeepholeOptimization + 'AddMov2Mov done', p);
            RemoveCurrentp(p, hp1);
            Result := True;
            Exit;
          end;

        { Change:
            addl/q  $x,%reg1
            movl/q  %reg1,%reg2
          To:
            leal/q  $x(%reg1),%reg2
            addl/q  $x,%reg1 (can be removed if %reg1 or the flags are not used afterwards)
          Breaks the dependency chain.
        }
        if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
          (taicpu(hp1).oper[1]^.typ = top_reg) and
          MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and
          (
            { Don't do AddMov2LeaAdd under -Os, but do allow AddMov2Lea }
            not (cs_opt_size in current_settings.optimizerswitches) or
            (
              { NOTE(review): TmpUsedRegs is only refreshed by TransferUsedRegs
                further below, so at this point it may still hold state from an
                earlier optimisation - confirm this is intended }
              not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) and
              RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)
            )
          ) then
          begin
            { Change the MOV instruction to a LEA instruction, and update the
              first operand }
            reference_reset(NewRef, 1, []);
            NewRef.base := taicpu(p).oper[1]^.reg;
            NewRef.scalefactor := 1;
            NewRef.offset := taicpu(p).oper[0]^.val;
            taicpu(hp1).opcode := A_LEA;
            taicpu(hp1).loadref(0, NewRef);
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
            if RegUsedAfterInstruction(NewRef.base, hp1, TmpUsedRegs) or
              RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs) then
              begin
                { Move what is now the LEA instruction to before the ADD instruction }
                Asml.Remove(hp1);
                Asml.InsertBefore(hp1, p);
                AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
                DebugMsg(SPeepholeOptimization + 'AddMov2LeaAdd', p);
                p := hp1;
              end
            else
              begin
                { Since %reg1 or the flags aren't used afterwards, we can delete p completely }
                RemoveCurrentP(p, hp1);
                DebugMsg(SPeepholeOptimization + 'AddMov2Lea', p);
              end;
            Result := True;
          end;
      end;
  end;
  8710. function TX86AsmOptimizer.OptPass2Lea(var p : tai) : Boolean;
  8711. begin
  8712. Result:=false;
  8713. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  8714. begin
  8715. if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  8716. (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
  8717. begin
  8718. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
  8719. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
  8720. taicpu(p).opcode:=A_ADD;
  8721. DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
  8722. result:=true;
  8723. end
  8724. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  8725. (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
  8726. begin
  8727. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
  8728. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
  8729. taicpu(p).opcode:=A_ADD;
  8730. DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
  8731. result:=true;
  8732. end;
  8733. end;
  8734. end;
{ Pass-2 peephole optimisation for SUB (SubMov2LeaSub / SubMov2Lea):
  mirrors the AddMov2Lea* transformation in OptPass2ADD, using a negative
  LEA displacement.  Returns True when a change was made. }
function TX86AsmOptimizer.OptPass2SUB(var p: tai): Boolean;
  var
    hp1: tai; NewRef: TReference;
  begin
    { Change:
        subl/q  $x,%reg1
        movl/q  %reg1,%reg2
      To:
        leal/q  $-x(%reg1),%reg2
        subl/q  $x,%reg1 (can be removed if %reg1 or the flags are not used afterwards)
      Breaks the dependency chain and potentially permits the removal of
      a CMP instruction if one follows.
    }
    Result := False;
    if (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
      MatchOpType(taicpu(p),top_const,top_reg) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
      (taicpu(hp1).oper[1]^.typ = top_reg) and
      MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and
      (
        { Don't do SubMov2LeaSub under -Os, but do allow SubMov2Lea }
        not (cs_opt_size in current_settings.optimizerswitches) or
        (
          { NOTE(review): TmpUsedRegs is only refreshed by TransferUsedRegs
            further below, so at this point it may still hold state from an
            earlier optimisation - confirm this is intended }
          not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) and
          RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)
        )
      ) then
      begin
        { Change the MOV instruction to a LEA instruction, and update the
          first operand }
        reference_reset(NewRef, 1, []);
        NewRef.base := taicpu(p).oper[1]^.reg;
        NewRef.scalefactor := 1;
        { subtraction becomes a negative displacement }
        NewRef.offset := -taicpu(p).oper[0]^.val;
        taicpu(hp1).opcode := A_LEA;
        taicpu(hp1).loadref(0, NewRef);
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
        if RegUsedAfterInstruction(NewRef.base, hp1, TmpUsedRegs) or
          RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs) then
          begin
            { Move what is now the LEA instruction to before the SUB instruction }
            Asml.Remove(hp1);
            Asml.InsertBefore(hp1, p);
            AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
            DebugMsg(SPeepholeOptimization + 'SubMov2LeaSub', p);
            p := hp1;
          end
        else
          begin
            { Since %reg1 or the flags aren't used afterwards, we can delete p completely }
            RemoveCurrentP(p, hp1);
            DebugMsg(SPeepholeOptimization + 'SubMov2Lea', p);
          end;
        Result := True;
      end;
  end;
  8793. function TX86AsmOptimizer.SkipSimpleInstructions(var hp1 : tai) : Boolean;
  8794. begin
  8795. { we can skip all instructions not messing with the stack pointer }
  8796. while assigned(hp1) and {MatchInstruction(hp1,[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
  8797. A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
  8798. A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
  8799. A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
  8800. ({(taicpu(hp1).ops=0) or }
  8801. ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
  8802. (MatchOpType(taicpu(hp1),top_ref,top_reg))
  8803. ) and }
  8804. not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
  8805. )
  8806. ) do
  8807. GetNextInstruction(hp1,hp1);
  8808. Result:=assigned(hp1);
  8809. end;
{ Post-peephole optimisation for LEA (LeaCallLeaRet2Jmp): collapses a
  stack-adjust / call / stack-restore / ret epilogue into a plain tail jump.
  Returns True when the rewrite was performed. }
function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
  var
    hp1, hp2, hp3, hp4, hp5: tai;
  begin
    Result:=false;
    { hp5 remembers an optional VZEROUPPER found before the RET }
    hp5:=nil;
    { replace
        leal(q) x(<stackpointer>),<stackpointer>
        call procname
        leal(q) -x(<stackpointer>),<stackpointer>
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      MatchOpType(taicpu(p),top_ref,top_reg) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      { the -8 or -24 are not required, but bail out early if possible,
        higher values are unlikely }
      ((taicpu(p).oper[0]^.ref^.offset=-8) or
       (taicpu(p).oper[0]^.ref^.offset=-24)) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(p, hp1) and
      { Take a copy of hp1 }
      SetAndTest(hp1, hp4) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      { anything not touching the stack pointer may sit before the CALL }
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      { the second LEA must exactly undo the first one's adjustment }
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_ref,top_reg) and
      (taicpu(hp2).oper[0]^.ref^.offset=-taicpu(p).oper[0]^.ref^.offset) and
      (taicpu(hp2).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
      (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      { allow an optional VZEROUPPER between the restore and the RET }
      (MatchInstruction(hp3,A_RET,[S_NO]) or
       (MatchInstruction(hp3,A_VZEROUPPER,[S_NO]) and
        SetAndTest(hp3,hp5) and
        GetNextInstruction(hp3,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      ) and
      (taicpu(hp3).ops=0) then
      begin
        { turn the CALL into the tail jump and drop both LEAs and the RET }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        RemoveCurrentP(p, hp4);
        RemoveInstruction(hp2);
        RemoveInstruction(hp3);
        if Assigned(hp5) then
          begin
            { keep the VZEROUPPER, but execute it before the jump }
            AsmL.Remove(hp5);
            ASmL.InsertBefore(hp5,hp1)
          end;
        Result:=true;
      end;
  end;
{ Post-peephole optimisation for PUSH (PushCallPushRet2Jmp, x86-64 only):
  collapses the push-rax / call / pop-rcx / ret alignment idiom into a
  plain tail jump.  Returns True when the rewrite was performed. }
function TX86AsmOptimizer.PostPeepholeOptPush(var p : tai) : Boolean;
{$ifdef x86_64}
  var
    hp1, hp2, hp3, hp4, hp5: tai;
{$endif x86_64}
  begin
    Result:=false;
{$ifdef x86_64}
    { hp5 remembers an optional VZEROUPPER found before the RET }
    hp5:=nil;
    { replace
        push %rax
        call procname
        pop %rcx
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
      It depends on the fact, that the sequence push rax/pop rcx is used for stack alignment as rcx is volatile
      for all supported calling conventions
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      MatchOpType(taicpu(p),top_reg) and
      (taicpu(p).oper[0]^.reg=NR_RAX) and
      GetNextInstruction(p, hp1) and
      { Take a copy of hp1 }
      SetAndTest(hp1, hp4) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      { anything not touching the stack pointer may sit before the CALL }
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_POP,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_reg) and
      (taicpu(hp2).oper[0]^.reg=NR_RCX) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      { allow an optional VZEROUPPER between the POP and the RET }
      (MatchInstruction(hp3,A_RET,[S_NO]) or
       (MatchInstruction(hp3,A_VZEROUPPER,[S_NO]) and
        SetAndTest(hp3,hp5) and
        GetNextInstruction(hp3,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      ) and
      (taicpu(hp3).ops=0) then
      begin
        { turn the CALL into the tail jump and drop the POP and the RET }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'PushCallPushRet2Jmp done',p);
        RemoveCurrentP(p, hp4);
        RemoveInstruction(hp2);
        RemoveInstruction(hp3);
        if Assigned(hp5) then
          begin
            { keep the VZEROUPPER, but execute it before the jump }
            AsmL.Remove(hp5);
            ASmL.InsertBefore(hp5,hp1)
          end;
        Result:=true;
      end;
{$endif x86_64}
  end;
{ Post-peephole optimisations for MOV with a constant source:
    * mov $0,%reg   -> xor %reg,%reg          (when flags are free)
    * movq $imm,%r  -> movl $imm,%r32         (x86-64, 0 <= imm <= $FFFFFFFF)
    * mov $-1,%reg  -> or $-1,%reg            (-Os only, flags free)
  Returns True when a change was made.
  NOTE: the {$ifdef x86_64} blocks below deliberately straddle begin/end
  boundaries so that the 32-bit build sees a plain "0:" branch only. }
function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
  var
    Value, RegName: string;
  begin
    Result:=false;
    if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
      begin
        case taicpu(p).oper[0]^.val of
          0:
            { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
            if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $0,%reg" into "xor %reg,%reg" }
                taicpu(p).opcode := A_XOR;
                taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                Result := True;
{$ifdef x86_64}
              end
            { flags are live, but a 64-bit zero store can still shrink to 32 bits }
            else if (taicpu(p).opsize = S_Q) then
              begin
                RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                { The actual optimization }
                setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                taicpu(p).changeopsize(S_L);
                DebugMsg(SPeepholeOptimization + 'movq $0,' + RegName + ' -> movl $0,' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                Result := True;
              end;
          $1..$FFFFFFFF:
            begin
              { Code size reduction by J. Gareth "Kit" Moreton }
              { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
              case taicpu(p).opsize of
                S_Q:
                  begin
                    RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                    Value := debug_tostr(taicpu(p).oper[0]^.val);
                    { The actual optimization }
                    setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                    taicpu(p).changeopsize(S_L);
                    DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                    Result := True;
                  end;
                else
                  { Do nothing };
              end;
{$endif x86_64}
            end;
          -1:
            { Don't make this optimisation if the CPU flags are required, since OR scrambles them }
            if (cs_opt_size in current_settings.optimizerswitches) and
              (taicpu(p).opsize <> S_B) and
              not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $-1,%reg" into "or $-1,%reg" }
                { NOTES:
                  - No size saving is made when changing a Word-sized assignment unless the register is AX (smaller encoding)
                  - This operation creates a false dependency on the register, so only do it when optimising for size
                  - It is possible to set memory operands using this method, but this creates an even greater false dependency, so don't do this at all
                }
                taicpu(p).opcode := A_OR;
                Result := True;
              end;
          else
            { Do nothing };
        end;
      end;
  end;
  9008. function TX86AsmOptimizer.PostPeepholeOptAnd(var p : tai) : boolean;
  9009. var
  9010. hp1: tai;
  9011. begin
  9012. { Detect:
  9013. andw x, %ax (0 <= x < $8000)
  9014. ...
  9015. movzwl %ax,%eax
  9016. Change movzwl %ax,%eax to cwtl (shorter encoding for movswl %ax,%eax)
  9017. }
  9018. Result := False; if MatchOpType(taicpu(p), top_const, top_reg) and
  9019. (taicpu(p).oper[1]^.reg = NR_AX) and { This is also enough to determine that opsize = S_W }
  9020. ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val) and
  9021. GetNextInstructionUsingReg(p, hp1, NR_EAX) and
  9022. MatchInstruction(hp1, A_MOVZX, [S_WL]) and
  9023. MatchOperand(taicpu(hp1).oper[0]^, NR_AX) and
  9024. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) then
  9025. begin
  9026. DebugMsg(SPeepholeOptimization + 'Converted movzwl %ax,%eax to cwtl (via AndMovz2AndCwtl)', hp1);
  9027. taicpu(hp1).opcode := A_CWDE;
  9028. taicpu(hp1).clearop(0);
  9029. taicpu(hp1).clearop(1);
  9030. taicpu(hp1).ops := 0;
  9031. { A change was made, but not with p, so move forward 1 }
  9032. p := tai(p.Next);
  9033. Result := True;
  9034. end;
  9035. end;
  9036. function TX86AsmOptimizer.PostPeepholeOptMOVSX(var p : tai) : boolean;
  9037. begin
  9038. Result := False;
  9039. if not MatchOpType(taicpu(p), top_reg, top_reg) then
  9040. Exit;
  9041. { Convert:
  9042. movswl %ax,%eax -> cwtl
  9043. movslq %eax,%rax -> cdqe
  9044. NOTE: Don't convert movswl %al,%ax to cbw, because cbw and cwde
  9045. refer to the same opcode and depends only on the assembler's
  9046. current operand-size attribute. [Kit]
  9047. }
  9048. with taicpu(p) do
  9049. case opsize of
  9050. S_WL:
  9051. if (oper[0]^.reg = NR_AX) and (oper[1]^.reg = NR_EAX) then
  9052. begin
  9053. DebugMsg(SPeepholeOptimization + 'Converted movswl %ax,%eax to cwtl', p);
  9054. opcode := A_CWDE;
  9055. clearop(0);
  9056. clearop(1);
  9057. ops := 0;
  9058. Result := True;
  9059. end;
  9060. {$ifdef x86_64}
  9061. S_LQ:
  9062. if (oper[0]^.reg = NR_EAX) and (oper[1]^.reg = NR_RAX) then
  9063. begin
  9064. DebugMsg(SPeepholeOptimization + 'Converted movslq %eax,%rax to cltq', p);
  9065. opcode := A_CDQE;
  9066. clearop(0);
  9067. clearop(1);
  9068. ops := 0;
  9069. Result := True;
  9070. end;
  9071. {$endif x86_64}
  9072. else
  9073. ;
  9074. end;
  9075. end;
  9076. function TX86AsmOptimizer.PostPeepholeOptShr(var p : tai) : boolean;
  9077. var
  9078. hp1: tai;
  9079. begin
  9080. { Detect:
  9081. shr x, %ax (x > 0)
  9082. ...
  9083. movzwl %ax,%eax
  9084. Change movzwl %ax,%eax to cwtl (shorter encoding for movswl %ax,%eax)
  9085. }
  9086. Result := False;
  9087. if MatchOpType(taicpu(p), top_const, top_reg) and
  9088. (taicpu(p).oper[1]^.reg = NR_AX) and { This is also enough to determine that opsize = S_W }
  9089. (taicpu(p).oper[0]^.val > 0) and
  9090. GetNextInstructionUsingReg(p, hp1, NR_EAX) and
  9091. MatchInstruction(hp1, A_MOVZX, [S_WL]) and
  9092. MatchOperand(taicpu(hp1).oper[0]^, NR_AX) and
  9093. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) then
  9094. begin
  9095. DebugMsg(SPeepholeOptimization + 'Converted movzwl %ax,%eax to cwtl (via ShrMovz2ShrCwtl)', hp1);
  9096. taicpu(hp1).opcode := A_CWDE;
  9097. taicpu(hp1).clearop(0);
  9098. taicpu(hp1).clearop(1);
  9099. taicpu(hp1).ops := 0;
  9100. { A change was made, but not with p, so move forward 1 }
  9101. p := tai(p.Next);
  9102. Result := True;
  9103. end;
  9104. end;
  9105. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  9106. begin
  9107. Result:=false;
  9108. { change "cmp $0, %reg" to "test %reg, %reg" }
  9109. if MatchOpType(taicpu(p),top_const,top_reg) and
  9110. (taicpu(p).oper[0]^.val = 0) then
  9111. begin
  9112. taicpu(p).opcode := A_TEST;
  9113. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  9114. Result:=true;
  9115. end;
  9116. end;
{ Post-peephole optimisation for TEST and OR:
    * drops a redundant TEST/OR when the preceding instruction already set
      the relevant flags for the following SETcc/Jcc/CMOVcc
    * canonicalises "test $-1,%reg" and "or %reg,%reg" to "test %reg,%reg"
  Returns True when the TEST/OR was removed. }
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean;
    hp1,hp2 : tai;
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y  |  test $-1, %y    (x)
      j(n)z _Label
      as the first instruction already adjusts the ZF
      %y operand may also be a reference }
    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
      GetLastInstruction(p, hp1) and
      (tai(hp1).typ = ait_instruction) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
               ((taicpu(hp1).opcode <> A_ADD) and
                (taicpu(hp1).opcode <> A_SUB))) then
              begin
                RemoveCurrentP(p, hp2);
                Result:=true;
                Exit;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for     }
              { shifts by a (nonzero) constant                            }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                RemoveCurrentP(p, hp2);
                Result:=true;
                Exit;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            { these are single-operand instructions, hence oper[0] here }
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                RemoveCurrentP(p, hp2);
                Result:=true;
                Exit;
              end;
          end
        else
          ;
      end; { case }
    { change "test $-1,%reg" into "test %reg,%reg" }
    if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
    { Change "or %reg,%reg" to "test %reg,%reg" as OR generates a false dependency }
    if MatchInstruction(p, A_OR, []) and
      { Can only match if they're both registers }
      MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) then
      begin
        DebugMsg(SPeepholeOptimization + 'or %reg,%reg -> test %reg,%reg to remove false dependency (Or2Test)', p);
        taicpu(p).opcode := A_TEST;
        { No need to set Result to True, as we've done all the optimisations we can }
      end;
  end;
{ Post-peephole optimisations for CALL:
    * 32-bit pre-Pentium2 only: "call x / jmp y" -> "push y / jmp x"
    * CallRet2Jmp:  "call x / ret" -> "jmp x" (level 4 only)
    * CallRet2Call: for noreturn routines, just drop the unreachable RET
  Returns True when a change was made. }
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1,hp3 : tai;
{$ifndef x86_64}
    hp2 : taicpu;
{$endif x86_64}
  begin
    Result:=false;
    { hp3 remembers an optional VZEROUPPER found before the RET }
    hp3:=nil;
{$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
      not(cs_create_pic in current_settings.moduleswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_JMP,[S_NO]) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { push the jump target as the return address, then jump into the
          callee directly }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        RemoveInstruction(hp1);
        Result:=true;
      end
    else
{$endif x86_64}
    { replace
        call procname
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
      else if the subroutine is marked as no return, remove the ret
    }
    if ((cs_opt_level4 in current_settings.optimizerswitches) or
      (po_noreturn in current_procinfo.procdef.procoptions)) and
      GetNextInstruction(p, hp1) and
      { allow an optional VZEROUPPER between the CALL and the RET }
      (MatchInstruction(hp1,A_RET,[S_NO]) or
       (MatchInstruction(hp1,A_VZEROUPPER,[S_NO]) and
        SetAndTest(hp1,hp3) and
        GetNextInstruction(hp1,hp1) and
        MatchInstruction(hp1,A_RET,[S_NO])
       )
      ) and
      (taicpu(hp1).ops=0) then
      begin
        if (cs_opt_level4 in current_settings.optimizerswitches) and
          { we might destroy stack alignment here if we do not do a call }
          (target_info.stackalign<=sizeof(SizeUInt)) then
          begin
            taicpu(p).opcode := A_JMP;
            taicpu(p).is_jmp := true;
            DebugMsg(SPeepholeOptimization + 'CallRet2Jmp done',p);
          end
        else
          DebugMsg(SPeepholeOptimization + 'CallRet2Call done',p);
        { the RET is unreachable/replaced in either case }
        RemoveInstruction(hp1);
        if Assigned(hp3) then
          begin
            { keep the VZEROUPPER, but execute it before the call/jump }
            AsmL.Remove(hp3);
            AsmL.InsertBefore(hp3,p)
          end;
        Result:=true;
      end;
  end;
function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;

  { Returns True if the constant Val can be encoded within the *source*
    width implied by OpSize (the bounds accept both the signed and the
    unsigned interpretation of that width, e.g. -128..$FF for bytes). }
  function ConstInRange(const Val: TCGInt; const OpSize: TOpSize): Boolean;
    begin
      case OpSize of
        S_B, S_BW, S_BL{$ifdef x86_64}, S_BQ{$endif x86_64}:
          Result := (Val <= $FF) and (Val >= -128);
        S_W, S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
          Result := (Val <= $FFFF) and (Val >= -32768);
        S_L{$ifdef x86_64}, S_LQ{$endif x86_64}:
          Result := (Val <= $FFFFFFFF) and (Val >= -2147483648);
        else
          Result := True;
      end;
    end;

  var
    hp1, hp2 : tai;
    SizeChange: Boolean;
    PreMessage: string;
  begin
    { Post-peephole optimisations applied to a MOVZX at p:
      1. Hoist a following CMP/TEST on the destination register in front of
         the movzx, narrowed to the source size, and drop the movzx entirely
         when its result is unused afterwards.
      2. Likewise hoist a following MOV that reads only the small (source)
         part of the zero-extended register.
      3. (x86_64 only) Shrink MOVZBQ/MOVZWQ to MOVZBL/MOVZWL when that
         removes the REX prefix. }
    Result := False;
    { The reorderings below only apply when the movzx extends a register
      into a larger version of itself (e.g. movzbl %al,%eax). }
    if (taicpu(p).oper[0]^.typ = top_reg) and
      SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
      GetNextInstruction(p, hp1) and (hp1.typ = ait_instruction) then
      begin
        { Change (using movzbl %al,%eax as an example):

            movzbl %al, %eax      movzbl %al, %eax
            cmpl   x,   %eax      testl  %eax,%eax
          To:
            cmpb   x,   %al       testb  %al, %al   (Move one back to avoid a false dependency)
            movzbl %al, %eax      movzbl %al, %eax

          Smaller instruction and minimises pipeline stall as the CPU
          doesn't have to wait for the register to get zero-extended. [Kit]

          Also allow if the smaller of the two registers is being checked,
          as this still removes the false dependency. }
        if
          (
            (
              { CMP against a constant: only safe if the constant also fits
                in the narrower source width of the movzx }
              (taicpu(hp1).opcode = A_CMP) and MatchOpType(taicpu(hp1), top_const, top_reg) and
              ConstInRange(taicpu(hp1).oper[0]^.val, taicpu(p).opsize)
            ) or (
              { If MatchOperand returns True, they must both be registers }
              (taicpu(hp1).opcode = A_TEST) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)
            )
          ) and
          { the checked register must not be wider than the movzx destination }
          (reg2opsize(taicpu(hp1).oper[1]^.reg) <= reg2opsize(taicpu(p).oper[1]^.reg)) and
          SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) then
          begin
            PreMessage := debug_op2str(taicpu(hp1).opcode) + debug_opsize2str(taicpu(hp1).opsize) + ' ' + debug_operstr(taicpu(hp1).oper[0]^) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' -> ' + debug_op2str(taicpu(hp1).opcode);

            { move the comparison in front of the movzx }
            asml.Remove(hp1);
            asml.InsertBefore(hp1, p);

            { Swap instructions in the case of cmp 0,%reg or test %reg,%reg }
            if (taicpu(hp1).opcode = A_TEST) or (taicpu(hp1).oper[0]^.val = 0) then
              begin
                taicpu(hp1).opcode := A_TEST;
                taicpu(hp1).loadreg(0, taicpu(p).oper[0]^.reg);
              end;

            { retarget the check at the (narrow) movzx source register }
            taicpu(hp1).oper[1]^.reg := taicpu(p).oper[0]^.reg;

            { narrow the comparison to the movzx source size; SizeChange
              records whether the size actually shrank, for the debug
              message below }
            case taicpu(p).opsize of
              S_BW, S_BL:
                begin
                  SizeChange := taicpu(hp1).opsize <> S_B;
                  taicpu(hp1).changeopsize(S_B);
                end;
              S_WL:
                begin
                  SizeChange := taicpu(hp1).opsize <> S_W;
                  taicpu(hp1).changeopsize(S_W);
                end
              else
                InternalError(2020112701);
            end;

            { keep the used-register tracking in step with p's new
              successor before the liveness query below }
            UpdateUsedRegs(tai(p.Next));

            { Check if the register is used afterwards - if not, we can
              remove the movzx instruction completely }
            if not RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, p, UsedRegs) then
              begin
                { Hp1 is a better position than p for debugging purposes }
                DebugMsg(SPeepholeOptimization + 'Movzx2Nop 4a', hp1);
                RemoveCurrentp(p, hp1);
                Result := True;
              end;

            if SizeChange then
              DebugMsg(SPeepholeOptimization + PreMessage +
                debug_opsize2str(taicpu(hp1).opsize) + ' ' + debug_operstr(taicpu(hp1).oper[0]^) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' (smaller and minimises pipeline stall - MovzxCmp2CmpMovzx)', hp1)
            else
              DebugMsg(SPeepholeOptimization + 'MovzxCmp2CmpMovzx', hp1);

            Exit;
          end;

        { Change (using movzwl %ax,%eax as an example):

            movzwl %ax, %eax
            movb   %al, (dest)  (Register is smaller than read register in movz)
          To:
            movb   %al, (dest)  (Move one back to avoid a false dependency)
            movzwl %ax, %eax }
        if (taicpu(hp1).opcode = A_MOV) and
          (taicpu(hp1).oper[0]^.typ = top_reg) and
          { the MOV must not read its own destination (e.g. a reference
            indexed by the source register) }
          not RegInOp(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^) and
          SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg) and
          { the MOV may read at most the movzx source width, which the
            movzx does not change }
          (reg2opsize(taicpu(hp1).oper[0]^.reg) <= reg2opsize(taicpu(p).oper[0]^.reg)) then
          begin
            DebugMsg(SPeepholeOptimization + 'MovzxMov2MovMovzx', hp1);

            hp2 := tai(hp1.Previous); { Effectively the old position of hp1 }
            asml.Remove(hp1);
            asml.InsertBefore(hp1, p);

            { extend the allocation of the MOV destination register over
              the span it was moved across }
            if taicpu(hp1).oper[1]^.typ = top_reg then
              AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, hp2, UsedRegs);

            { Check if the register is used afterwards - if not, we can
              remove the movzx instruction completely }
            if not RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, p, UsedRegs) then
              begin
                { Hp1 is a better position than p for debugging purposes }
                DebugMsg(SPeepholeOptimization + 'Movzx2Nop 4b', hp1);
                RemoveCurrentp(p, hp1);
                Result := True;
              end;

            Exit;
          end;
      end;

{$ifdef x86_64}
    { Code size reduction by J. Gareth "Kit" Moreton }
    { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix
      (writes to a 32-bit register implicitly zero the upper 32 bits, so
      the result is unchanged; only legacy registers encode without REX) }
    if (taicpu(p).opsize in [S_BQ, S_WQ]) and
      (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
      then
      begin
        { Has 64-bit register name and opcode suffix }
        PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';

        { The actual optimization }
        setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
        if taicpu(p).opsize = S_BQ then
          taicpu(p).changeopsize(S_BL)
        else
          taicpu(p).changeopsize(S_WL);

        DebugMsg(SPeepholeOptimization + PreMessage +
          debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
      end;
{$endif}
  end;
  9403. {$ifdef x86_64}
  9404. function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  9405. var
  9406. PreMessage, RegName: string;
  9407. begin
  9408. { Code size reduction by J. Gareth "Kit" Moreton }
  9409. { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
  9410. as this removes the REX prefix }
  9411. Result := False;
  9412. if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  9413. Exit;
  9414. if taicpu(p).oper[0]^.typ <> top_reg then
  9415. { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
  9416. InternalError(2018011500);
  9417. case taicpu(p).opsize of
  9418. S_Q:
  9419. begin
  9420. if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
  9421. begin
  9422. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
  9423. PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';
  9424. { The actual optimization }
  9425. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  9426. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  9427. taicpu(p).changeopsize(S_L);
  9428. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
  9429. DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
  9430. end;
  9431. end;
  9432. else
  9433. ;
  9434. end;
  9435. end;
  9436. {$endif}
  9437. class procedure TX86AsmOptimizer.OptimizeRefs(var p: taicpu);
  9438. var
  9439. OperIdx: Integer;
  9440. begin
  9441. for OperIdx := 0 to p.ops - 1 do
  9442. if p.oper[OperIdx]^.typ = top_ref then
  9443. optimize_ref(p.oper[OperIdx]^.ref^, False);
  9444. end;
  9445. end.