ScalarEvolution.cpp 326 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562456345644565456645674568456945704571457245734574457545764577457845794580458145824583458445854586458745884589459045914592459345944595459645974598459946004601460246034604460546064607460846094610461146124613461446154616461746184619462046214622462346244625462646274628462946304631463246334634463546364637463846394640464146424643464446454646464746484649465046514652465346544655465646574658465946604661466246634664466546664667466846694670467146724673467446754676467746784679468046814682468346844685468646874688468946904691469246934694469546964697469846994700470147024703470447054706470747084709471047114712471347144715471647174718471947204721472247234724472547264727472847294730473147324733473447354736473747384739474047414742474347444745474647474748474947504751475247534754475547564757475847594760476147624763476447654766476747684769477047714772477347744775477647774778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765777577857795780578157825783578457855786578757885789579057915792579357945795579657975798579958005801580258035804580558065807580858095810581158125813581458155816581758185819582058215822582358245825582658275828582958305831583258335834583558365837583858395840584158425843584458455846584758485849585058515852585358545855585658575858585958605861586258635864586558665867586858695870587158725873587458755876587758785879588058815882588358845885588658875888588958905891589258935894589558965897589858995900590159025903590459055906590759085909591059115912591359145915591659175918591959205921592259235924592559265927592859295930593159325933593459355936593759385939594059415942594359445945594659475948594959505951595259535954595559565957595859595960596159625963596459655966596759685969597059715972597359745975597659775978597959805981598259835984598559865987598859895990599159925993599459955996599759985999600060016002600360046005600660076008600960106011601260136014601560166017601860196020602160226023602460256026602760286029603060316032603360346035603660376038603960406041604260436044604560466047604860496050605160526053605460556056605760586059606060616062606360646065606660676068606960706071607260736074607560766077607860796080608160826083608460856086608760886089609060916092609360946095609660976098609961006101610261036104610561066107610861096110611161126113611461156116611761186119612061216122612361246125612661276128612961306131613261336134613561366137613861396140614161426143614461456146614761486149615061516152615361546155615661576158615961606161616261636164616561666167616861696170617161726173617461756176617761786179618061816182618361846185618661876188618961906191619261936194619561966197619861996200620162026203620462056206620762086209621062116212621362146215621662176218621962206221622262236224622562266227622862296230623162326233623462356236623762386239624062416242624362446245624662476248624962506251625262536254625562566257625862596260626162626263626462656266626762686269627062716272627362746275627662776278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767277727872797280728172827283728472857286728772887289729072917292729372947295729672977298729973007301730273037304730573067307730873097310731173127313731473157316731773187319732073217322732373247325732673277328732973307331733273337334733573367337733873397340734173427343734473457346734773487349735073517352735373547355735673577358735973607361736273637364736573667367736873697370737173727373737473757376737773787379738073817382738373847385738673877388738973907391739273937394739573967397739873997400740174027403740474057406740774087409741074117412741374147415741674177418741974207421742274237424742574267427742874297430743174327433743474357436743774387439744074417442744374447445744674477448744974507451745274537454745574567457745874597460746174627463746474657466746774687469747074717472747374747475747674777478747974807481748274837484748574867487748874897490749174927493749474957496749774987499750075017502750375047505750675077508750975107511751275137514751575167517751875197520752175227523752475257526752775287529753075317532753375347535753675377538753975407541754275437544754575467547754875497550755175527553755475557556755775587559756075617562756375647565756675677568756975707571757275737574757575767577757875797580758175827583758475857586758775887589759075917592759375947595759675977598759976007601760276037604760576067607760876097610761176127613761476157616761776187619762076217622762376247625762676277628762976307631763276337634763576367637763876397640764176427643764476457646764776487649765076517652765376547655765676577658765976607661766276637664766576667667766876697670767176727673767476757676767776787679768076817682768376847685768676877688768976907691769276937694769576967697769876997700770177027703770477057706770777087709771077117712771377147715771677177718771977207721772277237724772577267727772877297730773177327733773477357736773777387739774077417742774377447745774677477748774977507751775277537754775577567757775877597760776177627763776477657766776777687769777077717772777377747775777677777778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534
  1. //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file contains the implementation of the scalar evolution analysis
  11. // engine, which is used primarily to analyze expressions involving induction
  12. // variables in loops.
  13. //
  14. // There are several aspects to this library. First is the representation of
  15. // scalar expressions, which are represented as subclasses of the SCEV class.
  16. // These classes are used to represent certain types of subexpressions that we
  17. // can handle. We only create one SCEV of a particular shape, so
  18. // pointer-comparisons for equality are legal.
  19. //
  20. // One important aspect of the SCEV objects is that they are never cyclic, even
  21. // if there is a cycle in the dataflow for an expression (ie, a PHI node). If
  22. // the PHI node is one of the idioms that we can represent (e.g., a polynomial
  23. // recurrence) then we represent it directly as a recurrence node, otherwise we
  24. // represent it as a SCEVUnknown node.
  25. //
  26. // In addition to being able to represent expressions of various types, we also
  27. // have folders that are used to build the *canonical* representation for a
  28. // particular expression. These folders are capable of using a variety of
  29. // rewrite rules to simplify the expressions.
  30. //
  31. // Once the folders are defined, we can implement the more interesting
  32. // higher-level code, such as the code that recognizes PHI nodes of various
  33. // types, computes the execution count of a loop, etc.
  34. //
  35. // TODO: We should use these routines and value representations to implement
  36. // dependence analysis!
  37. //
  38. //===----------------------------------------------------------------------===//
  39. //
  40. // There are several good references for the techniques used in this analysis.
  41. //
  42. // Chains of recurrences -- a method to expedite the evaluation
  43. // of closed-form functions
  44. // Olaf Bachmann, Paul S. Wang, Eugene V. Zima
  45. //
  46. // On computational properties of chains of recurrences
  47. // Eugene V. Zima
  48. //
  49. // Symbolic Evaluation of Chains of Recurrences for Loop Optimization
  50. // Robert A. van Engelen
  51. //
  52. // Efficient Symbolic Analysis for Optimizing Compilers
  53. // Robert A. van Engelen
  54. //
  55. // Using the chains of recurrences algebra for data dependence testing and
  56. // induction variable substitution
  57. // MS Thesis, Johnie Birch
  58. //
  59. //===----------------------------------------------------------------------===//
  60. #include "llvm/Analysis/ScalarEvolution.h"
  61. #include "llvm/ADT/Optional.h"
  62. #include "llvm/ADT/STLExtras.h"
  63. #include "llvm/ADT/SmallPtrSet.h"
  64. #include "llvm/ADT/Statistic.h"
  65. #include "llvm/Analysis/AssumptionCache.h"
  66. #include "llvm/Analysis/ConstantFolding.h"
  67. #include "llvm/Analysis/InstructionSimplify.h"
  68. #include "llvm/Analysis/LoopInfo.h"
  69. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  70. #include "llvm/Analysis/TargetLibraryInfo.h"
  71. #include "llvm/Analysis/ValueTracking.h"
  72. #include "llvm/IR/ConstantRange.h"
  73. #include "llvm/IR/Constants.h"
  74. #include "llvm/IR/DataLayout.h"
  75. #include "llvm/IR/DerivedTypes.h"
  76. #include "llvm/IR/Dominators.h"
  77. #include "llvm/IR/GetElementPtrTypeIterator.h"
  78. #include "llvm/IR/GlobalAlias.h"
  79. #include "llvm/IR/GlobalVariable.h"
  80. #include "llvm/IR/InstIterator.h"
  81. #include "llvm/IR/Instructions.h"
  82. #include "llvm/IR/LLVMContext.h"
  83. #include "llvm/IR/Metadata.h"
  84. #include "llvm/IR/Operator.h"
  85. #include "llvm/Support/CommandLine.h"
  86. #include "llvm/Support/Debug.h"
  87. #include "llvm/Support/ErrorHandling.h"
  88. #include "llvm/Support/MathExtras.h"
  89. #include "llvm/Support/raw_ostream.h"
  90. #include <algorithm>
  91. using namespace llvm;
  92. #define DEBUG_TYPE "scalar-evolution"
  93. STATISTIC(NumArrayLenItCounts,
  94. "Number of trip counts computed with array length");
  95. STATISTIC(NumTripCountsComputed,
  96. "Number of loops with predictable loop counts");
  97. STATISTIC(NumTripCountsNotComputed,
  98. "Number of loops without predictable loop counts");
  99. STATISTIC(NumBruteForceTripCountsComputed,
  100. "Number of loops with trip counts computed by force");
  101. static cl::opt<unsigned>
  102. MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
  103. cl::desc("Maximum number of iterations SCEV will "
  104. "symbolically execute a constant "
  105. "derived loop"),
  106. cl::init(100));
  107. // FIXME: Enable this with XDEBUG when the test suite is clean.
  108. static cl::opt<bool>
  109. VerifySCEV("verify-scev",
  110. cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
  111. INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
  112. "Scalar Evolution Analysis", false, true)
  113. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  114. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  115. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  116. INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  117. INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
  118. "Scalar Evolution Analysis", false, true)
  119. char ScalarEvolution::ID = 0;
  120. //===----------------------------------------------------------------------===//
  121. // SCEV class definitions
  122. //===----------------------------------------------------------------------===//
  123. //===----------------------------------------------------------------------===//
  124. // Implementation of the SCEV class.
  125. //
  126. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  127. void SCEV::dump() const {
  128. print(dbgs());
  129. dbgs() << '\n';
  130. }
  131. #endif
  132. void SCEV::print(raw_ostream &OS) const {
  133. switch (static_cast<SCEVTypes>(getSCEVType())) {
  134. case scConstant:
  135. cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
  136. return;
  137. case scTruncate: {
  138. const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
  139. const SCEV *Op = Trunc->getOperand();
  140. OS << "(trunc " << *Op->getType() << " " << *Op << " to "
  141. << *Trunc->getType() << ")";
  142. return;
  143. }
  144. case scZeroExtend: {
  145. const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
  146. const SCEV *Op = ZExt->getOperand();
  147. OS << "(zext " << *Op->getType() << " " << *Op << " to "
  148. << *ZExt->getType() << ")";
  149. return;
  150. }
  151. case scSignExtend: {
  152. const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
  153. const SCEV *Op = SExt->getOperand();
  154. OS << "(sext " << *Op->getType() << " " << *Op << " to "
  155. << *SExt->getType() << ")";
  156. return;
  157. }
  158. case scAddRecExpr: {
  159. const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
  160. OS << "{" << *AR->getOperand(0);
  161. for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
  162. OS << ",+," << *AR->getOperand(i);
  163. OS << "}<";
  164. if (AR->getNoWrapFlags(FlagNUW))
  165. OS << "nuw><";
  166. if (AR->getNoWrapFlags(FlagNSW))
  167. OS << "nsw><";
  168. if (AR->getNoWrapFlags(FlagNW) &&
  169. !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
  170. OS << "nw><";
  171. AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  172. OS << ">";
  173. return;
  174. }
  175. case scAddExpr:
  176. case scMulExpr:
  177. case scUMaxExpr:
  178. case scSMaxExpr: {
  179. const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
  180. const char *OpStr = nullptr;
  181. switch (NAry->getSCEVType()) {
  182. case scAddExpr: OpStr = " + "; break;
  183. case scMulExpr: OpStr = " * "; break;
  184. case scUMaxExpr: OpStr = " umax "; break;
  185. case scSMaxExpr: OpStr = " smax "; break;
  186. }
  187. OS << "(";
  188. for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
  189. I != E; ++I) {
  190. OS << **I;
  191. if (std::next(I) != E)
  192. OS << OpStr;
  193. }
  194. OS << ")";
  195. switch (NAry->getSCEVType()) {
  196. case scAddExpr:
  197. case scMulExpr:
  198. if (NAry->getNoWrapFlags(FlagNUW))
  199. OS << "<nuw>";
  200. if (NAry->getNoWrapFlags(FlagNSW))
  201. OS << "<nsw>";
  202. }
  203. return;
  204. }
  205. case scUDivExpr: {
  206. const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
  207. OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
  208. return;
  209. }
  210. case scUnknown: {
  211. const SCEVUnknown *U = cast<SCEVUnknown>(this);
  212. Type *AllocTy;
  213. if (U->isSizeOf(AllocTy)) {
  214. OS << "sizeof(" << *AllocTy << ")";
  215. return;
  216. }
  217. if (U->isAlignOf(AllocTy)) {
  218. OS << "alignof(" << *AllocTy << ")";
  219. return;
  220. }
  221. Type *CTy;
  222. Constant *FieldNo;
  223. if (U->isOffsetOf(CTy, FieldNo)) {
  224. OS << "offsetof(" << *CTy << ", ";
  225. FieldNo->printAsOperand(OS, false);
  226. OS << ")";
  227. return;
  228. }
  229. // Otherwise just print it normally.
  230. U->getValue()->printAsOperand(OS, false);
  231. return;
  232. }
  233. case scCouldNotCompute:
  234. OS << "***COULDNOTCOMPUTE***";
  235. return;
  236. }
  237. llvm_unreachable("Unknown SCEV kind!");
  238. }
  239. Type *SCEV::getType() const {
  240. switch (static_cast<SCEVTypes>(getSCEVType())) {
  241. case scConstant:
  242. return cast<SCEVConstant>(this)->getType();
  243. case scTruncate:
  244. case scZeroExtend:
  245. case scSignExtend:
  246. return cast<SCEVCastExpr>(this)->getType();
  247. case scAddRecExpr:
  248. case scMulExpr:
  249. case scUMaxExpr:
  250. case scSMaxExpr:
  251. return cast<SCEVNAryExpr>(this)->getType();
  252. case scAddExpr:
  253. return cast<SCEVAddExpr>(this)->getType();
  254. case scUDivExpr:
  255. return cast<SCEVUDivExpr>(this)->getType();
  256. case scUnknown:
  257. return cast<SCEVUnknown>(this)->getType();
  258. case scCouldNotCompute:
  259. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  260. }
  261. llvm_unreachable("Unknown SCEV kind!");
  262. }
  263. bool SCEV::isZero() const {
  264. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
  265. return SC->getValue()->isZero();
  266. return false;
  267. }
  268. bool SCEV::isOne() const {
  269. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
  270. return SC->getValue()->isOne();
  271. return false;
  272. }
  273. bool SCEV::isAllOnesValue() const {
  274. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
  275. return SC->getValue()->isAllOnesValue();
  276. return false;
  277. }
  278. /// isNonConstantNegative - Return true if the specified scev is negated, but
  279. /// not a constant.
  280. bool SCEV::isNonConstantNegative() const {
  281. const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
  282. if (!Mul) return false;
  283. // If there is a constant factor, it will be first.
  284. const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
  285. if (!SC) return false;
  286. // Return true if the value is negative, this matches things like (-42 * V).
  287. return SC->getValue()->getValue().isNegative();
  288. }
  289. SCEVCouldNotCompute::SCEVCouldNotCompute() :
  290. SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
  291. bool SCEVCouldNotCompute::classof(const SCEV *S) {
  292. return S->getSCEVType() == scCouldNotCompute;
  293. }
  294. const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
  295. FoldingSetNodeID ID;
  296. ID.AddInteger(scConstant);
  297. ID.AddPointer(V);
  298. void *IP = nullptr;
  299. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  300. SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
  301. UniqueSCEVs.InsertNode(S, IP);
  302. return S;
  303. }
  304. const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
  305. return getConstant(ConstantInt::get(getContext(), Val));
  306. }
  307. const SCEV *
  308. ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
  309. IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
  310. return getConstant(ConstantInt::get(ITy, V, isSigned));
  311. }
  312. SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
  313. unsigned SCEVTy, const SCEV *op, Type *ty)
  314. : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
  315. SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
  316. const SCEV *op, Type *ty)
  317. : SCEVCastExpr(ID, scTruncate, op, ty) {
  318. assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
  319. (Ty->isIntegerTy() || Ty->isPointerTy()) &&
  320. "Cannot truncate non-integer value!");
  321. }
  322. SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
  323. const SCEV *op, Type *ty)
  324. : SCEVCastExpr(ID, scZeroExtend, op, ty) {
  325. assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
  326. (Ty->isIntegerTy() || Ty->isPointerTy()) &&
  327. "Cannot zero extend non-integer value!");
  328. }
  329. SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
  330. const SCEV *op, Type *ty)
  331. : SCEVCastExpr(ID, scSignExtend, op, ty) {
  332. assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
  333. (Ty->isIntegerTy() || Ty->isPointerTy()) &&
  334. "Cannot sign extend non-integer value!");
  335. }
  336. void SCEVUnknown::deleted() {
  337. // Clear this SCEVUnknown from various maps.
  338. SE->forgetMemoizedResults(this);
  339. // Remove this SCEVUnknown from the uniquing map.
  340. SE->UniqueSCEVs.RemoveNode(this);
  341. // Release the value.
  342. setValPtr(nullptr);
  343. }
  344. void SCEVUnknown::allUsesReplacedWith(Value *New) {
  345. // Clear this SCEVUnknown from various maps.
  346. SE->forgetMemoizedResults(this);
  347. // Remove this SCEVUnknown from the uniquing map.
  348. SE->UniqueSCEVs.RemoveNode(this);
  349. // Update this SCEVUnknown to point to the new value. This is needed
  350. // because there may still be outstanding SCEVs which still point to
  351. // this SCEVUnknown.
  352. setValPtr(New);
  353. }
  354. bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
  355. if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
  356. if (VCE->getOpcode() == Instruction::PtrToInt)
  357. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
  358. if (CE->getOpcode() == Instruction::GetElementPtr &&
  359. CE->getOperand(0)->isNullValue() &&
  360. CE->getNumOperands() == 2)
  361. if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
  362. if (CI->isOne()) {
  363. AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
  364. ->getElementType();
  365. return true;
  366. }
  367. return false;
  368. }
  369. bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
  370. if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
  371. if (VCE->getOpcode() == Instruction::PtrToInt)
  372. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
  373. if (CE->getOpcode() == Instruction::GetElementPtr &&
  374. CE->getOperand(0)->isNullValue()) {
  375. Type *Ty =
  376. cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
  377. if (StructType *STy = dyn_cast<StructType>(Ty))
  378. if (!STy->isPacked() &&
  379. CE->getNumOperands() == 3 &&
  380. CE->getOperand(1)->isNullValue()) {
  381. if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
  382. if (CI->isOne() &&
  383. STy->getNumElements() == 2 &&
  384. STy->getElementType(0)->isIntegerTy(1)) {
  385. AllocTy = STy->getElementType(1);
  386. return true;
  387. }
  388. }
  389. }
  390. return false;
  391. }
  392. bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
  393. if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
  394. if (VCE->getOpcode() == Instruction::PtrToInt)
  395. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
  396. if (CE->getOpcode() == Instruction::GetElementPtr &&
  397. CE->getNumOperands() == 3 &&
  398. CE->getOperand(0)->isNullValue() &&
  399. CE->getOperand(1)->isNullValue()) {
  400. Type *Ty =
  401. cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
  402. // Ignore vector types here so that ScalarEvolutionExpander doesn't
  403. // emit getelementptrs that index into vectors.
  404. if (Ty->isStructTy() || Ty->isArrayTy()) {
  405. CTy = Ty;
  406. FieldNo = CE->getOperand(2);
  407. return true;
  408. }
  409. }
  410. return false;
  411. }
  412. //===----------------------------------------------------------------------===//
  413. // SCEV Utilities
  414. //===----------------------------------------------------------------------===//
  415. namespace {
  416. /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
  417. /// than the complexity of the RHS. This comparator is used to canonicalize
  418. /// expressions.
  419. class SCEVComplexityCompare {
  420. const LoopInfo *const LI;
  421. public:
  422. explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
  423. // Return true or false if LHS is less than, or at least RHS, respectively.
  424. bool operator()(const SCEV *LHS, const SCEV *RHS) const {
  425. return compare(LHS, RHS) < 0;
  426. }
  427. // Return negative, zero, or positive, if LHS is less than, equal to, or
  428. // greater than RHS, respectively. A three-way result allows recursive
  429. // comparisons to be more efficient.
  430. int compare(const SCEV *LHS, const SCEV *RHS) const {
  431. // Fast-path: SCEVs are uniqued so we can do a quick equality check.
  432. if (LHS == RHS)
  433. return 0;
  434. // Primarily, sort the SCEVs by their getSCEVType().
  435. unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
  436. if (LType != RType)
  437. return (int)LType - (int)RType;
  438. // Aside from the getSCEVType() ordering, the particular ordering
  439. // isn't very important except that it's beneficial to be consistent,
  440. // so that (a + b) and (b + a) don't end up as different expressions.
  441. switch (static_cast<SCEVTypes>(LType)) {
  442. case scUnknown: {
  443. const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
  444. const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
  445. // Sort SCEVUnknown values with some loose heuristics. TODO: This is
  446. // not as complete as it could be.
  447. const Value *LV = LU->getValue(), *RV = RU->getValue();
  448. // Order pointer values after integer values. This helps SCEVExpander
  449. // form GEPs.
  450. bool LIsPointer = LV->getType()->isPointerTy(),
  451. RIsPointer = RV->getType()->isPointerTy();
  452. if (LIsPointer != RIsPointer)
  453. return (int)LIsPointer - (int)RIsPointer;
  454. // Compare getValueID values.
  455. unsigned LID = LV->getValueID(),
  456. RID = RV->getValueID();
  457. if (LID != RID)
  458. return (int)LID - (int)RID;
  459. // Sort arguments by their position.
  460. if (const Argument *LA = dyn_cast<Argument>(LV)) {
  461. const Argument *RA = cast<Argument>(RV);
  462. unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
  463. return (int)LArgNo - (int)RArgNo;
  464. }
  465. // For instructions, compare their loop depth, and their operand
  466. // count. This is pretty loose.
  467. if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
  468. const Instruction *RInst = cast<Instruction>(RV);
  469. // Compare loop depths.
  470. const BasicBlock *LParent = LInst->getParent(),
  471. *RParent = RInst->getParent();
  472. if (LParent != RParent) {
  473. unsigned LDepth = LI->getLoopDepth(LParent),
  474. RDepth = LI->getLoopDepth(RParent);
  475. if (LDepth != RDepth)
  476. return (int)LDepth - (int)RDepth;
  477. }
  478. // Compare the number of operands.
  479. unsigned LNumOps = LInst->getNumOperands(),
  480. RNumOps = RInst->getNumOperands();
  481. return (int)LNumOps - (int)RNumOps;
  482. }
  483. return 0;
  484. }
  485. case scConstant: {
  486. const SCEVConstant *LC = cast<SCEVConstant>(LHS);
  487. const SCEVConstant *RC = cast<SCEVConstant>(RHS);
  488. // Compare constant values.
  489. const APInt &LA = LC->getValue()->getValue();
  490. const APInt &RA = RC->getValue()->getValue();
  491. unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
  492. if (LBitWidth != RBitWidth)
  493. return (int)LBitWidth - (int)RBitWidth;
  494. return LA.ult(RA) ? -1 : 1;
  495. }
  496. case scAddRecExpr: {
  497. const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
  498. const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
  499. // Compare addrec loop depths.
  500. const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
  501. if (LLoop != RLoop) {
  502. unsigned LDepth = LLoop->getLoopDepth(),
  503. RDepth = RLoop->getLoopDepth();
  504. if (LDepth != RDepth)
  505. return (int)LDepth - (int)RDepth;
  506. }
  507. // Addrec complexity grows with operand count.
  508. unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
  509. if (LNumOps != RNumOps)
  510. return (int)LNumOps - (int)RNumOps;
  511. // Lexicographically compare.
  512. for (unsigned i = 0; i != LNumOps; ++i) {
  513. long X = compare(LA->getOperand(i), RA->getOperand(i));
  514. if (X != 0)
  515. return X;
  516. }
  517. return 0;
  518. }
  519. case scAddExpr:
  520. case scMulExpr:
  521. case scSMaxExpr:
  522. case scUMaxExpr: {
  523. const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
  524. const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
  525. // Lexicographically compare n-ary expressions.
  526. unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
  527. if (LNumOps != RNumOps)
  528. return (int)LNumOps - (int)RNumOps;
  529. for (unsigned i = 0; i != LNumOps; ++i) {
  530. if (i >= RNumOps)
  531. return 1;
  532. long X = compare(LC->getOperand(i), RC->getOperand(i));
  533. if (X != 0)
  534. return X;
  535. }
  536. return (int)LNumOps - (int)RNumOps;
  537. }
  538. case scUDivExpr: {
  539. const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
  540. const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
  541. // Lexicographically compare udiv expressions.
  542. long X = compare(LC->getLHS(), RC->getLHS());
  543. if (X != 0)
  544. return X;
  545. return compare(LC->getRHS(), RC->getRHS());
  546. }
  547. case scTruncate:
  548. case scZeroExtend:
  549. case scSignExtend: {
  550. const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
  551. const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
  552. // Compare cast expressions by operand.
  553. return compare(LC->getOperand(), RC->getOperand());
  554. }
  555. case scCouldNotCompute:
  556. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  557. }
  558. llvm_unreachable("Unknown SCEV kind!");
  559. }
  560. };
  561. }
  562. /// GroupByComplexity - Given a list of SCEV objects, order them by their
  563. /// complexity, and group objects of the same complexity together by value.
  564. /// When this routine is finished, we know that any duplicates in the vector are
  565. /// consecutive and that complexity is monotonically increasing.
  566. ///
  567. /// Note that we go take special precautions to ensure that we get deterministic
  568. /// results from this routine. In other words, we don't want the results of
  569. /// this to depend on where the addresses of various SCEV objects happened to
  570. /// land in memory.
  571. ///
  572. static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
  573. LoopInfo *LI) {
  574. if (Ops.size() < 2) return; // Noop
  575. if (Ops.size() == 2) {
  576. // This is the common case, which also happens to be trivially simple.
  577. // Special case it.
  578. const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
  579. if (SCEVComplexityCompare(LI)(RHS, LHS))
  580. std::swap(LHS, RHS);
  581. return;
  582. }
  583. // Do the rough sort by complexity.
  584. std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
  585. // Now that we are sorted by complexity, group elements of the same
  586. // complexity. Note that this is, at worst, N^2, but the vector is likely to
  587. // be extremely short in practice. Note that we take this approach because we
  588. // do not want to depend on the addresses of the objects we are grouping.
  589. for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
  590. const SCEV *S = Ops[i];
  591. unsigned Complexity = S->getSCEVType();
  592. // If there are any objects of the same complexity and same value as this
  593. // one, group them.
  594. for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
  595. if (Ops[j] == S) { // Found a duplicate.
  596. // Move it to immediately after i'th element.
  597. std::swap(Ops[i+1], Ops[j]);
  598. ++i; // no need to rescan it.
  599. if (i == e-2) return; // Done!
  600. }
  601. }
  602. }
  603. }
  604. namespace {
  605. struct FindSCEVSize {
  606. int Size;
  607. FindSCEVSize() : Size(0) {}
  608. bool follow(const SCEV *S) {
  609. ++Size;
  610. // Keep looking at all operands of S.
  611. return true;
  612. }
  613. bool isDone() const {
  614. return false;
  615. }
  616. };
  617. }
  618. // Returns the size of the SCEV S.
  619. static inline int sizeOfSCEV(const SCEV *S) {
  620. FindSCEVSize F;
  621. SCEVTraversal<FindSCEVSize> ST(F);
  622. ST.visitAll(S);
  623. return F.Size;
  624. }
  625. namespace {
  626. struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
  627. public:
  628. // Computes the Quotient and Remainder of the division of Numerator by
  629. // Denominator.
  630. static void divide(ScalarEvolution &SE, const SCEV *Numerator,
  631. const SCEV *Denominator, const SCEV **Quotient,
  632. const SCEV **Remainder) {
  633. assert(Numerator && Denominator && "Uninitialized SCEV");
  634. SCEVDivision D(SE, Numerator, Denominator);
  635. // Check for the trivial case here to avoid having to check for it in the
  636. // rest of the code.
  637. if (Numerator == Denominator) {
  638. *Quotient = D.One;
  639. *Remainder = D.Zero;
  640. return;
  641. }
  642. if (Numerator->isZero()) {
  643. *Quotient = D.Zero;
  644. *Remainder = D.Zero;
  645. return;
  646. }
  647. // A simple case when N/1. The quotient is N.
  648. if (Denominator->isOne()) {
  649. *Quotient = Numerator;
  650. *Remainder = D.Zero;
  651. return;
  652. }
  653. // Split the Denominator when it is a product.
  654. if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
  655. const SCEV *Q, *R;
  656. *Quotient = Numerator;
  657. for (const SCEV *Op : T->operands()) {
  658. divide(SE, *Quotient, Op, &Q, &R);
  659. *Quotient = Q;
  660. // Bail out when the Numerator is not divisible by one of the terms of
  661. // the Denominator.
  662. if (!R->isZero()) {
  663. *Quotient = D.Zero;
  664. *Remainder = Numerator;
  665. return;
  666. }
  667. }
  668. *Remainder = D.Zero;
  669. return;
  670. }
  671. D.visit(Numerator);
  672. *Quotient = D.Quotient;
  673. *Remainder = D.Remainder;
  674. }
  675. // Except in the trivial case described above, we do not know how to divide
  676. // Expr by Denominator for the following functions with empty implementation.
  677. void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
  678. void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
  679. void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
  680. void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
  681. void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
  682. void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
  683. void visitUnknown(const SCEVUnknown *Numerator) {}
  684. void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
  685. void visitConstant(const SCEVConstant *Numerator) {
  686. if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
  687. APInt NumeratorVal = Numerator->getValue()->getValue();
  688. APInt DenominatorVal = D->getValue()->getValue();
  689. uint32_t NumeratorBW = NumeratorVal.getBitWidth();
  690. uint32_t DenominatorBW = DenominatorVal.getBitWidth();
  691. if (NumeratorBW > DenominatorBW)
  692. DenominatorVal = DenominatorVal.sext(NumeratorBW);
  693. else if (NumeratorBW < DenominatorBW)
  694. NumeratorVal = NumeratorVal.sext(DenominatorBW);
  695. APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
  696. APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
  697. APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
  698. Quotient = SE.getConstant(QuotientVal);
  699. Remainder = SE.getConstant(RemainderVal);
  700. return;
  701. }
  702. }
  703. void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
  704. const SCEV *StartQ, *StartR, *StepQ, *StepR;
  705. assert(Numerator->isAffine() && "Numerator should be affine");
  706. divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
  707. divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
  708. // Bail out if the types do not match.
  709. Type *Ty = Denominator->getType();
  710. if (Ty != StartQ->getType() || Ty != StartR->getType() ||
  711. Ty != StepQ->getType() || Ty != StepR->getType()) {
  712. Quotient = Zero;
  713. Remainder = Numerator;
  714. return;
  715. }
  716. Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
  717. Numerator->getNoWrapFlags());
  718. Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
  719. Numerator->getNoWrapFlags());
  720. }
  721. void visitAddExpr(const SCEVAddExpr *Numerator) {
  722. SmallVector<const SCEV *, 2> Qs, Rs;
  723. Type *Ty = Denominator->getType();
  724. for (const SCEV *Op : Numerator->operands()) {
  725. const SCEV *Q, *R;
  726. divide(SE, Op, Denominator, &Q, &R);
  727. // Bail out if types do not match.
  728. if (Ty != Q->getType() || Ty != R->getType()) {
  729. Quotient = Zero;
  730. Remainder = Numerator;
  731. return;
  732. }
  733. Qs.push_back(Q);
  734. Rs.push_back(R);
  735. }
  736. if (Qs.size() == 1) {
  737. Quotient = Qs[0];
  738. Remainder = Rs[0];
  739. return;
  740. }
  741. Quotient = SE.getAddExpr(Qs);
  742. Remainder = SE.getAddExpr(Rs);
  743. }
  744. void visitMulExpr(const SCEVMulExpr *Numerator) {
  745. SmallVector<const SCEV *, 2> Qs;
  746. Type *Ty = Denominator->getType();
  747. bool FoundDenominatorTerm = false;
  748. for (const SCEV *Op : Numerator->operands()) {
  749. // Bail out if types do not match.
  750. if (Ty != Op->getType()) {
  751. Quotient = Zero;
  752. Remainder = Numerator;
  753. return;
  754. }
  755. if (FoundDenominatorTerm) {
  756. Qs.push_back(Op);
  757. continue;
  758. }
  759. // Check whether Denominator divides one of the product operands.
  760. const SCEV *Q, *R;
  761. divide(SE, Op, Denominator, &Q, &R);
  762. if (!R->isZero()) {
  763. Qs.push_back(Op);
  764. continue;
  765. }
  766. // Bail out if types do not match.
  767. if (Ty != Q->getType()) {
  768. Quotient = Zero;
  769. Remainder = Numerator;
  770. return;
  771. }
  772. FoundDenominatorTerm = true;
  773. Qs.push_back(Q);
  774. }
  775. if (FoundDenominatorTerm) {
  776. Remainder = Zero;
  777. if (Qs.size() == 1)
  778. Quotient = Qs[0];
  779. else
  780. Quotient = SE.getMulExpr(Qs);
  781. return;
  782. }
  783. if (!isa<SCEVUnknown>(Denominator)) {
  784. Quotient = Zero;
  785. Remainder = Numerator;
  786. return;
  787. }
  788. // The Remainder is obtained by replacing Denominator by 0 in Numerator.
  789. ValueToValueMap RewriteMap;
  790. RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
  791. cast<SCEVConstant>(Zero)->getValue();
  792. Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
  793. if (Remainder->isZero()) {
  794. // The Quotient is obtained by replacing Denominator by 1 in Numerator.
  795. RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
  796. cast<SCEVConstant>(One)->getValue();
  797. Quotient =
  798. SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
  799. return;
  800. }
  801. // Quotient is (Numerator - Remainder) divided by Denominator.
  802. const SCEV *Q, *R;
  803. const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
  804. if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
  805. // This SCEV does not seem to simplify: fail the division here.
  806. Quotient = Zero;
  807. Remainder = Numerator;
  808. return;
  809. }
  810. divide(SE, Diff, Denominator, &Q, &R);
  811. assert(R == Zero &&
  812. "(Numerator - Remainder) should evenly divide Denominator");
  813. Quotient = Q;
  814. }
  815. private:
  816. SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
  817. const SCEV *Denominator)
  818. : SE(S), Denominator(Denominator) {
  819. Zero = SE.getConstant(Denominator->getType(), 0);
  820. One = SE.getConstant(Denominator->getType(), 1);
  821. // By default, we don't know how to divide Expr by Denominator.
  822. // Providing the default here simplifies the rest of the code.
  823. Quotient = Zero;
  824. Remainder = Numerator;
  825. }
  826. ScalarEvolution &SE;
  827. const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
  828. };
  829. }
  830. //===----------------------------------------------------------------------===//
  831. // Simple SCEV method implementations
  832. //===----------------------------------------------------------------------===//
  833. /// BinomialCoefficient - Compute BC(It, K). The result has width W.
  834. /// Assume, K > 0.
  835. static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
  836. ScalarEvolution &SE,
  837. Type *ResultTy) {
  838. // Handle the simplest case efficiently.
  839. if (K == 1)
  840. return SE.getTruncateOrZeroExtend(It, ResultTy);
  841. // We are using the following formula for BC(It, K):
  842. //
  843. // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
  844. //
  845. // Suppose, W is the bitwidth of the return value. We must be prepared for
  846. // overflow. Hence, we must assure that the result of our computation is
  847. // equal to the accurate one modulo 2^W. Unfortunately, division isn't
  848. // safe in modular arithmetic.
  849. //
  850. // However, this code doesn't use exactly that formula; the formula it uses
  851. // is something like the following, where T is the number of factors of 2 in
  852. // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
  853. // exponentiation:
  854. //
  855. // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
  856. //
  857. // This formula is trivially equivalent to the previous formula. However,
  858. // this formula can be implemented much more efficiently. The trick is that
  859. // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
  860. // arithmetic. To do exact division in modular arithmetic, all we have
  861. // to do is multiply by the inverse. Therefore, this step can be done at
  862. // width W.
  863. //
  864. // The next issue is how to safely do the division by 2^T. The way this
  865. // is done is by doing the multiplication step at a width of at least W + T
  866. // bits. This way, the bottom W+T bits of the product are accurate. Then,
  867. // when we perform the division by 2^T (which is equivalent to a right shift
  868. // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
  869. // truncated out after the division by 2^T.
  870. //
  871. // In comparison to just directly using the first formula, this technique
  872. // is much more efficient; using the first formula requires W * K bits,
  873. // but this formula less than W + K bits. Also, the first formula requires
  874. // a division step, whereas this formula only requires multiplies and shifts.
  875. //
  876. // It doesn't matter whether the subtraction step is done in the calculation
  877. // width or the input iteration count's width; if the subtraction overflows,
  878. // the result must be zero anyway. We prefer here to do it in the width of
  879. // the induction variable because it helps a lot for certain cases; CodeGen
  880. // isn't smart enough to ignore the overflow, which leads to much less
  881. // efficient code if the width of the subtraction is wider than the native
  882. // register width.
  883. //
  884. // (It's possible to not widen at all by pulling out factors of 2 before
  885. // the multiplication; for example, K=2 can be calculated as
  886. // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  887. // extra arithmetic, so it's not an obvious win, and it gets
  888. // much more complicated for K > 3.)
  889. // Protection from insane SCEVs; this bound is conservative,
  890. // but it probably doesn't matter.
  891. if (K > 1000)
  892. return SE.getCouldNotCompute();
  893. unsigned W = SE.getTypeSizeInBits(ResultTy);
  894. // Calculate K! / 2^T and T; we divide out the factors of two before
  895. // multiplying for calculating K! / 2^T to avoid overflow.
  896. // Other overflow doesn't matter because we only care about the bottom
  897. // W bits of the result.
  898. APInt OddFactorial(W, 1);
  899. unsigned T = 1;
  900. for (unsigned i = 3; i <= K; ++i) {
  901. APInt Mult(W, i);
  902. unsigned TwoFactors = Mult.countTrailingZeros();
  903. T += TwoFactors;
  904. Mult = Mult.lshr(TwoFactors);
  905. OddFactorial *= Mult;
  906. }
  907. // We need at least W + T bits for the multiplication step
  908. unsigned CalculationBits = W + T;
  909. // Calculate 2^T, at width T+W.
  910. APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
  911. // Calculate the multiplicative inverse of K! / 2^T;
  912. // this multiplication factor will perform the exact division by
  913. // K! / 2^T.
  914. APInt Mod = APInt::getSignedMinValue(W+1);
  915. APInt MultiplyFactor = OddFactorial.zext(W+1);
  916. MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  917. MultiplyFactor = MultiplyFactor.trunc(W);
  918. // Calculate the product, at width T+W
  919. IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
  920. CalculationBits);
  921. const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  922. for (unsigned i = 1; i != K; ++i) {
  923. const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
  924. Dividend = SE.getMulExpr(Dividend,
  925. SE.getTruncateOrZeroExtend(S, CalculationTy));
  926. }
  927. // Divide by 2^T
  928. const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
  929. // Truncate the result, and divide by K! / 2^T.
  930. return SE.getMulExpr(SE.getConstant(MultiplyFactor),
  931. SE.getTruncateOrZeroExtend(DivResult, ResultTy));
  932. }
  933. /// evaluateAtIteration - Return the value of this chain of recurrences at
  934. /// the specified iteration number. We can evaluate this recurrence by
  935. /// multiplying each element in the chain by the binomial coefficient
  936. /// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as:
  937. ///
  938. /// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
  939. ///
  940. /// where BC(It, k) stands for binomial coefficient.
  941. ///
  942. const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
  943. ScalarEvolution &SE) const {
  944. const SCEV *Result = getStart();
  945. for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
  946. // The computation is correct in the face of overflow provided that the
  947. // multiplication is performed _after_ the evaluation of the binomial
  948. // coefficient.
  949. const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
  950. if (isa<SCEVCouldNotCompute>(Coeff))
  951. return Coeff;
  952. Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
  953. }
  954. return Result;
  955. }
  956. //===----------------------------------------------------------------------===//
  957. // SCEV Expression folder implementations
  958. //===----------------------------------------------------------------------===//
  959. const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
  960. Type *Ty) {
  961. assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
  962. "This is not a truncating conversion!");
  963. assert(isSCEVable(Ty) &&
  964. "This is not a conversion to a SCEVable type!");
  965. Ty = getEffectiveSCEVType(Ty);
  966. FoldingSetNodeID ID;
  967. ID.AddInteger(scTruncate);
  968. ID.AddPointer(Op);
  969. ID.AddPointer(Ty);
  970. void *IP = nullptr;
  971. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  972. // Fold if the operand is constant.
  973. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
  974. return getConstant(
  975. cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
  976. // trunc(trunc(x)) --> trunc(x)
  977. if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
  978. return getTruncateExpr(ST->getOperand(), Ty);
  979. // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
  980. if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
  981. return getTruncateOrSignExtend(SS->getOperand(), Ty);
  982. // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
  983. if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
  984. return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
  985. // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
  986. // eliminate all the truncates, or we replace other casts with truncates.
  987. if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
  988. SmallVector<const SCEV *, 4> Operands;
  989. bool hasTrunc = false;
  990. for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
  991. const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
  992. if (!isa<SCEVCastExpr>(SA->getOperand(i)))
  993. hasTrunc = isa<SCEVTruncateExpr>(S);
  994. Operands.push_back(S);
  995. }
  996. if (!hasTrunc)
  997. return getAddExpr(Operands);
  998. UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
  999. }
  1000. // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
  1001. // eliminate all the truncates, or we replace other casts with truncates.
  1002. if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
  1003. SmallVector<const SCEV *, 4> Operands;
  1004. bool hasTrunc = false;
  1005. for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
  1006. const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
  1007. if (!isa<SCEVCastExpr>(SM->getOperand(i)))
  1008. hasTrunc = isa<SCEVTruncateExpr>(S);
  1009. Operands.push_back(S);
  1010. }
  1011. if (!hasTrunc)
  1012. return getMulExpr(Operands);
  1013. UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
  1014. }
  1015. // If the input value is a chrec scev, truncate the chrec's operands.
  1016. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
  1017. SmallVector<const SCEV *, 4> Operands;
  1018. for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
  1019. Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
  1020. return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
  1021. }
  1022. // The cast wasn't folded; create an explicit cast node. We can reuse
  1023. // the existing insert position since if we get here, we won't have
  1024. // made any changes which would invalidate it.
  1025. SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
  1026. Op, Ty);
  1027. UniqueSCEVs.InsertNode(S, IP);
  1028. return S;
  1029. }
  1030. // Get the limit of a recurrence such that incrementing by Step cannot cause
  1031. // signed overflow as long as the value of the recurrence within the
  1032. // loop does not exceed this limit before incrementing.
  1033. static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
  1034. ICmpInst::Predicate *Pred,
  1035. ScalarEvolution *SE) {
  1036. unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  1037. if (SE->isKnownPositive(Step)) {
  1038. *Pred = ICmpInst::ICMP_SLT;
  1039. return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
  1040. SE->getSignedRange(Step).getSignedMax());
  1041. }
  1042. if (SE->isKnownNegative(Step)) {
  1043. *Pred = ICmpInst::ICMP_SGT;
  1044. return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
  1045. SE->getSignedRange(Step).getSignedMin());
  1046. }
  1047. return nullptr;
  1048. }
  1049. // Get the limit of a recurrence such that incrementing by Step cannot cause
  1050. // unsigned overflow as long as the value of the recurrence within the loop does
  1051. // not exceed this limit before incrementing.
  1052. static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
  1053. ICmpInst::Predicate *Pred,
  1054. ScalarEvolution *SE) {
  1055. unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  1056. *Pred = ICmpInst::ICMP_ULT;
  1057. return SE->getConstant(APInt::getMinValue(BitWidth) -
  1058. SE->getUnsignedRange(Step).getUnsignedMax());
  1059. }
  1060. namespace {
  1061. struct ExtendOpTraitsBase {
  1062. typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
  1063. };
  1064. // Used to make code generic over signed and unsigned overflow.
  1065. template <typename ExtendOp> struct ExtendOpTraits {
  1066. // Members present:
  1067. //
  1068. // static const SCEV::NoWrapFlags WrapType;
  1069. //
  1070. // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
  1071. //
  1072. // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  1073. // ICmpInst::Predicate *Pred,
  1074. // ScalarEvolution *SE);
  1075. };
  1076. template <>
  1077. struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
  1078. static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
  1079. static const GetExtendExprTy GetExtendExpr;
  1080. static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  1081. ICmpInst::Predicate *Pred,
  1082. ScalarEvolution *SE) {
  1083. return getSignedOverflowLimitForStep(Step, Pred, SE);
  1084. }
  1085. };
  1086. const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
  1087. SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
  1088. template <>
  1089. struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
  1090. static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
  1091. static const GetExtendExprTy GetExtendExpr;
  1092. static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  1093. ICmpInst::Predicate *Pred,
  1094. ScalarEvolution *SE) {
  1095. return getUnsignedOverflowLimitForStep(Step, Pred, SE);
  1096. }
  1097. };
  1098. const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
  1099. SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
  1100. }
  1101. // The recurrence AR has been shown to have no signed/unsigned wrap or something
  1102. // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
  1103. // easily prove NSW/NUW for its preincrement or postincrement sibling. This
  1104. // allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step +
  1105. // Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the
  1106. // expression "Step + sext/zext(PreIncAR)" is congruent with
  1107. // "sext/zext(PostIncAR)"
  1108. template <typename ExtendOpTy>
  1109. static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
  1110. ScalarEvolution *SE) {
  1111. auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  1112. auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
  1113. const Loop *L = AR->getLoop();
  1114. const SCEV *Start = AR->getStart();
  1115. const SCEV *Step = AR->getStepRecurrence(*SE);
  1116. // Check for a simple looking step prior to loop entry.
  1117. const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
  1118. if (!SA)
  1119. return nullptr;
  1120. // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
  1121. // subtraction is expensive. For this purpose, perform a quick and dirty
  1122. // difference, by checking for Step in the operand list.
  1123. SmallVector<const SCEV *, 4> DiffOps;
  1124. for (const SCEV *Op : SA->operands())
  1125. if (Op != Step)
  1126. DiffOps.push_back(Op);
  1127. if (DiffOps.size() == SA->getNumOperands())
  1128. return nullptr;
  1129. // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
  1130. // `Step`:
  1131. // 1. NSW/NUW flags on the step increment.
  1132. const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
  1133. const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
  1134. SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
  1135. // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
  1136. // "S+X does not sign/unsign-overflow".
  1137. //
  1138. const SCEV *BECount = SE->getBackedgeTakenCount(L);
  1139. if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
  1140. !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
  1141. return PreStart;
  1142. // 2. Direct overflow check on the step operation's expression.
  1143. unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
  1144. Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
  1145. const SCEV *OperandExtendedStart =
  1146. SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
  1147. (SE->*GetExtendExpr)(Step, WideTy));
  1148. if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
  1149. if (PreAR && AR->getNoWrapFlags(WrapType)) {
  1150. // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
  1151. // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
  1152. // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
  1153. const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
  1154. }
  1155. return PreStart;
  1156. }
  1157. // 3. Loop precondition.
  1158. ICmpInst::Predicate Pred;
  1159. const SCEV *OverflowLimit =
  1160. ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
  1161. if (OverflowLimit &&
  1162. SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
  1163. return PreStart;
  1164. }
  1165. return nullptr;
  1166. }
  1167. // Get the normalized zero or sign extended expression for this AddRec's Start.
  1168. template <typename ExtendOpTy>
  1169. static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
  1170. ScalarEvolution *SE) {
  1171. auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
  1172. const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
  1173. if (!PreStart)
  1174. return (SE->*GetExtendExpr)(AR->getStart(), Ty);
  1175. return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
  1176. (SE->*GetExtendExpr)(PreStart, Ty));
  1177. }
  1178. // Try to prove away overflow by looking at "nearby" add recurrences. A
  1179. // motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
  1180. // does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
  1181. //
  1182. // Formally:
  1183. //
  1184. // {S,+,X} == {S-T,+,X} + T
  1185. // => Ext({S,+,X}) == Ext({S-T,+,X} + T)
  1186. //
  1187. // If ({S-T,+,X} + T) does not overflow ... (1)
  1188. //
  1189. // RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
  1190. //
  1191. // If {S-T,+,X} does not overflow ... (2)
  1192. //
  1193. // RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
  1194. // == {Ext(S-T)+Ext(T),+,Ext(X)}
  1195. //
  1196. // If (S-T)+T does not overflow ... (3)
  1197. //
  1198. // RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
  1199. // == {Ext(S),+,Ext(X)} == LHS
  1200. //
  1201. // Thus, if (1), (2) and (3) are true for some T, then
  1202. // Ext({S,+,X}) == {Ext(S),+,Ext(X)}
  1203. //
  1204. // (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
  1205. // does not overflow" restricted to the 0th iteration. Therefore we only need
  1206. // to check for (1) and (2).
  1207. //
  1208. // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
  1209. // is `Delta` (defined below).
  1210. //
  1211. template <typename ExtendOpTy>
  1212. bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
  1213. const SCEV *Step,
  1214. const Loop *L) {
  1215. auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  1216. // We restrict `Start` to a constant to prevent SCEV from spending too much
  1217. // time here. It is correct (but more expensive) to continue with a
  1218. // non-constant `Start` and do a general SCEV subtraction to compute
  1219. // `PreStart` below.
  1220. //
  1221. const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
  1222. if (!StartC)
  1223. return false;
  1224. APInt StartAI = StartC->getValue()->getValue();
  1225. for (unsigned Delta : {-2, -1, 1, 2}) {
  1226. const SCEV *PreStart = getConstant(StartAI - Delta);
  1227. // Give up if we don't already have the add recurrence we need because
  1228. // actually constructing an add recurrence is relatively expensive.
  1229. const SCEVAddRecExpr *PreAR = [&]() {
  1230. FoldingSetNodeID ID;
  1231. ID.AddInteger(scAddRecExpr);
  1232. ID.AddPointer(PreStart);
  1233. ID.AddPointer(Step);
  1234. ID.AddPointer(L);
  1235. void *IP = nullptr;
  1236. return static_cast<SCEVAddRecExpr *>(
  1237. this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  1238. }();
  1239. if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
  1240. const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
  1241. ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
  1242. const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
  1243. DeltaS, &Pred, this);
  1244. if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1)
  1245. return true;
  1246. }
  1247. }
  1248. return false;
  1249. }
  1250. const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
  1251. Type *Ty) {
  1252. assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
  1253. "This is not an extending conversion!");
  1254. assert(isSCEVable(Ty) &&
  1255. "This is not a conversion to a SCEVable type!");
  1256. Ty = getEffectiveSCEVType(Ty);
  1257. // Fold if the operand is constant.
  1258. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
  1259. return getConstant(
  1260. cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
  1261. // zext(zext(x)) --> zext(x)
  1262. if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
  1263. return getZeroExtendExpr(SZ->getOperand(), Ty);
  1264. // Before doing any expensive analysis, check to see if we've already
  1265. // computed a SCEV for this Op and Ty.
  1266. FoldingSetNodeID ID;
  1267. ID.AddInteger(scZeroExtend);
  1268. ID.AddPointer(Op);
  1269. ID.AddPointer(Ty);
  1270. void *IP = nullptr;
  1271. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  1272. // zext(trunc(x)) --> zext(x) or x or trunc(x)
  1273. if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
  1274. // It's possible the bits taken off by the truncate were all zero bits. If
  1275. // so, we should be able to simplify this further.
  1276. const SCEV *X = ST->getOperand();
  1277. ConstantRange CR = getUnsignedRange(X);
  1278. unsigned TruncBits = getTypeSizeInBits(ST->getType());
  1279. unsigned NewBits = getTypeSizeInBits(Ty);
  1280. if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
  1281. CR.zextOrTrunc(NewBits)))
  1282. return getTruncateOrZeroExtend(X, Ty);
  1283. }
  1284. // If the input value is a chrec scev, and we can prove that the value
  1285. // did not overflow the old, smaller, value, we can zero extend all of the
  1286. // operands (often constants). This allows analysis of something like
  1287. // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
  1288. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
  1289. if (AR->isAffine()) {
  1290. const SCEV *Start = AR->getStart();
  1291. const SCEV *Step = AR->getStepRecurrence(*this);
  1292. unsigned BitWidth = getTypeSizeInBits(AR->getType());
  1293. const Loop *L = AR->getLoop();
  1294. // If we have special knowledge that this addrec won't overflow,
  1295. // we don't need to do any further analysis.
  1296. if (AR->getNoWrapFlags(SCEV::FlagNUW))
  1297. return getAddRecExpr(
  1298. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
  1299. getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1300. // Check whether the backedge-taken count is SCEVCouldNotCompute.
  1301. // Note that this serves two purposes: It filters out loops that are
  1302. // simply not analyzable, and it covers the case where this code is
  1303. // being called from within backedge-taken count analysis, such that
  1304. // attempting to ask for the backedge-taken count would likely result
  1305. // in infinite recursion. In the later case, the analysis code will
  1306. // cope with a conservative value, and it will take care to purge
  1307. // that value once it has finished.
  1308. const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
  1309. if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
  1310. // Manually compute the final value for AR, checking for
  1311. // overflow.
  1312. // Check whether the backedge-taken count can be losslessly casted to
  1313. // the addrec's type. The count is always unsigned.
  1314. const SCEV *CastedMaxBECount =
  1315. getTruncateOrZeroExtend(MaxBECount, Start->getType());
  1316. const SCEV *RecastedMaxBECount =
  1317. getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
  1318. if (MaxBECount == RecastedMaxBECount) {
  1319. Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
  1320. // Check whether Start+Step*MaxBECount has no unsigned overflow.
  1321. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
  1322. const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
  1323. const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
  1324. const SCEV *WideMaxBECount =
  1325. getZeroExtendExpr(CastedMaxBECount, WideTy);
  1326. const SCEV *OperandExtendedAdd =
  1327. getAddExpr(WideStart,
  1328. getMulExpr(WideMaxBECount,
  1329. getZeroExtendExpr(Step, WideTy)));
  1330. if (ZAdd == OperandExtendedAdd) {
  1331. // Cache knowledge of AR NUW, which is propagated to this AddRec.
  1332. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
  1333. // Return the expression with the addrec on the outside.
  1334. return getAddRecExpr(
  1335. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
  1336. getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1337. }
  1338. // Similar to above, only this time treat the step value as signed.
  1339. // This covers loops that count down.
  1340. OperandExtendedAdd =
  1341. getAddExpr(WideStart,
  1342. getMulExpr(WideMaxBECount,
  1343. getSignExtendExpr(Step, WideTy)));
  1344. if (ZAdd == OperandExtendedAdd) {
  1345. // Cache knowledge of AR NW, which is propagated to this AddRec.
  1346. // Negative step causes unsigned wrap, but it still can't self-wrap.
  1347. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
  1348. // Return the expression with the addrec on the outside.
  1349. return getAddRecExpr(
  1350. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
  1351. getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1352. }
  1353. }
  1354. // If the backedge is guarded by a comparison with the pre-inc value
  1355. // the addrec is safe. Also, if the entry is guarded by a comparison
  1356. // with the start value and the backedge is guarded by a comparison
  1357. // with the post-inc value, the addrec is safe.
  1358. if (isKnownPositive(Step)) {
  1359. const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
  1360. getUnsignedRange(Step).getUnsignedMax());
  1361. if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
  1362. (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
  1363. isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
  1364. AR->getPostIncExpr(*this), N))) {
  1365. // Cache knowledge of AR NUW, which is propagated to this AddRec.
  1366. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
  1367. // Return the expression with the addrec on the outside.
  1368. return getAddRecExpr(
  1369. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
  1370. getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1371. }
  1372. } else if (isKnownNegative(Step)) {
  1373. const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
  1374. getSignedRange(Step).getSignedMin());
  1375. if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
  1376. (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
  1377. isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
  1378. AR->getPostIncExpr(*this), N))) {
  1379. // Cache knowledge of AR NW, which is propagated to this AddRec.
  1380. // Negative step causes unsigned wrap, but it still can't self-wrap.
  1381. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
  1382. // Return the expression with the addrec on the outside.
  1383. return getAddRecExpr(
  1384. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
  1385. getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1386. }
  1387. }
  1388. }
  1389. if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
  1390. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
  1391. return getAddRecExpr(
  1392. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
  1393. getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1394. }
  1395. }
  1396. // The cast wasn't folded; create an explicit cast node.
  1397. // Recompute the insert position, as it may have been invalidated.
  1398. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  1399. SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
  1400. Op, Ty);
  1401. UniqueSCEVs.InsertNode(S, IP);
  1402. return S;
  1403. }
  1404. const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
  1405. Type *Ty) {
  1406. assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
  1407. "This is not an extending conversion!");
  1408. assert(isSCEVable(Ty) &&
  1409. "This is not a conversion to a SCEVable type!");
  1410. Ty = getEffectiveSCEVType(Ty);
  1411. // Fold if the operand is constant.
  1412. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
  1413. return getConstant(
  1414. cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
  1415. // sext(sext(x)) --> sext(x)
  1416. if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
  1417. return getSignExtendExpr(SS->getOperand(), Ty);
  1418. // sext(zext(x)) --> zext(x)
  1419. if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
  1420. return getZeroExtendExpr(SZ->getOperand(), Ty);
  1421. // Before doing any expensive analysis, check to see if we've already
  1422. // computed a SCEV for this Op and Ty.
  1423. FoldingSetNodeID ID;
  1424. ID.AddInteger(scSignExtend);
  1425. ID.AddPointer(Op);
  1426. ID.AddPointer(Ty);
  1427. void *IP = nullptr;
  1428. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  1429. // If the input value is provably positive, build a zext instead.
  1430. if (isKnownNonNegative(Op))
  1431. return getZeroExtendExpr(Op, Ty);
  1432. // sext(trunc(x)) --> sext(x) or x or trunc(x)
  1433. if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
  1434. // It's possible the bits taken off by the truncate were all sign bits. If
  1435. // so, we should be able to simplify this further.
  1436. const SCEV *X = ST->getOperand();
  1437. ConstantRange CR = getSignedRange(X);
  1438. unsigned TruncBits = getTypeSizeInBits(ST->getType());
  1439. unsigned NewBits = getTypeSizeInBits(Ty);
  1440. if (CR.truncate(TruncBits).signExtend(NewBits).contains(
  1441. CR.sextOrTrunc(NewBits)))
  1442. return getTruncateOrSignExtend(X, Ty);
  1443. }
  1444. // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
  1445. if (auto SA = dyn_cast<SCEVAddExpr>(Op)) {
  1446. if (SA->getNumOperands() == 2) {
  1447. auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
  1448. auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
  1449. if (SMul && SC1) {
  1450. if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
  1451. const APInt &C1 = SC1->getValue()->getValue();
  1452. const APInt &C2 = SC2->getValue()->getValue();
  1453. if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
  1454. C2.ugt(C1) && C2.isPowerOf2())
  1455. return getAddExpr(getSignExtendExpr(SC1, Ty),
  1456. getSignExtendExpr(SMul, Ty));
  1457. }
  1458. }
  1459. }
  1460. }
  1461. // If the input value is a chrec scev, and we can prove that the value
  1462. // did not overflow the old, smaller, value, we can sign extend all of the
  1463. // operands (often constants). This allows analysis of something like
  1464. // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
  1465. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
  1466. if (AR->isAffine()) {
  1467. const SCEV *Start = AR->getStart();
  1468. const SCEV *Step = AR->getStepRecurrence(*this);
  1469. unsigned BitWidth = getTypeSizeInBits(AR->getType());
  1470. const Loop *L = AR->getLoop();
  1471. // If we have special knowledge that this addrec won't overflow,
  1472. // we don't need to do any further analysis.
  1473. if (AR->getNoWrapFlags(SCEV::FlagNSW))
  1474. return getAddRecExpr(
  1475. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
  1476. getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
  1477. // Check whether the backedge-taken count is SCEVCouldNotCompute.
  1478. // Note that this serves two purposes: It filters out loops that are
  1479. // simply not analyzable, and it covers the case where this code is
  1480. // being called from within backedge-taken count analysis, such that
  1481. // attempting to ask for the backedge-taken count would likely result
  1482. // in infinite recursion. In the later case, the analysis code will
  1483. // cope with a conservative value, and it will take care to purge
  1484. // that value once it has finished.
  1485. const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
  1486. if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
  1487. // Manually compute the final value for AR, checking for
  1488. // overflow.
  1489. // Check whether the backedge-taken count can be losslessly casted to
  1490. // the addrec's type. The count is always unsigned.
  1491. const SCEV *CastedMaxBECount =
  1492. getTruncateOrZeroExtend(MaxBECount, Start->getType());
  1493. const SCEV *RecastedMaxBECount =
  1494. getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
  1495. if (MaxBECount == RecastedMaxBECount) {
  1496. Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
  1497. // Check whether Start+Step*MaxBECount has no signed overflow.
  1498. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
  1499. const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
  1500. const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
  1501. const SCEV *WideMaxBECount =
  1502. getZeroExtendExpr(CastedMaxBECount, WideTy);
  1503. const SCEV *OperandExtendedAdd =
  1504. getAddExpr(WideStart,
  1505. getMulExpr(WideMaxBECount,
  1506. getSignExtendExpr(Step, WideTy)));
  1507. if (SAdd == OperandExtendedAdd) {
  1508. // Cache knowledge of AR NSW, which is propagated to this AddRec.
  1509. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
  1510. // Return the expression with the addrec on the outside.
  1511. return getAddRecExpr(
  1512. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
  1513. getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1514. }
  1515. // Similar to above, only this time treat the step value as unsigned.
  1516. // This covers loops that count up with an unsigned step.
  1517. OperandExtendedAdd =
  1518. getAddExpr(WideStart,
  1519. getMulExpr(WideMaxBECount,
  1520. getZeroExtendExpr(Step, WideTy)));
  1521. if (SAdd == OperandExtendedAdd) {
  1522. // If AR wraps around then
  1523. //
  1524. // abs(Step) * MaxBECount > unsigned-max(AR->getType())
  1525. // => SAdd != OperandExtendedAdd
  1526. //
  1527. // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
  1528. // (SAdd == OperandExtendedAdd => AR is NW)
  1529. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
  1530. // Return the expression with the addrec on the outside.
  1531. return getAddRecExpr(
  1532. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
  1533. getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1534. }
  1535. }
  1536. // If the backedge is guarded by a comparison with the pre-inc value
  1537. // the addrec is safe. Also, if the entry is guarded by a comparison
  1538. // with the start value and the backedge is guarded by a comparison
  1539. // with the post-inc value, the addrec is safe.
  1540. ICmpInst::Predicate Pred;
  1541. const SCEV *OverflowLimit =
  1542. getSignedOverflowLimitForStep(Step, &Pred, this);
  1543. if (OverflowLimit &&
  1544. (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
  1545. (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
  1546. isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
  1547. OverflowLimit)))) {
  1548. // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
  1549. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
  1550. return getAddRecExpr(
  1551. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
  1552. getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1553. }
  1554. }
  1555. // If Start and Step are constants, check if we can apply this
  1556. // transformation:
  1557. // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
  1558. auto SC1 = dyn_cast<SCEVConstant>(Start);
  1559. auto SC2 = dyn_cast<SCEVConstant>(Step);
  1560. if (SC1 && SC2) {
  1561. const APInt &C1 = SC1->getValue()->getValue();
  1562. const APInt &C2 = SC2->getValue()->getValue();
  1563. if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
  1564. C2.isPowerOf2()) {
  1565. Start = getSignExtendExpr(Start, Ty);
  1566. const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
  1567. L, AR->getNoWrapFlags());
  1568. return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
  1569. }
  1570. }
  1571. if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
  1572. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
  1573. return getAddRecExpr(
  1574. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
  1575. getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
  1576. }
  1577. }
  1578. // The cast wasn't folded; create an explicit cast node.
  1579. // Recompute the insert position, as it may have been invalidated.
  1580. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  1581. SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
  1582. Op, Ty);
  1583. UniqueSCEVs.InsertNode(S, IP);
  1584. return S;
  1585. }
  1586. /// getAnyExtendExpr - Return a SCEV for the given operand extended with
  1587. /// unspecified bits out to the given type.
  1588. ///
  1589. const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
  1590. Type *Ty) {
  1591. assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
  1592. "This is not an extending conversion!");
  1593. assert(isSCEVable(Ty) &&
  1594. "This is not a conversion to a SCEVable type!");
  1595. Ty = getEffectiveSCEVType(Ty);
  1596. // Sign-extend negative constants.
  1597. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
  1598. if (SC->getValue()->getValue().isNegative())
  1599. return getSignExtendExpr(Op, Ty);
  1600. // Peel off a truncate cast.
  1601. if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
  1602. const SCEV *NewOp = T->getOperand();
  1603. if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
  1604. return getAnyExtendExpr(NewOp, Ty);
  1605. return getTruncateOrNoop(NewOp, Ty);
  1606. }
  1607. // Next try a zext cast. If the cast is folded, use it.
  1608. const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
  1609. if (!isa<SCEVZeroExtendExpr>(ZExt))
  1610. return ZExt;
  1611. // Next try a sext cast. If the cast is folded, use it.
  1612. const SCEV *SExt = getSignExtendExpr(Op, Ty);
  1613. if (!isa<SCEVSignExtendExpr>(SExt))
  1614. return SExt;
  1615. // Force the cast to be folded into the operands of an addrec.
  1616. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
  1617. SmallVector<const SCEV *, 4> Ops;
  1618. for (const SCEV *Op : AR->operands())
  1619. Ops.push_back(getAnyExtendExpr(Op, Ty));
  1620. return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
  1621. }
  1622. // If the expression is obviously signed, use the sext cast value.
  1623. if (isa<SCEVSMaxExpr>(Op))
  1624. return SExt;
  1625. // Absent any other information, use the zext cast value.
  1626. return ZExt;
  1627. }
  1628. /// CollectAddOperandsWithScales - Process the given Ops list, which is
  1629. /// a list of operands to be added under the given scale, update the given
  1630. /// map. This is a helper function for getAddRecExpr. As an example of
  1631. /// what it does, given a sequence of operands that would form an add
  1632. /// expression like this:
  1633. ///
  1634. /// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
  1635. ///
  1636. /// where A and B are constants, update the map with these values:
  1637. ///
  1638. /// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
  1639. ///
  1640. /// and add 13 + A*B*29 to AccumulatedConstant.
  1641. /// This will allow getAddRecExpr to produce this:
  1642. ///
  1643. /// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
  1644. ///
  1645. /// This form often exposes folding opportunities that are hidden in
  1646. /// the original operand list.
  1647. ///
  1648. /// Return true iff it appears that any interesting folding opportunities
  1649. /// may be exposed. This helps getAddRecExpr short-circuit extra work in
  1650. /// the common case where no interesting opportunities are present, and
  1651. /// is also used as a check to avoid infinite recursion.
  1652. ///
  1653. static bool
  1654. CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
  1655. SmallVectorImpl<const SCEV *> &NewOps,
  1656. APInt &AccumulatedConstant,
  1657. const SCEV *const *Ops, size_t NumOperands,
  1658. const APInt &Scale,
  1659. ScalarEvolution &SE) {
  1660. bool Interesting = false;
  1661. // Iterate over the add operands. They are sorted, with constants first.
  1662. unsigned i = 0;
  1663. while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
  1664. ++i;
  1665. // Pull a buried constant out to the outside.
  1666. if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
  1667. Interesting = true;
  1668. AccumulatedConstant += Scale * C->getValue()->getValue();
  1669. }
  1670. // Next comes everything else. We're especially interested in multiplies
  1671. // here, but they're in the middle, so just visit the rest with one loop.
  1672. for (; i != NumOperands; ++i) {
  1673. const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
  1674. if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
  1675. APInt NewScale =
  1676. Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
  1677. if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
  1678. // A multiplication of a constant with another add; recurse.
  1679. const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
  1680. Interesting |=
  1681. CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
  1682. Add->op_begin(), Add->getNumOperands(),
  1683. NewScale, SE);
  1684. } else {
  1685. // A multiplication of a constant with some other value. Update
  1686. // the map.
  1687. SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
  1688. const SCEV *Key = SE.getMulExpr(MulOps);
  1689. std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
  1690. M.insert(std::make_pair(Key, NewScale));
  1691. if (Pair.second) {
  1692. NewOps.push_back(Pair.first->first);
  1693. } else {
  1694. Pair.first->second += NewScale;
  1695. // The map already had an entry for this value, which may indicate
  1696. // a folding opportunity.
  1697. Interesting = true;
  1698. }
  1699. }
  1700. } else {
  1701. // An ordinary operand. Update the map.
  1702. std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
  1703. M.insert(std::make_pair(Ops[i], Scale));
  1704. if (Pair.second) {
  1705. NewOps.push_back(Pair.first->first);
  1706. } else {
  1707. Pair.first->second += Scale;
  1708. // The map already had an entry for this value, which may indicate
  1709. // a folding opportunity.
  1710. Interesting = true;
  1711. }
  1712. }
  1713. }
  1714. return Interesting;
  1715. }
  1716. namespace {
  1717. struct APIntCompare {
  1718. bool operator()(const APInt &LHS, const APInt &RHS) const {
  1719. return LHS.ult(RHS);
  1720. }
  1721. };
  1722. }
  1723. // We're trying to construct a SCEV of type `Type' with `Ops' as operands and
  1724. // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
  1725. // can't-overflow flags for the operation if possible.
  1726. static SCEV::NoWrapFlags
  1727. StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
  1728. const SmallVectorImpl<const SCEV *> &Ops,
  1729. SCEV::NoWrapFlags OldFlags) {
  1730. using namespace std::placeholders;
  1731. bool CanAnalyze =
  1732. Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
  1733. (void)CanAnalyze;
  1734. assert(CanAnalyze && "don't call from other places!");
  1735. int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
  1736. SCEV::NoWrapFlags SignOrUnsignWrap =
  1737. ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask);
  1738. // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
  1739. auto IsKnownNonNegative =
  1740. std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1);
  1741. if (SignOrUnsignWrap == SCEV::FlagNSW &&
  1742. std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative))
  1743. return ScalarEvolution::setFlags(OldFlags,
  1744. (SCEV::NoWrapFlags)SignOrUnsignMask);
  1745. return OldFlags;
  1746. }
  1747. /// getAddExpr - Get a canonical add expression, or something simpler if
  1748. /// possible.
  1749. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
  1750. SCEV::NoWrapFlags Flags) {
  1751. assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
  1752. "only nuw or nsw allowed");
  1753. assert(!Ops.empty() && "Cannot get empty add!");
  1754. if (Ops.size() == 1) return Ops[0];
  1755. #ifndef NDEBUG
  1756. Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  1757. for (unsigned i = 1, e = Ops.size(); i != e; ++i)
  1758. assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
  1759. "SCEVAddExpr operand types don't match!");
  1760. #endif
  1761. Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
  1762. // Sort by complexity, this groups all similar expression types together.
  1763. GroupByComplexity(Ops, LI);
  1764. // If there are any constants, fold them together.
  1765. unsigned Idx = 0;
  1766. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
  1767. ++Idx;
  1768. assert(Idx < Ops.size());
  1769. while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
  1770. // We found two constants, fold them together!
  1771. Ops[0] = getConstant(LHSC->getValue()->getValue() +
  1772. RHSC->getValue()->getValue());
  1773. if (Ops.size() == 2) return Ops[0];
  1774. Ops.erase(Ops.begin()+1); // Erase the folded element
  1775. LHSC = cast<SCEVConstant>(Ops[0]);
  1776. }
  1777. // If we are left with a constant zero being added, strip it off.
  1778. if (LHSC->getValue()->isZero()) {
  1779. Ops.erase(Ops.begin());
  1780. --Idx;
  1781. }
  1782. if (Ops.size() == 1) return Ops[0];
  1783. }
  1784. // Okay, check to see if the same value occurs in the operand list more than
  1785. // once. If so, merge them together into an multiply expression. Since we
  1786. // sorted the list, these values are required to be adjacent.
  1787. Type *Ty = Ops[0]->getType();
  1788. bool FoundMatch = false;
  1789. for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
  1790. if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
  1791. // Scan ahead to count how many equal operands there are.
  1792. unsigned Count = 2;
  1793. while (i+Count != e && Ops[i+Count] == Ops[i])
  1794. ++Count;
  1795. // Merge the values into a multiply.
  1796. const SCEV *Scale = getConstant(Ty, Count);
  1797. const SCEV *Mul = getMulExpr(Scale, Ops[i]);
  1798. if (Ops.size() == Count)
  1799. return Mul;
  1800. Ops[i] = Mul;
  1801. Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
  1802. --i; e -= Count - 1;
  1803. FoundMatch = true;
  1804. }
  1805. if (FoundMatch)
  1806. return getAddExpr(Ops, Flags);
  1807. // Check for truncates. If all the operands are truncated from the same
  1808. // type, see if factoring out the truncate would permit the result to be
  1809. // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
  1810. // if the contents of the resulting outer trunc fold to something simple.
  1811. for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
  1812. const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
  1813. Type *DstType = Trunc->getType();
  1814. Type *SrcType = Trunc->getOperand()->getType();
  1815. SmallVector<const SCEV *, 8> LargeOps;
  1816. bool Ok = true;
  1817. // Check all the operands to see if they can be represented in the
  1818. // source type of the truncate.
  1819. for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
  1820. if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
  1821. if (T->getOperand()->getType() != SrcType) {
  1822. Ok = false;
  1823. break;
  1824. }
  1825. LargeOps.push_back(T->getOperand());
  1826. } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
  1827. LargeOps.push_back(getAnyExtendExpr(C, SrcType));
  1828. } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
  1829. SmallVector<const SCEV *, 8> LargeMulOps;
  1830. for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
  1831. if (const SCEVTruncateExpr *T =
  1832. dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
  1833. if (T->getOperand()->getType() != SrcType) {
  1834. Ok = false;
  1835. break;
  1836. }
  1837. LargeMulOps.push_back(T->getOperand());
  1838. } else if (const SCEVConstant *C =
  1839. dyn_cast<SCEVConstant>(M->getOperand(j))) {
  1840. LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
  1841. } else {
  1842. Ok = false;
  1843. break;
  1844. }
  1845. }
  1846. if (Ok)
  1847. LargeOps.push_back(getMulExpr(LargeMulOps));
  1848. } else {
  1849. Ok = false;
  1850. break;
  1851. }
  1852. }
  1853. if (Ok) {
  1854. // Evaluate the expression in the larger type.
  1855. const SCEV *Fold = getAddExpr(LargeOps, Flags);
  1856. // If it folds to something simple, use it. Otherwise, don't.
  1857. if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
  1858. return getTruncateExpr(Fold, DstType);
  1859. }
  1860. }
  1861. // Skip past any other cast SCEVs.
  1862. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
  1863. ++Idx;
  1864. // If there are add operands they would be next.
  1865. if (Idx < Ops.size()) {
  1866. bool DeletedAdd = false;
  1867. while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
  1868. // If we have an add, expand the add operands onto the end of the operands
  1869. // list.
  1870. Ops.erase(Ops.begin()+Idx);
  1871. Ops.append(Add->op_begin(), Add->op_end());
  1872. DeletedAdd = true;
  1873. }
  1874. // If we deleted at least one add, we added operands to the end of the list,
  1875. // and they are not necessarily sorted. Recurse to resort and resimplify
  1876. // any operands we just acquired.
  1877. if (DeletedAdd)
  1878. return getAddExpr(Ops);
  1879. }
  1880. // Skip over the add expression until we get to a multiply.
  1881. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
  1882. ++Idx;
  1883. // Check to see if there are any folding opportunities present with
  1884. // operands multiplied by constant values.
  1885. if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
  1886. uint64_t BitWidth = getTypeSizeInBits(Ty);
  1887. DenseMap<const SCEV *, APInt> M;
  1888. SmallVector<const SCEV *, 8> NewOps;
  1889. APInt AccumulatedConstant(BitWidth, 0);
  1890. if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
  1891. Ops.data(), Ops.size(),
  1892. APInt(BitWidth, 1), *this)) {
  1893. // Some interesting folding opportunity is present, so its worthwhile to
  1894. // re-generate the operands list. Group the operands by constant scale,
  1895. // to avoid multiplying by the same constant scale multiple times.
  1896. std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
  1897. for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
  1898. E = NewOps.end(); I != E; ++I)
  1899. MulOpLists[M.find(*I)->second].push_back(*I);
  1900. // Re-generate the operands list.
  1901. Ops.clear();
  1902. if (AccumulatedConstant != 0)
  1903. Ops.push_back(getConstant(AccumulatedConstant));
  1904. for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
  1905. I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
  1906. if (I->first != 0)
  1907. Ops.push_back(getMulExpr(getConstant(I->first),
  1908. getAddExpr(I->second)));
  1909. if (Ops.empty())
  1910. return getConstant(Ty, 0);
  1911. if (Ops.size() == 1)
  1912. return Ops[0];
  1913. return getAddExpr(Ops);
  1914. }
  1915. }
  1916. // If we are adding something to a multiply expression, make sure the
  1917. // something is not already an operand of the multiply. If so, merge it into
  1918. // the multiply.
  1919. for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
  1920. const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
  1921. for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
  1922. const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
  1923. if (isa<SCEVConstant>(MulOpSCEV))
  1924. continue;
  1925. for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
  1926. if (MulOpSCEV == Ops[AddOp]) {
  1927. // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
  1928. const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
  1929. if (Mul->getNumOperands() != 2) {
  1930. // If the multiply has more than two operands, we must get the
  1931. // Y*Z term.
  1932. SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
  1933. Mul->op_begin()+MulOp);
  1934. MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
  1935. InnerMul = getMulExpr(MulOps);
  1936. }
  1937. const SCEV *One = getConstant(Ty, 1);
  1938. const SCEV *AddOne = getAddExpr(One, InnerMul);
  1939. const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
  1940. if (Ops.size() == 2) return OuterMul;
  1941. if (AddOp < Idx) {
  1942. Ops.erase(Ops.begin()+AddOp);
  1943. Ops.erase(Ops.begin()+Idx-1);
  1944. } else {
  1945. Ops.erase(Ops.begin()+Idx);
  1946. Ops.erase(Ops.begin()+AddOp-1);
  1947. }
  1948. Ops.push_back(OuterMul);
  1949. return getAddExpr(Ops);
  1950. }
  1951. // Check this multiply against other multiplies being added together.
  1952. for (unsigned OtherMulIdx = Idx+1;
  1953. OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
  1954. ++OtherMulIdx) {
  1955. const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
  1956. // If MulOp occurs in OtherMul, we can fold the two multiplies
  1957. // together.
  1958. for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
  1959. OMulOp != e; ++OMulOp)
  1960. if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
  1961. // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
  1962. const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
  1963. if (Mul->getNumOperands() != 2) {
  1964. SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
  1965. Mul->op_begin()+MulOp);
  1966. MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
  1967. InnerMul1 = getMulExpr(MulOps);
  1968. }
  1969. const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
  1970. if (OtherMul->getNumOperands() != 2) {
  1971. SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
  1972. OtherMul->op_begin()+OMulOp);
  1973. MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
  1974. InnerMul2 = getMulExpr(MulOps);
  1975. }
  1976. const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
  1977. const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
  1978. if (Ops.size() == 2) return OuterMul;
  1979. Ops.erase(Ops.begin()+Idx);
  1980. Ops.erase(Ops.begin()+OtherMulIdx-1);
  1981. Ops.push_back(OuterMul);
  1982. return getAddExpr(Ops);
  1983. }
  1984. }
  1985. }
  1986. }
  1987. // If there are any add recurrences in the operands list, see if any other
  1988. // added values are loop invariant. If so, we can fold them into the
  1989. // recurrence.
  1990. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
  1991. ++Idx;
  1992. // Scan over all recurrences, trying to fold loop invariants into them.
  1993. for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
  1994. // Scan all of the other operands to this add and add them to the vector if
  1995. // they are loop invariant w.r.t. the recurrence.
  1996. SmallVector<const SCEV *, 8> LIOps;
  1997. const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
  1998. const Loop *AddRecLoop = AddRec->getLoop();
  1999. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  2000. if (isLoopInvariant(Ops[i], AddRecLoop)) {
  2001. LIOps.push_back(Ops[i]);
  2002. Ops.erase(Ops.begin()+i);
  2003. --i; --e;
  2004. }
  2005. // If we found some loop invariants, fold them into the recurrence.
  2006. if (!LIOps.empty()) {
  2007. // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
  2008. LIOps.push_back(AddRec->getStart());
  2009. SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
  2010. AddRec->op_end());
  2011. AddRecOps[0] = getAddExpr(LIOps);
  2012. // Build the new addrec. Propagate the NUW and NSW flags if both the
  2013. // outer add and the inner addrec are guaranteed to have no overflow.
  2014. // Always propagate NW.
  2015. Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
  2016. const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
  2017. // If all of the other operands were loop invariant, we are done.
  2018. if (Ops.size() == 1) return NewRec;
  2019. // Otherwise, add the folded AddRec by the non-invariant parts.
  2020. for (unsigned i = 0;; ++i)
  2021. if (Ops[i] == AddRec) {
  2022. Ops[i] = NewRec;
  2023. break;
  2024. }
  2025. return getAddExpr(Ops);
  2026. }
  2027. // Okay, if there weren't any loop invariants to be folded, check to see if
  2028. // there are multiple AddRec's with the same loop induction variable being
  2029. // added together. If so, we can fold them.
  2030. for (unsigned OtherIdx = Idx+1;
  2031. OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
  2032. ++OtherIdx)
  2033. if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
  2034. // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
  2035. SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
  2036. AddRec->op_end());
  2037. for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
  2038. ++OtherIdx)
  2039. if (const SCEVAddRecExpr *OtherAddRec =
  2040. dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
  2041. if (OtherAddRec->getLoop() == AddRecLoop) {
  2042. for (unsigned i = 0, e = OtherAddRec->getNumOperands();
  2043. i != e; ++i) {
  2044. if (i >= AddRecOps.size()) {
  2045. AddRecOps.append(OtherAddRec->op_begin()+i,
  2046. OtherAddRec->op_end());
  2047. break;
  2048. }
  2049. AddRecOps[i] = getAddExpr(AddRecOps[i],
  2050. OtherAddRec->getOperand(i));
  2051. }
  2052. Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
  2053. }
  2054. // Step size has changed, so we cannot guarantee no self-wraparound.
  2055. Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
  2056. return getAddExpr(Ops);
  2057. }
  2058. // Otherwise couldn't fold anything into this recurrence. Move onto the
  2059. // next one.
  2060. }
  2061. // Okay, it looks like we really DO need an add expr. Check to see if we
  2062. // already have one, otherwise create a new one.
  2063. FoldingSetNodeID ID;
  2064. ID.AddInteger(scAddExpr);
  2065. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  2066. ID.AddPointer(Ops[i]);
  2067. void *IP = nullptr;
  2068. SCEVAddExpr *S =
  2069. static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  2070. if (!S) {
  2071. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  2072. std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  2073. S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
  2074. O, Ops.size());
  2075. UniqueSCEVs.InsertNode(S, IP);
  2076. }
  2077. S->setNoWrapFlags(Flags);
  2078. return S;
  2079. }
  2080. static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  2081. uint64_t k = i*j;
  2082. if (j > 1 && k / j != i) Overflow = true;
  2083. return k;
  2084. }
  2085. /// Compute the result of "n choose k", the binomial coefficient. If an
  2086. /// intermediate computation overflows, Overflow will be set and the return will
  2087. /// be garbage. Overflow is not cleared on absence of overflow.
  2088. static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
  2089. // We use the multiplicative formula:
  2090. // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
  2091. // At each iteration, we take the n-th term of the numeral and divide by the
  2092. // (k-n)th term of the denominator. This division will always produce an
  2093. // integral result, and helps reduce the chance of overflow in the
  2094. // intermediate computations. However, we can still overflow even when the
  2095. // final result would fit.
  2096. if (n == 0 || n == k) return 1;
  2097. if (k > n) return 0;
  2098. if (k > n/2)
  2099. k = n-k;
  2100. uint64_t r = 1;
  2101. for (uint64_t i = 1; i <= k; ++i) {
  2102. r = umul_ov(r, n-(i-1), Overflow);
  2103. r /= i;
  2104. }
  2105. return r;
  2106. }
  2107. /// Determine if any of the operands in this SCEV are a constant or if
  2108. /// any of the add or multiply expressions in this SCEV contain a constant.
  2109. static bool containsConstantSomewhere(const SCEV *StartExpr) {
  2110. SmallVector<const SCEV *, 4> Ops;
  2111. Ops.push_back(StartExpr);
  2112. while (!Ops.empty()) {
  2113. const SCEV *CurrentExpr = Ops.pop_back_val();
  2114. if (isa<SCEVConstant>(*CurrentExpr))
  2115. return true;
  2116. if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
  2117. const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
  2118. Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
  2119. }
  2120. }
  2121. return false;
  2122. }
  2123. /// getMulExpr - Get a canonical multiply expression, or something simpler if
  2124. /// possible.
  2125. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
  2126. SCEV::NoWrapFlags Flags) {
  2127. assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
  2128. "only nuw or nsw allowed");
  2129. assert(!Ops.empty() && "Cannot get empty mul!");
  2130. if (Ops.size() == 1) return Ops[0];
  2131. #ifndef NDEBUG
  2132. Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  2133. for (unsigned i = 1, e = Ops.size(); i != e; ++i)
  2134. assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
  2135. "SCEVMulExpr operand types don't match!");
  2136. #endif
  2137. Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
  2138. // Sort by complexity, this groups all similar expression types together.
  2139. GroupByComplexity(Ops, LI);
  2140. // If there are any constants, fold them together.
  2141. unsigned Idx = 0;
  2142. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
  2143. // C1*(C2+V) -> C1*C2 + C1*V
  2144. if (Ops.size() == 2)
  2145. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
  2146. // If any of Add's ops are Adds or Muls with a constant,
  2147. // apply this transformation as well.
  2148. if (Add->getNumOperands() == 2)
  2149. if (containsConstantSomewhere(Add))
  2150. return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
  2151. getMulExpr(LHSC, Add->getOperand(1)));
  2152. ++Idx;
  2153. while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
  2154. // We found two constants, fold them together!
  2155. ConstantInt *Fold = ConstantInt::get(getContext(),
  2156. LHSC->getValue()->getValue() *
  2157. RHSC->getValue()->getValue());
  2158. Ops[0] = getConstant(Fold);
  2159. Ops.erase(Ops.begin()+1); // Erase the folded element
  2160. if (Ops.size() == 1) return Ops[0];
  2161. LHSC = cast<SCEVConstant>(Ops[0]);
  2162. }
  2163. // If we are left with a constant one being multiplied, strip it off.
  2164. if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
  2165. Ops.erase(Ops.begin());
  2166. --Idx;
  2167. } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
  2168. // If we have a multiply of zero, it will always be zero.
  2169. return Ops[0];
  2170. } else if (Ops[0]->isAllOnesValue()) {
  2171. // If we have a mul by -1 of an add, try distributing the -1 among the
  2172. // add operands.
  2173. if (Ops.size() == 2) {
  2174. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
  2175. SmallVector<const SCEV *, 4> NewOps;
  2176. bool AnyFolded = false;
  2177. for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
  2178. E = Add->op_end(); I != E; ++I) {
  2179. const SCEV *Mul = getMulExpr(Ops[0], *I);
  2180. if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
  2181. NewOps.push_back(Mul);
  2182. }
  2183. if (AnyFolded)
  2184. return getAddExpr(NewOps);
  2185. }
  2186. else if (const SCEVAddRecExpr *
  2187. AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
  2188. // Negation preserves a recurrence's no self-wrap property.
  2189. SmallVector<const SCEV *, 4> Operands;
  2190. for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
  2191. E = AddRec->op_end(); I != E; ++I) {
  2192. Operands.push_back(getMulExpr(Ops[0], *I));
  2193. }
  2194. return getAddRecExpr(Operands, AddRec->getLoop(),
  2195. AddRec->getNoWrapFlags(SCEV::FlagNW));
  2196. }
  2197. }
  2198. }
  2199. if (Ops.size() == 1)
  2200. return Ops[0];
  2201. }
  2202. // Skip over the add expression until we get to a multiply.
  2203. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
  2204. ++Idx;
  2205. // If there are mul operands inline them all into this expression.
  2206. if (Idx < Ops.size()) {
  2207. bool DeletedMul = false;
  2208. while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
  2209. // If we have an mul, expand the mul operands onto the end of the operands
  2210. // list.
  2211. Ops.erase(Ops.begin()+Idx);
  2212. Ops.append(Mul->op_begin(), Mul->op_end());
  2213. DeletedMul = true;
  2214. }
  2215. // If we deleted at least one mul, we added operands to the end of the list,
  2216. // and they are not necessarily sorted. Recurse to resort and resimplify
  2217. // any operands we just acquired.
  2218. if (DeletedMul)
  2219. return getMulExpr(Ops);
  2220. }
  2221. // If there are any add recurrences in the operands list, see if any other
  2222. // added values are loop invariant. If so, we can fold them into the
  2223. // recurrence.
  2224. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
  2225. ++Idx;
  2226. // Scan over all recurrences, trying to fold loop invariants into them.
  2227. for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
  2228. // Scan all of the other operands to this mul and add them to the vector if
  2229. // they are loop invariant w.r.t. the recurrence.
  2230. SmallVector<const SCEV *, 8> LIOps;
  2231. const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
  2232. const Loop *AddRecLoop = AddRec->getLoop();
  2233. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  2234. if (isLoopInvariant(Ops[i], AddRecLoop)) {
  2235. LIOps.push_back(Ops[i]);
  2236. Ops.erase(Ops.begin()+i);
  2237. --i; --e;
  2238. }
  2239. // If we found some loop invariants, fold them into the recurrence.
  2240. if (!LIOps.empty()) {
  2241. // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
  2242. SmallVector<const SCEV *, 4> NewOps;
  2243. NewOps.reserve(AddRec->getNumOperands());
  2244. const SCEV *Scale = getMulExpr(LIOps);
  2245. for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
  2246. NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
  2247. // Build the new addrec. Propagate the NUW and NSW flags if both the
  2248. // outer mul and the inner addrec are guaranteed to have no overflow.
  2249. //
  2250. // No self-wrap cannot be guaranteed after changing the step size, but
  2251. // will be inferred if either NUW or NSW is true.
  2252. Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
  2253. const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
  2254. // If all of the other operands were loop invariant, we are done.
  2255. if (Ops.size() == 1) return NewRec;
  2256. // Otherwise, multiply the folded AddRec by the non-invariant parts.
  2257. for (unsigned i = 0;; ++i)
  2258. if (Ops[i] == AddRec) {
  2259. Ops[i] = NewRec;
  2260. break;
  2261. }
  2262. return getMulExpr(Ops);
  2263. }
  2264. // Okay, if there weren't any loop invariants to be folded, check to see if
  2265. // there are multiple AddRec's with the same loop induction variable being
  2266. // multiplied together. If so, we can fold them.
  2267. // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
  2268. // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
  2269. // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
  2270. // ]]],+,...up to x=2n}.
  2271. // Note that the arguments to choose() are always integers with values
  2272. // known at compile time, never SCEV objects.
  2273. //
  2274. // The implementation avoids pointless extra computations when the two
  2275. // addrec's are of different length (mathematically, it's equivalent to
  2276. // an infinite stream of zeros on the right).
  2277. bool OpsModified = false;
  2278. for (unsigned OtherIdx = Idx+1;
  2279. OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
  2280. ++OtherIdx) {
  2281. const SCEVAddRecExpr *OtherAddRec =
  2282. dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
  2283. if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
  2284. continue;
  2285. bool Overflow = false;
  2286. Type *Ty = AddRec->getType();
  2287. bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
  2288. SmallVector<const SCEV*, 7> AddRecOps;
  2289. for (int x = 0, xe = AddRec->getNumOperands() +
  2290. OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
  2291. const SCEV *Term = getConstant(Ty, 0);
  2292. for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
  2293. uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
  2294. for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
  2295. ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
  2296. z < ze && !Overflow; ++z) {
  2297. uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
  2298. uint64_t Coeff;
  2299. if (LargerThan64Bits)
  2300. Coeff = umul_ov(Coeff1, Coeff2, Overflow);
  2301. else
  2302. Coeff = Coeff1*Coeff2;
  2303. const SCEV *CoeffTerm = getConstant(Ty, Coeff);
  2304. const SCEV *Term1 = AddRec->getOperand(y-z);
  2305. const SCEV *Term2 = OtherAddRec->getOperand(z);
  2306. Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
  2307. }
  2308. }
  2309. AddRecOps.push_back(Term);
  2310. }
  2311. if (!Overflow) {
  2312. const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
  2313. SCEV::FlagAnyWrap);
  2314. if (Ops.size() == 2) return NewAddRec;
  2315. Ops[Idx] = NewAddRec;
  2316. Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
  2317. OpsModified = true;
  2318. AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
  2319. if (!AddRec)
  2320. break;
  2321. }
  2322. }
  2323. if (OpsModified)
  2324. return getMulExpr(Ops);
  2325. // Otherwise couldn't fold anything into this recurrence. Move onto the
  2326. // next one.
  2327. }
  2328. // Okay, it looks like we really DO need an mul expr. Check to see if we
  2329. // already have one, otherwise create a new one.
  2330. FoldingSetNodeID ID;
  2331. ID.AddInteger(scMulExpr);
  2332. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  2333. ID.AddPointer(Ops[i]);
  2334. void *IP = nullptr;
  2335. SCEVMulExpr *S =
  2336. static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  2337. if (!S) {
  2338. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  2339. std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  2340. S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
  2341. O, Ops.size());
  2342. UniqueSCEVs.InsertNode(S, IP);
  2343. }
  2344. S->setNoWrapFlags(Flags);
  2345. return S;
  2346. }
  2347. /// getUDivExpr - Get a canonical unsigned division expression, or something
  2348. /// simpler if possible.
  2349. const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
  2350. const SCEV *RHS) {
  2351. assert(getEffectiveSCEVType(LHS->getType()) ==
  2352. getEffectiveSCEVType(RHS->getType()) &&
  2353. "SCEVUDivExpr operand types don't match!");
  2354. if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
  2355. if (RHSC->getValue()->equalsInt(1))
  2356. return LHS; // X udiv 1 --> x
  2357. // If the denominator is zero, the result of the udiv is undefined. Don't
  2358. // try to analyze it, because the resolution chosen here may differ from
  2359. // the resolution chosen in other parts of the compiler.
  2360. if (!RHSC->getValue()->isZero()) {
  2361. // Determine if the division can be folded into the operands of
  2362. // its operands.
  2363. // TODO: Generalize this to non-constants by using known-bits information.
  2364. Type *Ty = LHS->getType();
  2365. unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
  2366. unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
  2367. // For non-power-of-two values, effectively round the value up to the
  2368. // nearest power of two.
  2369. if (!RHSC->getValue()->getValue().isPowerOf2())
  2370. ++MaxShiftAmt;
  2371. IntegerType *ExtTy =
  2372. IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
  2373. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
  2374. if (const SCEVConstant *Step =
  2375. dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
  2376. // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
  2377. const APInt &StepInt = Step->getValue()->getValue();
  2378. const APInt &DivInt = RHSC->getValue()->getValue();
  2379. if (!StepInt.urem(DivInt) &&
  2380. getZeroExtendExpr(AR, ExtTy) ==
  2381. getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
  2382. getZeroExtendExpr(Step, ExtTy),
  2383. AR->getLoop(), SCEV::FlagAnyWrap)) {
  2384. SmallVector<const SCEV *, 4> Operands;
  2385. for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
  2386. Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
  2387. return getAddRecExpr(Operands, AR->getLoop(),
  2388. SCEV::FlagNW);
  2389. }
  2390. /// Get a canonical UDivExpr for a recurrence.
  2391. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
  2392. // We can currently only fold X%N if X is constant.
  2393. const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
  2394. if (StartC && !DivInt.urem(StepInt) &&
  2395. getZeroExtendExpr(AR, ExtTy) ==
  2396. getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
  2397. getZeroExtendExpr(Step, ExtTy),
  2398. AR->getLoop(), SCEV::FlagAnyWrap)) {
  2399. const APInt &StartInt = StartC->getValue()->getValue();
  2400. const APInt &StartRem = StartInt.urem(StepInt);
  2401. if (StartRem != 0)
  2402. LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
  2403. AR->getLoop(), SCEV::FlagNW);
  2404. }
  2405. }
  2406. // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
  2407. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
  2408. SmallVector<const SCEV *, 4> Operands;
  2409. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
  2410. Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
  2411. if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
  2412. // Find an operand that's safely divisible.
  2413. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
  2414. const SCEV *Op = M->getOperand(i);
  2415. const SCEV *Div = getUDivExpr(Op, RHSC);
  2416. if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
  2417. Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
  2418. M->op_end());
  2419. Operands[i] = Div;
  2420. return getMulExpr(Operands);
  2421. }
  2422. }
  2423. }
  2424. // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
  2425. if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
  2426. SmallVector<const SCEV *, 4> Operands;
  2427. for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
  2428. Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
  2429. if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
  2430. Operands.clear();
  2431. for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
  2432. const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
  2433. if (isa<SCEVUDivExpr>(Op) ||
  2434. getMulExpr(Op, RHS) != A->getOperand(i))
  2435. break;
  2436. Operands.push_back(Op);
  2437. }
  2438. if (Operands.size() == A->getNumOperands())
  2439. return getAddExpr(Operands);
  2440. }
  2441. }
  2442. // Fold if both operands are constant.
  2443. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
  2444. Constant *LHSCV = LHSC->getValue();
  2445. Constant *RHSCV = RHSC->getValue();
  2446. return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
  2447. RHSCV)));
  2448. }
  2449. }
  2450. }
  2451. FoldingSetNodeID ID;
  2452. ID.AddInteger(scUDivExpr);
  2453. ID.AddPointer(LHS);
  2454. ID.AddPointer(RHS);
  2455. void *IP = nullptr;
  2456. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  2457. SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
  2458. LHS, RHS);
  2459. UniqueSCEVs.InsertNode(S, IP);
  2460. return S;
  2461. }
  2462. static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
  2463. APInt A = C1->getValue()->getValue().abs();
  2464. APInt B = C2->getValue()->getValue().abs();
  2465. uint32_t ABW = A.getBitWidth();
  2466. uint32_t BBW = B.getBitWidth();
  2467. if (ABW > BBW)
  2468. B = B.zext(ABW);
  2469. else if (ABW < BBW)
  2470. A = A.zext(BBW);
  2471. return APIntOps::GreatestCommonDivisor(A, B);
  2472. }
  2473. /// getUDivExactExpr - Get a canonical unsigned division expression, or
  2474. /// something simpler if possible. There is no representation for an exact udiv
  2475. /// in SCEV IR, but we can attempt to remove factors from the LHS and RHS.
  2476. /// We can't do this when it's not exact because the udiv may be clearing bits.
  2477. const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
  2478. const SCEV *RHS) {
  2479. // TODO: we could try to find factors in all sorts of things, but for now we
  2480. // just deal with u/exact (multiply, constant). See SCEVDivision towards the
  2481. // end of this file for inspiration.
  2482. const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
  2483. if (!Mul)
  2484. return getUDivExpr(LHS, RHS);
  2485. if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
  2486. // If the mulexpr multiplies by a constant, then that constant must be the
  2487. // first element of the mulexpr.
  2488. if (const SCEVConstant *LHSCst =
  2489. dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
  2490. if (LHSCst == RHSCst) {
  2491. SmallVector<const SCEV *, 2> Operands;
  2492. Operands.append(Mul->op_begin() + 1, Mul->op_end());
  2493. return getMulExpr(Operands);
  2494. }
  2495. // We can't just assume that LHSCst divides RHSCst cleanly, it could be
  2496. // that there's a factor provided by one of the other terms. We need to
  2497. // check.
  2498. APInt Factor = gcd(LHSCst, RHSCst);
  2499. if (!Factor.isIntN(1)) {
  2500. LHSCst = cast<SCEVConstant>(
  2501. getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
  2502. RHSCst = cast<SCEVConstant>(
  2503. getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
  2504. SmallVector<const SCEV *, 2> Operands;
  2505. Operands.push_back(LHSCst);
  2506. Operands.append(Mul->op_begin() + 1, Mul->op_end());
  2507. LHS = getMulExpr(Operands);
  2508. RHS = RHSCst;
  2509. Mul = dyn_cast<SCEVMulExpr>(LHS);
  2510. if (!Mul)
  2511. return getUDivExactExpr(LHS, RHS);
  2512. }
  2513. }
  2514. }
  2515. for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
  2516. if (Mul->getOperand(i) == RHS) {
  2517. SmallVector<const SCEV *, 2> Operands;
  2518. Operands.append(Mul->op_begin(), Mul->op_begin() + i);
  2519. Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
  2520. return getMulExpr(Operands);
  2521. }
  2522. }
  2523. return getUDivExpr(LHS, RHS);
  2524. }
  2525. /// getAddRecExpr - Get an add recurrence expression for the specified loop.
  2526. /// Simplify the expression as much as possible.
  2527. const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
  2528. const Loop *L,
  2529. SCEV::NoWrapFlags Flags) {
  2530. SmallVector<const SCEV *, 4> Operands;
  2531. Operands.push_back(Start);
  2532. if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
  2533. if (StepChrec->getLoop() == L) {
  2534. Operands.append(StepChrec->op_begin(), StepChrec->op_end());
  2535. return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
  2536. }
  2537. Operands.push_back(Step);
  2538. return getAddRecExpr(Operands, L, Flags);
  2539. }
  2540. /// getAddRecExpr - Get an add recurrence expression for the specified loop.
  2541. /// Simplify the expression as much as possible.
  2542. const SCEV *
  2543. ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
  2544. const Loop *L, SCEV::NoWrapFlags Flags) {
  2545. if (Operands.size() == 1) return Operands[0];
  2546. #ifndef NDEBUG
  2547. Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
  2548. for (unsigned i = 1, e = Operands.size(); i != e; ++i)
  2549. assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
  2550. "SCEVAddRecExpr operand types don't match!");
  2551. for (unsigned i = 0, e = Operands.size(); i != e; ++i)
  2552. assert(isLoopInvariant(Operands[i], L) &&
  2553. "SCEVAddRecExpr operand is not loop-invariant!");
  2554. #endif
  2555. if (Operands.back()->isZero()) {
  2556. Operands.pop_back();
  2557. return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
  2558. }
  2559. // It's tempting to want to call getMaxBackedgeTakenCount count here and
  2560. // use that information to infer NUW and NSW flags. However, computing a
  2561. // BE count requires calling getAddRecExpr, so we may not yet have a
  2562. // meaningful BE count at this point (and if we don't, we'd be stuck
  2563. // with a SCEVCouldNotCompute as the cached BE count).
  2564. Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
  2565. // Canonicalize nested AddRecs in by nesting them in order of loop depth.
  2566. if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
  2567. const Loop *NestedLoop = NestedAR->getLoop();
  2568. if (L->contains(NestedLoop) ?
  2569. (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
  2570. (!NestedLoop->contains(L) &&
  2571. DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
  2572. SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
  2573. NestedAR->op_end());
  2574. Operands[0] = NestedAR->getStart();
  2575. // AddRecs require their operands be loop-invariant with respect to their
  2576. // loops. Don't perform this transformation if it would break this
  2577. // requirement.
  2578. bool AllInvariant = true;
  2579. for (unsigned i = 0, e = Operands.size(); i != e; ++i)
  2580. if (!isLoopInvariant(Operands[i], L)) {
  2581. AllInvariant = false;
  2582. break;
  2583. }
  2584. if (AllInvariant) {
  2585. // Create a recurrence for the outer loop with the same step size.
  2586. //
  2587. // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
  2588. // inner recurrence has the same property.
  2589. SCEV::NoWrapFlags OuterFlags =
  2590. maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
  2591. NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
  2592. AllInvariant = true;
  2593. for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
  2594. if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
  2595. AllInvariant = false;
  2596. break;
  2597. }
  2598. if (AllInvariant) {
  2599. // Ok, both add recurrences are valid after the transformation.
  2600. //
  2601. // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
  2602. // the outer recurrence has the same property.
  2603. SCEV::NoWrapFlags InnerFlags =
  2604. maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
  2605. return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
  2606. }
  2607. }
  2608. // Reset Operands to its original state.
  2609. Operands[0] = NestedAR;
  2610. }
  2611. }
  2612. // Okay, it looks like we really DO need an addrec expr. Check to see if we
  2613. // already have one, otherwise create a new one.
  2614. FoldingSetNodeID ID;
  2615. ID.AddInteger(scAddRecExpr);
  2616. for (unsigned i = 0, e = Operands.size(); i != e; ++i)
  2617. ID.AddPointer(Operands[i]);
  2618. ID.AddPointer(L);
  2619. void *IP = nullptr;
  2620. SCEVAddRecExpr *S =
  2621. static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  2622. if (!S) {
  2623. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
  2624. std::uninitialized_copy(Operands.begin(), Operands.end(), O);
  2625. S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
  2626. O, Operands.size(), L);
  2627. UniqueSCEVs.InsertNode(S, IP);
  2628. }
  2629. S->setNoWrapFlags(Flags);
  2630. return S;
  2631. }
  2632. const SCEV *
  2633. ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
  2634. const SmallVectorImpl<const SCEV *> &IndexExprs,
  2635. bool InBounds) {
  2636. // getSCEV(Base)->getType() has the same address space as Base->getType()
  2637. // because SCEV::getType() preserves the address space.
  2638. Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
  2639. // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
  2640. // instruction to its SCEV, because the Instruction may be guarded by control
  2641. // flow and the no-overflow bits may not be valid for the expression in any
  2642. // context.
  2643. SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
  2644. const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
  2645. // The address space is unimportant. The first thing we do on CurTy is getting
  2646. // its element type.
  2647. Type *CurTy = PointerType::getUnqual(PointeeType);
  2648. for (const SCEV *IndexExpr : IndexExprs) {
  2649. // Compute the (potentially symbolic) offset in bytes for this index.
  2650. if (StructType *STy = dyn_cast<StructType>(CurTy)) {
  2651. // For a struct, add the member offset.
  2652. ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
  2653. unsigned FieldNo = Index->getZExtValue();
  2654. const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
  2655. // Add the field offset to the running total offset.
  2656. TotalOffset = getAddExpr(TotalOffset, FieldOffset);
  2657. // Update CurTy to the type of the field at Index.
  2658. CurTy = STy->getTypeAtIndex(Index);
  2659. } else {
  2660. // Update CurTy to its element type.
  2661. CurTy = cast<SequentialType>(CurTy)->getElementType();
  2662. // For an array, add the element offset, explicitly scaled.
  2663. const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
  2664. // Getelementptr indices are signed.
  2665. IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);
  2666. // Multiply the index by the element size to compute the element offset.
  2667. const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
  2668. // Add the element offset to the running total offset.
  2669. TotalOffset = getAddExpr(TotalOffset, LocalOffset);
  2670. }
  2671. }
  2672. // Add the total offset from all the GEP indices to the base.
  2673. return getAddExpr(BaseExpr, TotalOffset, Wrap);
  2674. }
  2675. const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
  2676. const SCEV *RHS) {
  2677. SmallVector<const SCEV *, 2> Ops;
  2678. Ops.push_back(LHS);
  2679. Ops.push_back(RHS);
  2680. return getSMaxExpr(Ops);
  2681. }
  2682. const SCEV *
  2683. ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  2684. assert(!Ops.empty() && "Cannot get empty smax!");
  2685. if (Ops.size() == 1) return Ops[0];
  2686. #ifndef NDEBUG
  2687. Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  2688. for (unsigned i = 1, e = Ops.size(); i != e; ++i)
  2689. assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
  2690. "SCEVSMaxExpr operand types don't match!");
  2691. #endif
  2692. // Sort by complexity, this groups all similar expression types together.
  2693. GroupByComplexity(Ops, LI);
  2694. // If there are any constants, fold them together.
  2695. unsigned Idx = 0;
  2696. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
  2697. ++Idx;
  2698. assert(Idx < Ops.size());
  2699. while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
  2700. // We found two constants, fold them together!
  2701. ConstantInt *Fold = ConstantInt::get(getContext(),
  2702. APIntOps::smax(LHSC->getValue()->getValue(),
  2703. RHSC->getValue()->getValue()));
  2704. Ops[0] = getConstant(Fold);
  2705. Ops.erase(Ops.begin()+1); // Erase the folded element
  2706. if (Ops.size() == 1) return Ops[0];
  2707. LHSC = cast<SCEVConstant>(Ops[0]);
  2708. }
  2709. // If we are left with a constant minimum-int, strip it off.
  2710. if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
  2711. Ops.erase(Ops.begin());
  2712. --Idx;
  2713. } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
  2714. // If we have an smax with a constant maximum-int, it will always be
  2715. // maximum-int.
  2716. return Ops[0];
  2717. }
  2718. if (Ops.size() == 1) return Ops[0];
  2719. }
  2720. // Find the first SMax
  2721. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
  2722. ++Idx;
  2723. // Check to see if one of the operands is an SMax. If so, expand its operands
  2724. // onto our operand list, and recurse to simplify.
  2725. if (Idx < Ops.size()) {
  2726. bool DeletedSMax = false;
  2727. while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
  2728. Ops.erase(Ops.begin()+Idx);
  2729. Ops.append(SMax->op_begin(), SMax->op_end());
  2730. DeletedSMax = true;
  2731. }
  2732. if (DeletedSMax)
  2733. return getSMaxExpr(Ops);
  2734. }
  2735. // Okay, check to see if the same value occurs in the operand list twice. If
  2736. // so, delete one. Since we sorted the list, these values are required to
  2737. // be adjacent.
  2738. for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
  2739. // X smax Y smax Y --> X smax Y
  2740. // X smax Y --> X, if X is always greater than Y
  2741. if (Ops[i] == Ops[i+1] ||
  2742. isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
  2743. Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
  2744. --i; --e;
  2745. } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
  2746. Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
  2747. --i; --e;
  2748. }
  2749. if (Ops.size() == 1) return Ops[0];
  2750. assert(!Ops.empty() && "Reduced smax down to nothing!");
  2751. // Okay, it looks like we really DO need an smax expr. Check to see if we
  2752. // already have one, otherwise create a new one.
  2753. FoldingSetNodeID ID;
  2754. ID.AddInteger(scSMaxExpr);
  2755. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  2756. ID.AddPointer(Ops[i]);
  2757. void *IP = nullptr;
  2758. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  2759. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  2760. std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  2761. SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
  2762. O, Ops.size());
  2763. UniqueSCEVs.InsertNode(S, IP);
  2764. return S;
  2765. }
  2766. const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
  2767. const SCEV *RHS) {
  2768. SmallVector<const SCEV *, 2> Ops;
  2769. Ops.push_back(LHS);
  2770. Ops.push_back(RHS);
  2771. return getUMaxExpr(Ops);
  2772. }
  2773. const SCEV *
  2774. ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  2775. assert(!Ops.empty() && "Cannot get empty umax!");
  2776. if (Ops.size() == 1) return Ops[0];
  2777. #ifndef NDEBUG
  2778. Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  2779. for (unsigned i = 1, e = Ops.size(); i != e; ++i)
  2780. assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
  2781. "SCEVUMaxExpr operand types don't match!");
  2782. #endif
  2783. // Sort by complexity, this groups all similar expression types together.
  2784. GroupByComplexity(Ops, LI);
  2785. // If there are any constants, fold them together.
  2786. unsigned Idx = 0;
  2787. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
  2788. ++Idx;
  2789. assert(Idx < Ops.size());
  2790. while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
  2791. // We found two constants, fold them together!
  2792. ConstantInt *Fold = ConstantInt::get(getContext(),
  2793. APIntOps::umax(LHSC->getValue()->getValue(),
  2794. RHSC->getValue()->getValue()));
  2795. Ops[0] = getConstant(Fold);
  2796. Ops.erase(Ops.begin()+1); // Erase the folded element
  2797. if (Ops.size() == 1) return Ops[0];
  2798. LHSC = cast<SCEVConstant>(Ops[0]);
  2799. }
  2800. // If we are left with a constant minimum-int, strip it off.
  2801. if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
  2802. Ops.erase(Ops.begin());
  2803. --Idx;
  2804. } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
  2805. // If we have an umax with a constant maximum-int, it will always be
  2806. // maximum-int.
  2807. return Ops[0];
  2808. }
  2809. if (Ops.size() == 1) return Ops[0];
  2810. }
  2811. // Find the first UMax
  2812. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
  2813. ++Idx;
  2814. // Check to see if one of the operands is a UMax. If so, expand its operands
  2815. // onto our operand list, and recurse to simplify.
  2816. if (Idx < Ops.size()) {
  2817. bool DeletedUMax = false;
  2818. while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
  2819. Ops.erase(Ops.begin()+Idx);
  2820. Ops.append(UMax->op_begin(), UMax->op_end());
  2821. DeletedUMax = true;
  2822. }
  2823. if (DeletedUMax)
  2824. return getUMaxExpr(Ops);
  2825. }
  2826. // Okay, check to see if the same value occurs in the operand list twice. If
  2827. // so, delete one. Since we sorted the list, these values are required to
  2828. // be adjacent.
  2829. for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
  2830. // X umax Y umax Y --> X umax Y
  2831. // X umax Y --> X, if X is always greater than Y
  2832. if (Ops[i] == Ops[i+1] ||
  2833. isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
  2834. Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
  2835. --i; --e;
  2836. } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
  2837. Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
  2838. --i; --e;
  2839. }
  2840. if (Ops.size() == 1) return Ops[0];
  2841. assert(!Ops.empty() && "Reduced umax down to nothing!");
  2842. // Okay, it looks like we really DO need a umax expr. Check to see if we
  2843. // already have one, otherwise create a new one.
  2844. FoldingSetNodeID ID;
  2845. ID.AddInteger(scUMaxExpr);
  2846. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  2847. ID.AddPointer(Ops[i]);
  2848. void *IP = nullptr;
  2849. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  2850. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  2851. std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  2852. SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
  2853. O, Ops.size());
  2854. UniqueSCEVs.InsertNode(S, IP);
  2855. return S;
  2856. }
  2857. const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
  2858. const SCEV *RHS) {
  2859. // ~smax(~x, ~y) == smin(x, y).
  2860. return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
  2861. }
  2862. const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
  2863. const SCEV *RHS) {
  2864. // ~umax(~x, ~y) == umin(x, y)
  2865. return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
  2866. }
  2867. const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
  2868. // We can bypass creating a target-independent
  2869. // constant expression and then folding it back into a ConstantInt.
  2870. // This is just a compile-time optimization.
  2871. return getConstant(IntTy,
  2872. F->getParent()->getDataLayout().getTypeAllocSize(AllocTy));
  2873. }
  2874. const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
  2875. StructType *STy,
  2876. unsigned FieldNo) {
  2877. // We can bypass creating a target-independent
  2878. // constant expression and then folding it back into a ConstantInt.
  2879. // This is just a compile-time optimization.
  2880. return getConstant(
  2881. IntTy,
  2882. F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
  2883. FieldNo));
  2884. }
  2885. const SCEV *ScalarEvolution::getUnknown(Value *V) {
  2886. // Don't attempt to do anything other than create a SCEVUnknown object
  2887. // here. createSCEV only calls getUnknown after checking for all other
  2888. // interesting possibilities, and any other code that calls getUnknown
  2889. // is doing so in order to hide a value from SCEV canonicalization.
  2890. FoldingSetNodeID ID;
  2891. ID.AddInteger(scUnknown);
  2892. ID.AddPointer(V);
  2893. void *IP = nullptr;
  2894. if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
  2895. assert(cast<SCEVUnknown>(S)->getValue() == V &&
  2896. "Stale SCEVUnknown in uniquing map!");
  2897. return S;
  2898. }
  2899. SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
  2900. FirstUnknown);
  2901. FirstUnknown = cast<SCEVUnknown>(S);
  2902. UniqueSCEVs.InsertNode(S, IP);
  2903. return S;
  2904. }
  2905. //===----------------------------------------------------------------------===//
  2906. // Basic SCEV Analysis and PHI Idiom Recognition Code
  2907. //
  2908. /// isSCEVable - Test if values of the given type are analyzable within
  2909. /// the SCEV framework. This primarily includes integer types, and it
  2910. /// can optionally include pointer types if the ScalarEvolution class
  2911. /// has access to target-specific information.
  2912. bool ScalarEvolution::isSCEVable(Type *Ty) const {
  2913. // Integers and pointers are always SCEVable.
  2914. return Ty->isIntegerTy() || Ty->isPointerTy();
  2915. }
  2916. /// getTypeSizeInBits - Return the size in bits of the specified type,
  2917. /// for which isSCEVable must return true.
  2918. uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
  2919. assert(isSCEVable(Ty) && "Type is not SCEVable!");
  2920. return F->getParent()->getDataLayout().getTypeSizeInBits(Ty);
  2921. }
  2922. /// getEffectiveSCEVType - Return a type with the same bitwidth as
  2923. /// the given type and which represents how SCEV will treat the given
  2924. /// type, for which isSCEVable must return true. For pointer types,
  2925. /// this is the pointer-sized integer type.
  2926. Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
  2927. assert(isSCEVable(Ty) && "Type is not SCEVable!");
  2928. if (Ty->isIntegerTy()) {
  2929. return Ty;
  2930. }
  2931. // The only other support type is pointer.
  2932. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
  2933. return F->getParent()->getDataLayout().getIntPtrType(Ty);
  2934. }
  2935. const SCEV *ScalarEvolution::getCouldNotCompute() {
  2936. return &CouldNotCompute;
  2937. }
  2938. namespace {
  2939. // Helper class working with SCEVTraversal to figure out if a SCEV contains
  2940. // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
  2941. // is set iff if find such SCEVUnknown.
  2942. //
  2943. struct FindInvalidSCEVUnknown {
  2944. bool FindOne;
  2945. FindInvalidSCEVUnknown() { FindOne = false; }
  2946. bool follow(const SCEV *S) {
  2947. switch (static_cast<SCEVTypes>(S->getSCEVType())) {
  2948. case scConstant:
  2949. return false;
  2950. case scUnknown:
  2951. if (!cast<SCEVUnknown>(S)->getValue())
  2952. FindOne = true;
  2953. return false;
  2954. default:
  2955. return true;
  2956. }
  2957. }
  2958. bool isDone() const { return FindOne; }
  2959. };
  2960. }
  2961. bool ScalarEvolution::checkValidity(const SCEV *S) const {
  2962. FindInvalidSCEVUnknown F;
  2963. SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
  2964. ST.visitAll(S);
  2965. return !F.FindOne;
  2966. }
  2967. /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
  2968. /// expression and create a new one.
  2969. const SCEV *ScalarEvolution::getSCEV(Value *V) {
  2970. assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
  2971. ValueExprMapType::iterator I = ValueExprMap.find_as(V);
  2972. if (I != ValueExprMap.end()) {
  2973. const SCEV *S = I->second;
  2974. if (checkValidity(S))
  2975. return S;
  2976. else
  2977. ValueExprMap.erase(I);
  2978. }
  2979. const SCEV *S = createSCEV(V);
  2980. // The process of creating a SCEV for V may have caused other SCEVs
  2981. // to have been created, so it's necessary to insert the new entry
  2982. // from scratch, rather than trying to remember the insert position
  2983. // above.
  2984. ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
  2985. return S;
  2986. }
  2987. /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
  2988. ///
  2989. const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
  2990. if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
  2991. return getConstant(
  2992. cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
  2993. Type *Ty = V->getType();
  2994. Ty = getEffectiveSCEVType(Ty);
  2995. return getMulExpr(V,
  2996. getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
  2997. }
  2998. /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
  2999. const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
  3000. if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
  3001. return getConstant(
  3002. cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
  3003. Type *Ty = V->getType();
  3004. Ty = getEffectiveSCEVType(Ty);
  3005. const SCEV *AllOnes =
  3006. getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
  3007. return getMinusSCEV(AllOnes, V);
  3008. }
  3009. /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
  3010. const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
  3011. SCEV::NoWrapFlags Flags) {
  3012. assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
  3013. // Fast path: X - X --> 0.
  3014. if (LHS == RHS)
  3015. return getConstant(LHS->getType(), 0);
  3016. // X - Y --> X + -Y.
  3017. // X -(nsw || nuw) Y --> X + -Y.
  3018. return getAddExpr(LHS, getNegativeSCEV(RHS));
  3019. }
  3020. /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
  3021. /// input value to the specified type. If the type must be extended, it is zero
  3022. /// extended.
  3023. const SCEV *
  3024. ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
  3025. Type *SrcTy = V->getType();
  3026. assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
  3027. (Ty->isIntegerTy() || Ty->isPointerTy()) &&
  3028. "Cannot truncate or zero extend with non-integer arguments!");
  3029. if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
  3030. return V; // No conversion
  3031. if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
  3032. return getTruncateExpr(V, Ty);
  3033. return getZeroExtendExpr(V, Ty);
  3034. }
  3035. /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
  3036. /// input value to the specified type. If the type must be extended, it is sign
  3037. /// extended.
  3038. const SCEV *
  3039. ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
  3040. Type *Ty) {
  3041. Type *SrcTy = V->getType();
  3042. assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
  3043. (Ty->isIntegerTy() || Ty->isPointerTy()) &&
  3044. "Cannot truncate or zero extend with non-integer arguments!");
  3045. if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
  3046. return V; // No conversion
  3047. if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
  3048. return getTruncateExpr(V, Ty);
  3049. return getSignExtendExpr(V, Ty);
  3050. }
  3051. /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
  3052. /// input value to the specified type. If the type must be extended, it is zero
  3053. /// extended. The conversion must not be narrowing.
  3054. const SCEV *
  3055. ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
  3056. Type *SrcTy = V->getType();
  3057. assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
  3058. (Ty->isIntegerTy() || Ty->isPointerTy()) &&
  3059. "Cannot noop or zero extend with non-integer arguments!");
  3060. assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
  3061. "getNoopOrZeroExtend cannot truncate!");
  3062. if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
  3063. return V; // No conversion
  3064. return getZeroExtendExpr(V, Ty);
  3065. }
  3066. /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
  3067. /// input value to the specified type. If the type must be extended, it is sign
  3068. /// extended. The conversion must not be narrowing.
  3069. const SCEV *
  3070. ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
  3071. Type *SrcTy = V->getType();
  3072. assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
  3073. (Ty->isIntegerTy() || Ty->isPointerTy()) &&
  3074. "Cannot noop or sign extend with non-integer arguments!");
  3075. assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
  3076. "getNoopOrSignExtend cannot truncate!");
  3077. if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
  3078. return V; // No conversion
  3079. return getSignExtendExpr(V, Ty);
  3080. }
  3081. /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
  3082. /// the input value to the specified type. If the type must be extended,
  3083. /// it is extended with unspecified bits. The conversion must not be
  3084. /// narrowing.
  3085. const SCEV *
  3086. ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
  3087. Type *SrcTy = V->getType();
  3088. assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
  3089. (Ty->isIntegerTy() || Ty->isPointerTy()) &&
  3090. "Cannot noop or any extend with non-integer arguments!");
  3091. assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
  3092. "getNoopOrAnyExtend cannot truncate!");
  3093. if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
  3094. return V; // No conversion
  3095. return getAnyExtendExpr(V, Ty);
  3096. }
  3097. /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
  3098. /// input value to the specified type. The conversion must not be widening.
  3099. const SCEV *
  3100. ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
  3101. Type *SrcTy = V->getType();
  3102. assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
  3103. (Ty->isIntegerTy() || Ty->isPointerTy()) &&
  3104. "Cannot truncate or noop with non-integer arguments!");
  3105. assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
  3106. "getTruncateOrNoop cannot extend!");
  3107. if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
  3108. return V; // No conversion
  3109. return getTruncateExpr(V, Ty);
  3110. }
  3111. /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
  3112. /// the types using zero-extension, and then perform a umax operation
  3113. /// with them.
  3114. const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
  3115. const SCEV *RHS) {
  3116. const SCEV *PromotedLHS = LHS;
  3117. const SCEV *PromotedRHS = RHS;
  3118. if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
  3119. PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
  3120. else
  3121. PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
  3122. return getUMaxExpr(PromotedLHS, PromotedRHS);
  3123. }
  3124. /// getUMinFromMismatchedTypes - Promote the operands to the wider of
  3125. /// the types using zero-extension, and then perform a umin operation
  3126. /// with them.
  3127. const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
  3128. const SCEV *RHS) {
  3129. const SCEV *PromotedLHS = LHS;
  3130. const SCEV *PromotedRHS = RHS;
  3131. if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
  3132. PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
  3133. else
  3134. PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
  3135. return getUMinExpr(PromotedLHS, PromotedRHS);
  3136. }
  3137. /// getPointerBase - Transitively follow the chain of pointer-type operands
  3138. /// until reaching a SCEV that does not have a single pointer operand. This
  3139. /// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
  3140. /// but corner cases do exist.
  3141. const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
  3142. // A pointer operand may evaluate to a nonpointer expression, such as null.
  3143. if (!V->getType()->isPointerTy())
  3144. return V;
  3145. if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
  3146. return getPointerBase(Cast->getOperand());
  3147. }
  3148. else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
  3149. const SCEV *PtrOp = nullptr;
  3150. for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
  3151. I != E; ++I) {
  3152. if ((*I)->getType()->isPointerTy()) {
  3153. // Cannot find the base of an expression with multiple pointer operands.
  3154. if (PtrOp)
  3155. return V;
  3156. PtrOp = *I;
  3157. }
  3158. }
  3159. if (!PtrOp)
  3160. return V;
  3161. return getPointerBase(PtrOp);
  3162. }
  3163. return V;
  3164. }
  3165. /// PushDefUseChildren - Push users of the given Instruction
  3166. /// onto the given Worklist.
  3167. static void
  3168. PushDefUseChildren(Instruction *I,
  3169. SmallVectorImpl<Instruction *> &Worklist) {
  3170. // Push the def-use children onto the Worklist stack.
  3171. for (User *U : I->users())
  3172. Worklist.push_back(cast<Instruction>(U));
  3173. }
  3174. /// ForgetSymbolicValue - This looks up computed SCEV values for all
  3175. /// instructions that depend on the given instruction and removes them from
  3176. /// the ValueExprMapType map if they reference SymName. This is used during PHI
  3177. /// resolution.
  3178. void
  3179. ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
  3180. SmallVector<Instruction *, 16> Worklist;
  3181. PushDefUseChildren(PN, Worklist);
  3182. SmallPtrSet<Instruction *, 8> Visited;
  3183. Visited.insert(PN);
  3184. while (!Worklist.empty()) {
  3185. Instruction *I = Worklist.pop_back_val();
  3186. if (!Visited.insert(I).second)
  3187. continue;
  3188. ValueExprMapType::iterator It =
  3189. ValueExprMap.find_as(static_cast<Value *>(I));
  3190. if (It != ValueExprMap.end()) {
  3191. const SCEV *Old = It->second;
  3192. // Short-circuit the def-use traversal if the symbolic name
  3193. // ceases to appear in expressions.
  3194. if (Old != SymName && !hasOperand(Old, SymName))
  3195. continue;
  3196. // SCEVUnknown for a PHI either means that it has an unrecognized
  3197. // structure, it's a PHI that's in the progress of being computed
  3198. // by createNodeForPHI, or it's a single-value PHI. In the first case,
  3199. // additional loop trip count information isn't going to change anything.
  3200. // In the second case, createNodeForPHI will perform the necessary
  3201. // updates on its own when it gets to that point. In the third, we do
  3202. // want to forget the SCEVUnknown.
  3203. if (!isa<PHINode>(I) ||
  3204. !isa<SCEVUnknown>(Old) ||
  3205. (I != PN && Old == SymName)) {
  3206. forgetMemoizedResults(Old);
  3207. ValueExprMap.erase(It);
  3208. }
  3209. }
  3210. PushDefUseChildren(I, Worklist);
  3211. }
  3212. }
  3213. /// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
  3214. /// a loop header, making it a potential recurrence, or it doesn't.
  3215. ///
  3216. const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
  3217. if (const Loop *L = LI->getLoopFor(PN->getParent()))
  3218. if (L->getHeader() == PN->getParent()) {
  3219. // The loop may have multiple entrances or multiple exits; we can analyze
  3220. // this phi as an addrec if it has a unique entry value and a unique
  3221. // backedge value.
  3222. Value *BEValueV = nullptr, *StartValueV = nullptr;
  3223. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
  3224. Value *V = PN->getIncomingValue(i);
  3225. if (L->contains(PN->getIncomingBlock(i))) {
  3226. if (!BEValueV) {
  3227. BEValueV = V;
  3228. } else if (BEValueV != V) {
  3229. BEValueV = nullptr;
  3230. break;
  3231. }
  3232. } else if (!StartValueV) {
  3233. StartValueV = V;
  3234. } else if (StartValueV != V) {
  3235. StartValueV = nullptr;
  3236. break;
  3237. }
  3238. }
  3239. if (BEValueV && StartValueV) {
  3240. // While we are analyzing this PHI node, handle its value symbolically.
  3241. const SCEV *SymbolicName = getUnknown(PN);
  3242. assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
  3243. "PHI node already processed?");
  3244. ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
  3245. // Using this symbolic name for the PHI, analyze the value coming around
  3246. // the back-edge.
  3247. const SCEV *BEValue = getSCEV(BEValueV);
  3248. // NOTE: If BEValue is loop invariant, we know that the PHI node just
  3249. // has a special value for the first iteration of the loop.
  3250. // If the value coming around the backedge is an add with the symbolic
  3251. // value we just inserted, then we found a simple induction variable!
  3252. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
  3253. // If there is a single occurrence of the symbolic value, replace it
  3254. // with a recurrence.
  3255. unsigned FoundIndex = Add->getNumOperands();
  3256. for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
  3257. if (Add->getOperand(i) == SymbolicName)
  3258. if (FoundIndex == e) {
  3259. FoundIndex = i;
  3260. break;
  3261. }
  3262. if (FoundIndex != Add->getNumOperands()) {
  3263. // Create an add with everything but the specified operand.
  3264. SmallVector<const SCEV *, 8> Ops;
  3265. for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
  3266. if (i != FoundIndex)
  3267. Ops.push_back(Add->getOperand(i));
  3268. const SCEV *Accum = getAddExpr(Ops);
  3269. // This is not a valid addrec if the step amount is varying each
  3270. // loop iteration, but is not itself an addrec in this loop.
  3271. if (isLoopInvariant(Accum, L) ||
  3272. (isa<SCEVAddRecExpr>(Accum) &&
  3273. cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
  3274. SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
  3275. // If the increment doesn't overflow, then neither the addrec nor
  3276. // the post-increment will overflow.
  3277. if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
  3278. if (OBO->getOperand(0) == PN) {
  3279. if (OBO->hasNoUnsignedWrap())
  3280. Flags = setFlags(Flags, SCEV::FlagNUW);
  3281. if (OBO->hasNoSignedWrap())
  3282. Flags = setFlags(Flags, SCEV::FlagNSW);
  3283. }
  3284. } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
  3285. // If the increment is an inbounds GEP, then we know the address
  3286. // space cannot be wrapped around. We cannot make any guarantee
  3287. // about signed or unsigned overflow because pointers are
  3288. // unsigned but we may have a negative index from the base
  3289. // pointer. We can guarantee that no unsigned wrap occurs if the
  3290. // indices form a positive value.
  3291. if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
  3292. Flags = setFlags(Flags, SCEV::FlagNW);
  3293. const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
  3294. if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
  3295. Flags = setFlags(Flags, SCEV::FlagNUW);
  3296. }
  3297. // We cannot transfer nuw and nsw flags from subtraction
  3298. // operations -- sub nuw X, Y is not the same as add nuw X, -Y
  3299. // for instance.
  3300. }
  3301. const SCEV *StartVal = getSCEV(StartValueV);
  3302. const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
  3303. // Since the no-wrap flags are on the increment, they apply to the
  3304. // post-incremented value as well.
  3305. if (isLoopInvariant(Accum, L))
  3306. (void)getAddRecExpr(getAddExpr(StartVal, Accum),
  3307. Accum, L, Flags);
  3308. // Okay, for the entire analysis of this edge we assumed the PHI
  3309. // to be symbolic. We now need to go back and purge all of the
  3310. // entries for the scalars that use the symbolic expression.
  3311. ForgetSymbolicName(PN, SymbolicName);
  3312. ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
  3313. return PHISCEV;
  3314. }
  3315. }
  3316. } else if (const SCEVAddRecExpr *AddRec =
  3317. dyn_cast<SCEVAddRecExpr>(BEValue)) {
  3318. // Otherwise, this could be a loop like this:
  3319. // i = 0; for (j = 1; ..; ++j) { .... i = j; }
  3320. // In this case, j = {1,+,1} and BEValue is j.
  3321. // Because the other in-value of i (0) fits the evolution of BEValue
  3322. // i really is an addrec evolution.
  3323. if (AddRec->getLoop() == L && AddRec->isAffine()) {
  3324. const SCEV *StartVal = getSCEV(StartValueV);
  3325. // If StartVal = j.start - j.stride, we can use StartVal as the
  3326. // initial step of the addrec evolution.
  3327. if (StartVal == getMinusSCEV(AddRec->getOperand(0),
  3328. AddRec->getOperand(1))) {
  3329. // FIXME: For constant StartVal, we should be able to infer
  3330. // no-wrap flags.
  3331. const SCEV *PHISCEV =
  3332. getAddRecExpr(StartVal, AddRec->getOperand(1), L,
  3333. SCEV::FlagAnyWrap);
  3334. // Okay, for the entire analysis of this edge we assumed the PHI
  3335. // to be symbolic. We now need to go back and purge all of the
  3336. // entries for the scalars that use the symbolic expression.
  3337. ForgetSymbolicName(PN, SymbolicName);
  3338. ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
  3339. return PHISCEV;
  3340. }
  3341. }
  3342. }
  3343. }
  3344. }
  3345. // If the PHI has a single incoming value, follow that value, unless the
  3346. // PHI's incoming blocks are in a different loop, in which case doing so
  3347. // risks breaking LCSSA form. Instcombine would normally zap these, but
  3348. // it doesn't have DominatorTree information, so it may miss cases.
  3349. if (Value *V =
  3350. SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC))
  3351. if (LI->replacementPreservesLCSSAForm(PN, V))
  3352. return getSCEV(V);
  3353. // If it's not a loop phi, we can't handle it yet.
  3354. return getUnknown(PN);
  3355. }
  3356. /// createNodeForGEP - Expand GEP instructions into add and multiply
  3357. /// operations. This allows them to be analyzed by regular SCEV code.
  3358. ///
  3359. const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
  3360. Value *Base = GEP->getOperand(0);
  3361. // Don't attempt to analyze GEPs over unsized objects.
  3362. if (!Base->getType()->getPointerElementType()->isSized())
  3363. return getUnknown(GEP);
  3364. SmallVector<const SCEV *, 4> IndexExprs;
  3365. for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
  3366. IndexExprs.push_back(getSCEV(*Index));
  3367. return getGEPExpr(GEP->getSourceElementType(), getSCEV(Base), IndexExprs,
  3368. GEP->isInBounds());
  3369. }
  3370. /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
  3371. /// guaranteed to end in (at every loop iteration). It is, at the same time,
  3372. /// the minimum number of times S is divisible by 2. For example, given {4,+,8}
  3373. /// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
  3374. uint32_t
  3375. ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
  3376. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
  3377. return C->getValue()->getValue().countTrailingZeros();
  3378. if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
  3379. return std::min(GetMinTrailingZeros(T->getOperand()),
  3380. (uint32_t)getTypeSizeInBits(T->getType()));
  3381. if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
  3382. uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
  3383. return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
  3384. getTypeSizeInBits(E->getType()) : OpRes;
  3385. }
  3386. if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
  3387. uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
  3388. return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
  3389. getTypeSizeInBits(E->getType()) : OpRes;
  3390. }
  3391. if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
  3392. // The result is the min of all operands results.
  3393. uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
  3394. for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
  3395. MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
  3396. return MinOpRes;
  3397. }
  3398. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
  3399. // The result is the sum of all operands results.
  3400. uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
  3401. uint32_t BitWidth = getTypeSizeInBits(M->getType());
  3402. for (unsigned i = 1, e = M->getNumOperands();
  3403. SumOpRes != BitWidth && i != e; ++i)
  3404. SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
  3405. BitWidth);
  3406. return SumOpRes;
  3407. }
  3408. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
  3409. // The result is the min of all operands results.
  3410. uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
  3411. for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
  3412. MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
  3413. return MinOpRes;
  3414. }
  3415. if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
  3416. // The result is the min of all operands results.
  3417. uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
  3418. for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
  3419. MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
  3420. return MinOpRes;
  3421. }
  3422. if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
  3423. // The result is the min of all operands results.
  3424. uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
  3425. for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
  3426. MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
  3427. return MinOpRes;
  3428. }
  3429. if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
  3430. // For a SCEVUnknown, ask ValueTracking.
  3431. unsigned BitWidth = getTypeSizeInBits(U->getType());
  3432. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
  3433. computeKnownBits(U->getValue(), Zeros, Ones,
  3434. F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
  3435. return Zeros.countTrailingOnes();
  3436. }
  3437. // SCEVUDivExpr
  3438. return 0;
  3439. }
  3440. /// GetRangeFromMetadata - Helper method to assign a range to V from
  3441. /// metadata present in the IR.
  3442. static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
  3443. if (Instruction *I = dyn_cast<Instruction>(V)) {
  3444. if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) {
  3445. ConstantRange TotalRange(
  3446. cast<IntegerType>(I->getType())->getBitWidth(), false);
  3447. unsigned NumRanges = MD->getNumOperands() / 2;
  3448. assert(NumRanges >= 1);
  3449. for (unsigned i = 0; i < NumRanges; ++i) {
  3450. ConstantInt *Lower =
  3451. mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0));
  3452. ConstantInt *Upper =
  3453. mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1));
  3454. ConstantRange Range(Lower->getValue(), Upper->getValue());
  3455. TotalRange = TotalRange.unionWith(Range);
  3456. }
  3457. return TotalRange;
  3458. }
  3459. }
  3460. return None;
  3461. }
  3462. /// getRange - Determine the range for a particular SCEV. If SignHint is
  3463. /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
  3464. /// with a "cleaner" unsigned (resp. signed) representation.
  3465. ///
  3466. ConstantRange
  3467. ScalarEvolution::getRange(const SCEV *S,
  3468. ScalarEvolution::RangeSignHint SignHint) {
  3469. DenseMap<const SCEV *, ConstantRange> &Cache =
  3470. SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
  3471. : SignedRanges;
  3472. // See if we've computed this range already.
  3473. DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
  3474. if (I != Cache.end())
  3475. return I->second;
  3476. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
  3477. return setRange(C, SignHint, ConstantRange(C->getValue()->getValue()));
  3478. unsigned BitWidth = getTypeSizeInBits(S->getType());
  3479. ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
  3480. // If the value has known zeros, the maximum value will have those known zeros
  3481. // as well.
  3482. uint32_t TZ = GetMinTrailingZeros(S);
  3483. if (TZ != 0) {
  3484. if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
  3485. ConservativeResult =
  3486. ConstantRange(APInt::getMinValue(BitWidth),
  3487. APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
  3488. else
  3489. ConservativeResult = ConstantRange(
  3490. APInt::getSignedMinValue(BitWidth),
  3491. APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
  3492. }
  3493. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
  3494. ConstantRange X = getRange(Add->getOperand(0), SignHint);
  3495. for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
  3496. X = X.add(getRange(Add->getOperand(i), SignHint));
  3497. return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
  3498. }
  3499. if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
  3500. ConstantRange X = getRange(Mul->getOperand(0), SignHint);
  3501. for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
  3502. X = X.multiply(getRange(Mul->getOperand(i), SignHint));
  3503. return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
  3504. }
  3505. if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
  3506. ConstantRange X = getRange(SMax->getOperand(0), SignHint);
  3507. for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
  3508. X = X.smax(getRange(SMax->getOperand(i), SignHint));
  3509. return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
  3510. }
  3511. if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
  3512. ConstantRange X = getRange(UMax->getOperand(0), SignHint);
  3513. for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
  3514. X = X.umax(getRange(UMax->getOperand(i), SignHint));
  3515. return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
  3516. }
  3517. if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
  3518. ConstantRange X = getRange(UDiv->getLHS(), SignHint);
  3519. ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
  3520. return setRange(UDiv, SignHint,
  3521. ConservativeResult.intersectWith(X.udiv(Y)));
  3522. }
  3523. if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
  3524. ConstantRange X = getRange(ZExt->getOperand(), SignHint);
  3525. return setRange(ZExt, SignHint,
  3526. ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
  3527. }
  3528. if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
  3529. ConstantRange X = getRange(SExt->getOperand(), SignHint);
  3530. return setRange(SExt, SignHint,
  3531. ConservativeResult.intersectWith(X.signExtend(BitWidth)));
  3532. }
  3533. if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
  3534. ConstantRange X = getRange(Trunc->getOperand(), SignHint);
  3535. return setRange(Trunc, SignHint,
  3536. ConservativeResult.intersectWith(X.truncate(BitWidth)));
  3537. }
  3538. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
  3539. // If there's no unsigned wrap, the value will never be less than its
  3540. // initial value.
  3541. if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
  3542. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
  3543. if (!C->getValue()->isZero())
  3544. ConservativeResult =
  3545. ConservativeResult.intersectWith(
  3546. ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
  3547. // If there's no signed wrap, and all the operands have the same sign or
  3548. // zero, the value won't ever change sign.
  3549. if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
  3550. bool AllNonNeg = true;
  3551. bool AllNonPos = true;
  3552. for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
  3553. if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
  3554. if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
  3555. }
  3556. if (AllNonNeg)
  3557. ConservativeResult = ConservativeResult.intersectWith(
  3558. ConstantRange(APInt(BitWidth, 0),
  3559. APInt::getSignedMinValue(BitWidth)));
  3560. else if (AllNonPos)
  3561. ConservativeResult = ConservativeResult.intersectWith(
  3562. ConstantRange(APInt::getSignedMinValue(BitWidth),
  3563. APInt(BitWidth, 1)));
  3564. }
  3565. // TODO: non-affine addrec
  3566. if (AddRec->isAffine()) {
  3567. Type *Ty = AddRec->getType();
  3568. const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
  3569. if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
  3570. getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
  3571. // Check for overflow. This must be done with ConstantRange arithmetic
  3572. // because we could be called from within the ScalarEvolution overflow
  3573. // checking code.
  3574. MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
  3575. ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
  3576. ConstantRange ZExtMaxBECountRange =
  3577. MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
  3578. const SCEV *Start = AddRec->getStart();
  3579. const SCEV *Step = AddRec->getStepRecurrence(*this);
  3580. ConstantRange StepSRange = getSignedRange(Step);
  3581. ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
  3582. ConstantRange StartURange = getUnsignedRange(Start);
  3583. ConstantRange EndURange =
  3584. StartURange.add(MaxBECountRange.multiply(StepSRange));
  3585. // Check for unsigned overflow.
  3586. ConstantRange ZExtStartURange =
  3587. StartURange.zextOrTrunc(BitWidth * 2 + 1);
  3588. ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
  3589. if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
  3590. ZExtEndURange) {
  3591. APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
  3592. EndURange.getUnsignedMin());
  3593. APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
  3594. EndURange.getUnsignedMax());
  3595. bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
  3596. if (!IsFullRange)
  3597. ConservativeResult =
  3598. ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
  3599. }
  3600. ConstantRange StartSRange = getSignedRange(Start);
  3601. ConstantRange EndSRange =
  3602. StartSRange.add(MaxBECountRange.multiply(StepSRange));
  3603. // Check for signed overflow. This must be done with ConstantRange
  3604. // arithmetic because we could be called from within the ScalarEvolution
  3605. // overflow checking code.
  3606. ConstantRange SExtStartSRange =
  3607. StartSRange.sextOrTrunc(BitWidth * 2 + 1);
  3608. ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
  3609. if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
  3610. SExtEndSRange) {
  3611. APInt Min = APIntOps::smin(StartSRange.getSignedMin(),
  3612. EndSRange.getSignedMin());
  3613. APInt Max = APIntOps::smax(StartSRange.getSignedMax(),
  3614. EndSRange.getSignedMax());
  3615. bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
  3616. if (!IsFullRange)
  3617. ConservativeResult =
  3618. ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
  3619. }
  3620. }
  3621. }
  3622. return setRange(AddRec, SignHint, ConservativeResult);
  3623. }
  3624. if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
  3625. // Check if the IR explicitly contains !range metadata.
  3626. Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
  3627. if (MDRange.hasValue())
  3628. ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
  3629. // Split here to avoid paying the compile-time cost of calling both
  3630. // computeKnownBits and ComputeNumSignBits. This restriction can be lifted
  3631. // if needed.
  3632. const DataLayout &DL = F->getParent()->getDataLayout();
  3633. if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
  3634. // For a SCEVUnknown, ask ValueTracking.
  3635. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
  3636. computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
  3637. if (Ones != ~Zeros + 1)
  3638. ConservativeResult =
  3639. ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
  3640. } else {
  3641. assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
  3642. "generalize as needed!");
  3643. unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
  3644. if (NS > 1)
  3645. ConservativeResult = ConservativeResult.intersectWith(
  3646. ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
  3647. APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
  3648. }
  3649. return setRange(U, SignHint, ConservativeResult);
  3650. }
  3651. return setRange(S, SignHint, ConservativeResult);
  3652. }
  3653. /// createSCEV - We know that there is no SCEV for the specified value.
  3654. /// Analyze the expression.
  3655. ///
  3656. const SCEV *ScalarEvolution::createSCEV(Value *V) {
  3657. if (!isSCEVable(V->getType()))
  3658. return getUnknown(V);
  3659. unsigned Opcode = Instruction::UserOp1;
  3660. if (Instruction *I = dyn_cast<Instruction>(V)) {
  3661. Opcode = I->getOpcode();
  3662. // Don't attempt to analyze instructions in blocks that aren't
  3663. // reachable. Such instructions don't matter, and they aren't required
  3664. // to obey basic rules for definitions dominating uses which this
  3665. // analysis depends on.
  3666. if (!DT->isReachableFromEntry(I->getParent()))
  3667. return getUnknown(V);
  3668. } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
  3669. Opcode = CE->getOpcode();
  3670. else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
  3671. return getConstant(CI);
  3672. else if (isa<ConstantPointerNull>(V))
  3673. return getConstant(V->getType(), 0);
  3674. else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
  3675. return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
  3676. else
  3677. return getUnknown(V);
  3678. Operator *U = cast<Operator>(V);
  3679. switch (Opcode) {
  3680. case Instruction::Add: {
  3681. // The simple thing to do would be to just call getSCEV on both operands
  3682. // and call getAddExpr with the result. However if we're looking at a
  3683. // bunch of things all added together, this can be quite inefficient,
  3684. // because it leads to N-1 getAddExpr calls for N ultimate operands.
  3685. // Instead, gather up all the operands and make a single getAddExpr call.
  3686. // LLVM IR canonical form means we need only traverse the left operands.
  3687. //
  3688. // Don't apply this instruction's NSW or NUW flags to the new
  3689. // expression. The instruction may be guarded by control flow that the
  3690. // no-wrap behavior depends on. Non-control-equivalent instructions can be
  3691. // mapped to the same SCEV expression, and it would be incorrect to transfer
  3692. // NSW/NUW semantics to those operations.
  3693. SmallVector<const SCEV *, 4> AddOps;
  3694. AddOps.push_back(getSCEV(U->getOperand(1)));
  3695. for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
  3696. unsigned Opcode = Op->getValueID() - Value::InstructionVal;
  3697. if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
  3698. break;
  3699. U = cast<Operator>(Op);
  3700. const SCEV *Op1 = getSCEV(U->getOperand(1));
  3701. if (Opcode == Instruction::Sub)
  3702. AddOps.push_back(getNegativeSCEV(Op1));
  3703. else
  3704. AddOps.push_back(Op1);
  3705. }
  3706. AddOps.push_back(getSCEV(U->getOperand(0)));
  3707. return getAddExpr(AddOps);
  3708. }
  3709. case Instruction::Mul: {
  3710. // Don't transfer NSW/NUW for the same reason as AddExpr.
  3711. SmallVector<const SCEV *, 4> MulOps;
  3712. MulOps.push_back(getSCEV(U->getOperand(1)));
  3713. for (Value *Op = U->getOperand(0);
  3714. Op->getValueID() == Instruction::Mul + Value::InstructionVal;
  3715. Op = U->getOperand(0)) {
  3716. U = cast<Operator>(Op);
  3717. MulOps.push_back(getSCEV(U->getOperand(1)));
  3718. }
  3719. MulOps.push_back(getSCEV(U->getOperand(0)));
  3720. return getMulExpr(MulOps);
  3721. }
  3722. case Instruction::UDiv:
  3723. return getUDivExpr(getSCEV(U->getOperand(0)),
  3724. getSCEV(U->getOperand(1)));
  3725. case Instruction::Sub:
  3726. return getMinusSCEV(getSCEV(U->getOperand(0)),
  3727. getSCEV(U->getOperand(1)));
  3728. case Instruction::And:
  3729. // For an expression like x&255 that merely masks off the high bits,
  3730. // use zext(trunc(x)) as the SCEV expression.
  3731. if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
  3732. if (CI->isNullValue())
  3733. return getSCEV(U->getOperand(1));
  3734. if (CI->isAllOnesValue())
  3735. return getSCEV(U->getOperand(0));
  3736. const APInt &A = CI->getValue();
  3737. // Instcombine's ShrinkDemandedConstant may strip bits out of
  3738. // constants, obscuring what would otherwise be a low-bits mask.
  3739. // Use computeKnownBits to compute what ShrinkDemandedConstant
  3740. // knew about to reconstruct a low-bits mask value.
  3741. unsigned LZ = A.countLeadingZeros();
  3742. unsigned TZ = A.countTrailingZeros();
  3743. unsigned BitWidth = A.getBitWidth();
  3744. APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  3745. computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
  3746. F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
  3747. APInt EffectiveMask =
  3748. APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
  3749. if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
  3750. const SCEV *MulCount = getConstant(
  3751. ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ)));
  3752. return getMulExpr(
  3753. getZeroExtendExpr(
  3754. getTruncateExpr(
  3755. getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount),
  3756. IntegerType::get(getContext(), BitWidth - LZ - TZ)),
  3757. U->getType()),
  3758. MulCount);
  3759. }
  3760. }
  3761. break;
  3762. case Instruction::Or:
  3763. // If the RHS of the Or is a constant, we may have something like:
  3764. // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
  3765. // optimizations will transparently handle this case.
  3766. //
  3767. // In order for this transformation to be safe, the LHS must be of the
  3768. // form X*(2^n) and the Or constant must be less than 2^n.
  3769. if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
  3770. const SCEV *LHS = getSCEV(U->getOperand(0));
  3771. const APInt &CIVal = CI->getValue();
  3772. if (GetMinTrailingZeros(LHS) >=
  3773. (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
  3774. // Build a plain add SCEV.
  3775. const SCEV *S = getAddExpr(LHS, getSCEV(CI));
  3776. // If the LHS of the add was an addrec and it has no-wrap flags,
  3777. // transfer the no-wrap flags, since an or won't introduce a wrap.
  3778. if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
  3779. const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
  3780. const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
  3781. OldAR->getNoWrapFlags());
  3782. }
  3783. return S;
  3784. }
  3785. }
  3786. break;
  3787. case Instruction::Xor:
  3788. if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
  3789. // If the RHS of the xor is a signbit, then this is just an add.
  3790. // Instcombine turns add of signbit into xor as a strength reduction step.
  3791. if (CI->getValue().isSignBit())
  3792. return getAddExpr(getSCEV(U->getOperand(0)),
  3793. getSCEV(U->getOperand(1)));
  3794. // If the RHS of xor is -1, then this is a not operation.
  3795. if (CI->isAllOnesValue())
  3796. return getNotSCEV(getSCEV(U->getOperand(0)));
  3797. // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
  3798. // This is a variant of the check for xor with -1, and it handles
  3799. // the case where instcombine has trimmed non-demanded bits out
  3800. // of an xor with -1.
  3801. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
  3802. if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
  3803. if (BO->getOpcode() == Instruction::And &&
  3804. LCI->getValue() == CI->getValue())
  3805. if (const SCEVZeroExtendExpr *Z =
  3806. dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
  3807. Type *UTy = U->getType();
  3808. const SCEV *Z0 = Z->getOperand();
  3809. Type *Z0Ty = Z0->getType();
  3810. unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
  3811. // If C is a low-bits mask, the zero extend is serving to
  3812. // mask off the high bits. Complement the operand and
  3813. // re-apply the zext.
  3814. if (APIntOps::isMask(Z0TySize, CI->getValue()))
  3815. return getZeroExtendExpr(getNotSCEV(Z0), UTy);
  3816. // If C is a single bit, it may be in the sign-bit position
  3817. // before the zero-extend. In this case, represent the xor
  3818. // using an add, which is equivalent, and re-apply the zext.
  3819. APInt Trunc = CI->getValue().trunc(Z0TySize);
  3820. if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
  3821. Trunc.isSignBit())
  3822. return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
  3823. UTy);
  3824. }
  3825. }
  3826. break;
  3827. case Instruction::Shl:
  3828. // Turn shift left of a constant amount into a multiply.
  3829. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
  3830. uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
  3831. // If the shift count is not less than the bitwidth, the result of
  3832. // the shift is undefined. Don't try to analyze it, because the
  3833. // resolution chosen here may differ from the resolution chosen in
  3834. // other parts of the compiler.
  3835. if (SA->getValue().uge(BitWidth))
  3836. break;
  3837. Constant *X = ConstantInt::get(getContext(),
  3838. APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
  3839. return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
  3840. }
  3841. break;
  3842. case Instruction::LShr:
  3843. // Turn logical shift right of a constant into a unsigned divide.
  3844. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
  3845. uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
  3846. // If the shift count is not less than the bitwidth, the result of
  3847. // the shift is undefined. Don't try to analyze it, because the
  3848. // resolution chosen here may differ from the resolution chosen in
  3849. // other parts of the compiler.
  3850. if (SA->getValue().uge(BitWidth))
  3851. break;
  3852. Constant *X = ConstantInt::get(getContext(),
  3853. APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
  3854. return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
  3855. }
  3856. break;
  3857. case Instruction::AShr:
  3858. // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
  3859. if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
  3860. if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
  3861. if (L->getOpcode() == Instruction::Shl &&
  3862. L->getOperand(1) == U->getOperand(1)) {
  3863. uint64_t BitWidth = getTypeSizeInBits(U->getType());
  3864. // If the shift count is not less than the bitwidth, the result of
  3865. // the shift is undefined. Don't try to analyze it, because the
  3866. // resolution chosen here may differ from the resolution chosen in
  3867. // other parts of the compiler.
  3868. if (CI->getValue().uge(BitWidth))
  3869. break;
  3870. uint64_t Amt = BitWidth - CI->getZExtValue();
  3871. if (Amt == BitWidth)
  3872. return getSCEV(L->getOperand(0)); // shift by zero --> noop
  3873. return
  3874. getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
  3875. IntegerType::get(getContext(),
  3876. Amt)),
  3877. U->getType());
  3878. }
  3879. break;
  3880. case Instruction::Trunc:
  3881. return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
  3882. case Instruction::ZExt:
  3883. return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
  3884. case Instruction::SExt:
  3885. return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
  3886. case Instruction::BitCast:
  3887. // BitCasts are no-op casts so we just eliminate the cast.
  3888. if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
  3889. return getSCEV(U->getOperand(0));
  3890. break;
  3891. // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
  3892. // lead to pointer expressions which cannot safely be expanded to GEPs,
  3893. // because ScalarEvolution doesn't respect the GEP aliasing rules when
  3894. // simplifying integer expressions.
  3895. case Instruction::GetElementPtr:
  3896. return createNodeForGEP(cast<GEPOperator>(U));
  3897. case Instruction::PHI:
  3898. return createNodeForPHI(cast<PHINode>(U));
  3899. case Instruction::Select:
  3900. // This could be a smax or umax that was lowered earlier.
  3901. // Try to recover it.
  3902. if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
  3903. Value *LHS = ICI->getOperand(0);
  3904. Value *RHS = ICI->getOperand(1);
  3905. switch (ICI->getPredicate()) {
  3906. case ICmpInst::ICMP_SLT:
  3907. case ICmpInst::ICMP_SLE:
  3908. std::swap(LHS, RHS);
  3909. // fall through
  3910. case ICmpInst::ICMP_SGT:
  3911. case ICmpInst::ICMP_SGE:
  3912. // a >s b ? a+x : b+x -> smax(a, b)+x
  3913. // a >s b ? b+x : a+x -> smin(a, b)+x
  3914. if (getTypeSizeInBits(LHS->getType()) <=
  3915. getTypeSizeInBits(U->getType())) {
  3916. const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType());
  3917. const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType());
  3918. const SCEV *LA = getSCEV(U->getOperand(1));
  3919. const SCEV *RA = getSCEV(U->getOperand(2));
  3920. const SCEV *LDiff = getMinusSCEV(LA, LS);
  3921. const SCEV *RDiff = getMinusSCEV(RA, RS);
  3922. if (LDiff == RDiff)
  3923. return getAddExpr(getSMaxExpr(LS, RS), LDiff);
  3924. LDiff = getMinusSCEV(LA, RS);
  3925. RDiff = getMinusSCEV(RA, LS);
  3926. if (LDiff == RDiff)
  3927. return getAddExpr(getSMinExpr(LS, RS), LDiff);
  3928. }
  3929. break;
  3930. case ICmpInst::ICMP_ULT:
  3931. case ICmpInst::ICMP_ULE:
  3932. std::swap(LHS, RHS);
  3933. // fall through
  3934. case ICmpInst::ICMP_UGT:
  3935. case ICmpInst::ICMP_UGE:
  3936. // a >u b ? a+x : b+x -> umax(a, b)+x
  3937. // a >u b ? b+x : a+x -> umin(a, b)+x
  3938. if (getTypeSizeInBits(LHS->getType()) <=
  3939. getTypeSizeInBits(U->getType())) {
  3940. const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
  3941. const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType());
  3942. const SCEV *LA = getSCEV(U->getOperand(1));
  3943. const SCEV *RA = getSCEV(U->getOperand(2));
  3944. const SCEV *LDiff = getMinusSCEV(LA, LS);
  3945. const SCEV *RDiff = getMinusSCEV(RA, RS);
  3946. if (LDiff == RDiff)
  3947. return getAddExpr(getUMaxExpr(LS, RS), LDiff);
  3948. LDiff = getMinusSCEV(LA, RS);
  3949. RDiff = getMinusSCEV(RA, LS);
  3950. if (LDiff == RDiff)
  3951. return getAddExpr(getUMinExpr(LS, RS), LDiff);
  3952. }
  3953. break;
  3954. case ICmpInst::ICMP_NE:
  3955. // n != 0 ? n+x : 1+x -> umax(n, 1)+x
  3956. if (getTypeSizeInBits(LHS->getType()) <=
  3957. getTypeSizeInBits(U->getType()) &&
  3958. isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
  3959. const SCEV *One = getConstant(U->getType(), 1);
  3960. const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
  3961. const SCEV *LA = getSCEV(U->getOperand(1));
  3962. const SCEV *RA = getSCEV(U->getOperand(2));
  3963. const SCEV *LDiff = getMinusSCEV(LA, LS);
  3964. const SCEV *RDiff = getMinusSCEV(RA, One);
  3965. if (LDiff == RDiff)
  3966. return getAddExpr(getUMaxExpr(One, LS), LDiff);
  3967. }
  3968. break;
  3969. case ICmpInst::ICMP_EQ:
  3970. // n == 0 ? 1+x : n+x -> umax(n, 1)+x
  3971. if (getTypeSizeInBits(LHS->getType()) <=
  3972. getTypeSizeInBits(U->getType()) &&
  3973. isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
  3974. const SCEV *One = getConstant(U->getType(), 1);
  3975. const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
  3976. const SCEV *LA = getSCEV(U->getOperand(1));
  3977. const SCEV *RA = getSCEV(U->getOperand(2));
  3978. const SCEV *LDiff = getMinusSCEV(LA, One);
  3979. const SCEV *RDiff = getMinusSCEV(RA, LS);
  3980. if (LDiff == RDiff)
  3981. return getAddExpr(getUMaxExpr(One, LS), LDiff);
  3982. }
  3983. break;
  3984. default:
  3985. break;
  3986. }
  3987. }
  3988. default: // We cannot analyze this expression.
  3989. break;
  3990. }
  3991. return getUnknown(V);
  3992. }
  3993. //===----------------------------------------------------------------------===//
  3994. // Iteration Count Computation Code
  3995. //
  3996. unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
  3997. if (BasicBlock *ExitingBB = L->getExitingBlock())
  3998. return getSmallConstantTripCount(L, ExitingBB);
  3999. // No trip count information for multiple exits.
  4000. return 0;
  4001. }
  4002. /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
  4003. /// normal unsigned value. Returns 0 if the trip count is unknown or not
  4004. /// constant. Will also return 0 if the maximum trip count is very large (>=
  4005. /// 2^32).
  4006. ///
  4007. /// This "trip count" assumes that control exits via ExitingBlock. More
  4008. /// precisely, it is the number of times that control may reach ExitingBlock
  4009. /// before taking the branch. For loops with multiple exits, it may not be the
  4010. /// number times that the loop header executes because the loop may exit
  4011. /// prematurely via another branch.
  4012. unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
  4013. BasicBlock *ExitingBlock) {
  4014. assert(ExitingBlock && "Must pass a non-null exiting block!");
  4015. assert(L->isLoopExiting(ExitingBlock) &&
  4016. "Exiting block must actually branch out of the loop!");
  4017. const SCEVConstant *ExitCount =
  4018. dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
  4019. if (!ExitCount)
  4020. return 0;
  4021. ConstantInt *ExitConst = ExitCount->getValue();
  4022. // Guard against huge trip counts.
  4023. if (ExitConst->getValue().getActiveBits() > 32)
  4024. return 0;
  4025. // In case of integer overflow, this returns 0, which is correct.
  4026. return ((unsigned)ExitConst->getZExtValue()) + 1;
  4027. }
  4028. unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
  4029. if (BasicBlock *ExitingBB = L->getExitingBlock())
  4030. return getSmallConstantTripMultiple(L, ExitingBB);
  4031. // No trip multiple information for multiple exits.
  4032. return 0;
  4033. }
  4034. /// getSmallConstantTripMultiple - Returns the largest constant divisor of the
  4035. /// trip count of this loop as a normal unsigned value, if possible. This
  4036. /// means that the actual trip count is always a multiple of the returned
  4037. /// value (don't forget the trip count could very well be zero as well!).
  4038. ///
  4039. /// Returns 1 if the trip count is unknown or not guaranteed to be the
  4040. /// multiple of a constant (which is also the case if the trip count is simply
  4041. /// constant, use getSmallConstantTripCount for that case), Will also return 1
  4042. /// if the trip count is very large (>= 2^32).
  4043. ///
  4044. /// As explained in the comments for getSmallConstantTripCount, this assumes
  4045. /// that control exits the loop via ExitingBlock.
  4046. unsigned
  4047. ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
  4048. BasicBlock *ExitingBlock) {
  4049. assert(ExitingBlock && "Must pass a non-null exiting block!");
  4050. assert(L->isLoopExiting(ExitingBlock) &&
  4051. "Exiting block must actually branch out of the loop!");
  4052. const SCEV *ExitCount = getExitCount(L, ExitingBlock);
  4053. if (ExitCount == getCouldNotCompute())
  4054. return 1;
  4055. // Get the trip count from the BE count by adding 1.
  4056. const SCEV *TCMul = getAddExpr(ExitCount,
  4057. getConstant(ExitCount->getType(), 1));
  4058. // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
  4059. // to factor simple cases.
  4060. if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
  4061. TCMul = Mul->getOperand(0);
  4062. const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
  4063. if (!MulC)
  4064. return 1;
  4065. ConstantInt *Result = MulC->getValue();
  4066. // Guard against huge trip counts (this requires checking
  4067. // for zero to handle the case where the trip count == -1 and the
  4068. // addition wraps).
  4069. if (!Result || Result->getValue().getActiveBits() > 32 ||
  4070. Result->getValue().getActiveBits() == 0)
  4071. return 1;
  4072. return (unsigned)Result->getZExtValue();
  4073. }
  4074. // getExitCount - Get the expression for the number of loop iterations for which
  4075. // this loop is guaranteed not to exit via ExitingBlock. Otherwise return
  4076. // SCEVCouldNotCompute.
  4077. const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
  4078. return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
  4079. }
  4080. /// getBackedgeTakenCount - If the specified loop has a predictable
  4081. /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
  4082. /// object. The backedge-taken count is the number of times the loop header
  4083. /// will be branched to from within the loop. This is one less than the
  4084. /// trip count of the loop, since it doesn't count the first iteration,
  4085. /// when the header is branched to from outside the loop.
  4086. ///
  4087. /// Note that it is not valid to call this method on a loop without a
  4088. /// loop-invariant backedge-taken count (see
  4089. /// hasLoopInvariantBackedgeTakenCount).
  4090. ///
  4091. const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
  4092. return getBackedgeTakenInfo(L).getExact(this);
  4093. }
  4094. /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
  4095. /// return the least SCEV value that is known never to be less than the
  4096. /// actual backedge taken count.
  4097. const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
  4098. return getBackedgeTakenInfo(L).getMax(this);
  4099. }
  4100. /// PushLoopPHIs - Push PHI nodes in the header of the given loop
  4101. /// onto the given Worklist.
  4102. static void
  4103. PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
  4104. BasicBlock *Header = L->getHeader();
  4105. // Push all Loop-header PHIs onto the Worklist stack.
  4106. for (BasicBlock::iterator I = Header->begin();
  4107. PHINode *PN = dyn_cast<PHINode>(I); ++I)
  4108. Worklist.push_back(PN);
  4109. }
  4110. const ScalarEvolution::BackedgeTakenInfo &
  4111. ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
  4112. // Initially insert an invalid entry for this loop. If the insertion
  4113. // succeeds, proceed to actually compute a backedge-taken count and
  4114. // update the value. The temporary CouldNotCompute value tells SCEV
  4115. // code elsewhere that it shouldn't attempt to request a new
  4116. // backedge-taken count, which could result in infinite recursion.
  4117. std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
  4118. BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
  4119. if (!Pair.second)
  4120. return Pair.first->second;
  4121. // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
  4122. // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
  4123. // must be cleared in this scope.
  4124. BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
  4125. if (Result.getExact(this) != getCouldNotCompute()) {
  4126. assert(isLoopInvariant(Result.getExact(this), L) &&
  4127. isLoopInvariant(Result.getMax(this), L) &&
  4128. "Computed backedge-taken count isn't loop invariant for loop!");
  4129. ++NumTripCountsComputed;
  4130. }
  4131. else if (Result.getMax(this) == getCouldNotCompute() &&
  4132. isa<PHINode>(L->getHeader()->begin())) {
  4133. // Only count loops that have phi nodes as not being computable.
  4134. ++NumTripCountsNotComputed;
  4135. }
  4136. // Now that we know more about the trip count for this loop, forget any
  4137. // existing SCEV values for PHI nodes in this loop since they are only
  4138. // conservative estimates made without the benefit of trip count
  4139. // information. This is similar to the code in forgetLoop, except that
  4140. // it handles SCEVUnknown PHI nodes specially.
  4141. if (Result.hasAnyInfo()) {
  4142. SmallVector<Instruction *, 16> Worklist;
  4143. PushLoopPHIs(L, Worklist);
  4144. SmallPtrSet<Instruction *, 8> Visited;
  4145. while (!Worklist.empty()) {
  4146. Instruction *I = Worklist.pop_back_val();
  4147. if (!Visited.insert(I).second)
  4148. continue;
  4149. ValueExprMapType::iterator It =
  4150. ValueExprMap.find_as(static_cast<Value *>(I));
  4151. if (It != ValueExprMap.end()) {
  4152. const SCEV *Old = It->second;
  4153. // SCEVUnknown for a PHI either means that it has an unrecognized
  4154. // structure, or it's a PHI that's in the progress of being computed
  4155. // by createNodeForPHI. In the former case, additional loop trip
  4156. // count information isn't going to change anything. In the later
  4157. // case, createNodeForPHI will perform the necessary updates on its
  4158. // own when it gets to that point.
  4159. if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
  4160. forgetMemoizedResults(Old);
  4161. ValueExprMap.erase(It);
  4162. }
  4163. if (PHINode *PN = dyn_cast<PHINode>(I))
  4164. ConstantEvolutionLoopExitValue.erase(PN);
  4165. }
  4166. PushDefUseChildren(I, Worklist);
  4167. }
  4168. }
  4169. // Re-lookup the insert position, since the call to
  4170. // ComputeBackedgeTakenCount above could result in a
  4171. // recusive call to getBackedgeTakenInfo (on a different
  4172. // loop), which would invalidate the iterator computed
  4173. // earlier.
  4174. return BackedgeTakenCounts.find(L)->second = Result;
  4175. }
  4176. /// forgetLoop - This method should be called by the client when it has
  4177. /// changed a loop in a way that may effect ScalarEvolution's ability to
  4178. /// compute a trip count, or if the loop is deleted.
  4179. void ScalarEvolution::forgetLoop(const Loop *L) {
  4180. // Drop any stored trip count value.
  4181. DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
  4182. BackedgeTakenCounts.find(L);
  4183. if (BTCPos != BackedgeTakenCounts.end()) {
  4184. BTCPos->second.clear();
  4185. BackedgeTakenCounts.erase(BTCPos);
  4186. }
  4187. // Drop information about expressions based on loop-header PHIs.
  4188. SmallVector<Instruction *, 16> Worklist;
  4189. PushLoopPHIs(L, Worklist);
  4190. SmallPtrSet<Instruction *, 8> Visited;
  4191. while (!Worklist.empty()) {
  4192. Instruction *I = Worklist.pop_back_val();
  4193. if (!Visited.insert(I).second)
  4194. continue;
  4195. ValueExprMapType::iterator It =
  4196. ValueExprMap.find_as(static_cast<Value *>(I));
  4197. if (It != ValueExprMap.end()) {
  4198. forgetMemoizedResults(It->second);
  4199. ValueExprMap.erase(It);
  4200. if (PHINode *PN = dyn_cast<PHINode>(I))
  4201. ConstantEvolutionLoopExitValue.erase(PN);
  4202. }
  4203. PushDefUseChildren(I, Worklist);
  4204. }
  4205. // Forget all contained loops too, to avoid dangling entries in the
  4206. // ValuesAtScopes map.
  4207. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
  4208. forgetLoop(*I);
  4209. }
  4210. /// forgetValue - This method should be called by the client when it has
  4211. /// changed a value in a way that may effect its value, or which may
  4212. /// disconnect it from a def-use chain linking it to a loop.
  4213. void ScalarEvolution::forgetValue(Value *V) {
  4214. Instruction *I = dyn_cast<Instruction>(V);
  4215. if (!I) return;
  4216. // Drop information about expressions based on loop-header PHIs.
  4217. SmallVector<Instruction *, 16> Worklist;
  4218. Worklist.push_back(I);
  4219. SmallPtrSet<Instruction *, 8> Visited;
  4220. while (!Worklist.empty()) {
  4221. I = Worklist.pop_back_val();
  4222. if (!Visited.insert(I).second)
  4223. continue;
  4224. ValueExprMapType::iterator It =
  4225. ValueExprMap.find_as(static_cast<Value *>(I));
  4226. if (It != ValueExprMap.end()) {
  4227. forgetMemoizedResults(It->second);
  4228. ValueExprMap.erase(It);
  4229. if (PHINode *PN = dyn_cast<PHINode>(I))
  4230. ConstantEvolutionLoopExitValue.erase(PN);
  4231. }
  4232. PushDefUseChildren(I, Worklist);
  4233. }
  4234. }
  4235. /// getExact - Get the exact loop backedge taken count considering all loop
  4236. /// exits. A computable result can only be return for loops with a single exit.
  4237. /// Returning the minimum taken count among all exits is incorrect because one
  4238. /// of the loop's exit limit's may have been skipped. HowFarToZero assumes that
  4239. /// the limit of each loop test is never skipped. This is a valid assumption as
  4240. /// long as the loop exits via that test. For precise results, it is the
  4241. /// caller's responsibility to specify the relevant loop exit using
  4242. /// getExact(ExitingBlock, SE).
  4243. const SCEV *
  4244. ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
  4245. // If any exits were not computable, the loop is not computable.
  4246. if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
  4247. // We need exactly one computable exit.
  4248. if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
  4249. assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
  4250. const SCEV *BECount = nullptr;
  4251. for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
  4252. ENT != nullptr; ENT = ENT->getNextExit()) {
  4253. assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
  4254. if (!BECount)
  4255. BECount = ENT->ExactNotTaken;
  4256. else if (BECount != ENT->ExactNotTaken)
  4257. return SE->getCouldNotCompute();
  4258. }
  4259. assert(BECount && "Invalid not taken count for loop exit");
  4260. return BECount;
  4261. }
  4262. /// getExact - Get the exact not taken count for this loop exit.
  4263. const SCEV *
  4264. ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
  4265. ScalarEvolution *SE) const {
  4266. for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
  4267. ENT != nullptr; ENT = ENT->getNextExit()) {
  4268. if (ENT->ExitingBlock == ExitingBlock)
  4269. return ENT->ExactNotTaken;
  4270. }
  4271. return SE->getCouldNotCompute();
  4272. }
  4273. /// getMax - Get the max backedge taken count for the loop.
  4274. const SCEV *
  4275. ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
  4276. return Max ? Max : SE->getCouldNotCompute();
  4277. }
  4278. bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
  4279. ScalarEvolution *SE) const {
  4280. if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
  4281. return true;
  4282. if (!ExitNotTaken.ExitingBlock)
  4283. return false;
  4284. for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
  4285. ENT != nullptr; ENT = ENT->getNextExit()) {
  4286. if (ENT->ExactNotTaken != SE->getCouldNotCompute()
  4287. && SE->hasOperand(ENT->ExactNotTaken, S)) {
  4288. return true;
  4289. }
  4290. }
  4291. return false;
  4292. }
  4293. /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
  4294. /// computable exit into a persistent ExitNotTakenInfo array.
  4295. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
  4296. SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
  4297. bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
  4298. if (!Complete)
  4299. ExitNotTaken.setIncomplete();
  4300. unsigned NumExits = ExitCounts.size();
  4301. if (NumExits == 0) return;
  4302. ExitNotTaken.ExitingBlock = ExitCounts[0].first;
  4303. ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
  4304. if (NumExits == 1) return;
  4305. // Handle the rare case of multiple computable exits.
  4306. ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
  4307. ExitNotTakenInfo *PrevENT = &ExitNotTaken;
  4308. for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
  4309. PrevENT->setNextExit(ENT);
  4310. ENT->ExitingBlock = ExitCounts[i].first;
  4311. ENT->ExactNotTaken = ExitCounts[i].second;
  4312. }
  4313. }
  4314. /// clear - Invalidate this result and free the ExitNotTakenInfo array.
  4315. void ScalarEvolution::BackedgeTakenInfo::clear() {
  4316. ExitNotTaken.ExitingBlock = nullptr;
  4317. ExitNotTaken.ExactNotTaken = nullptr;
  4318. delete[] ExitNotTaken.getNextExit();
  4319. }
  4320. /// ComputeBackedgeTakenCount - Compute the number of times the backedge
  4321. /// of the specified loop will execute.
  4322. ScalarEvolution::BackedgeTakenInfo
  4323. ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
  4324. SmallVector<BasicBlock *, 8> ExitingBlocks;
  4325. L->getExitingBlocks(ExitingBlocks);
  4326. SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
  4327. bool CouldComputeBECount = true;
  4328. BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
  4329. const SCEV *MustExitMaxBECount = nullptr;
  4330. const SCEV *MayExitMaxBECount = nullptr;
  4331. // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
  4332. // and compute maxBECount.
  4333. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
  4334. BasicBlock *ExitBB = ExitingBlocks[i];
  4335. ExitLimit EL = ComputeExitLimit(L, ExitBB);
  4336. // 1. For each exit that can be computed, add an entry to ExitCounts.
  4337. // CouldComputeBECount is true only if all exits can be computed.
  4338. if (EL.Exact == getCouldNotCompute())
  4339. // We couldn't compute an exact value for this exit, so
  4340. // we won't be able to compute an exact value for the loop.
  4341. CouldComputeBECount = false;
  4342. else
  4343. ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact));
  4344. // 2. Derive the loop's MaxBECount from each exit's max number of
  4345. // non-exiting iterations. Partition the loop exits into two kinds:
  4346. // LoopMustExits and LoopMayExits.
  4347. //
  4348. // If the exit dominates the loop latch, it is a LoopMustExit otherwise it
  4349. // is a LoopMayExit. If any computable LoopMustExit is found, then
  4350. // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
  4351. // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
  4352. // considered greater than any computable EL.Max.
  4353. if (EL.Max != getCouldNotCompute() && Latch &&
  4354. DT->dominates(ExitBB, Latch)) {
  4355. if (!MustExitMaxBECount)
  4356. MustExitMaxBECount = EL.Max;
  4357. else {
  4358. MustExitMaxBECount =
  4359. getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
  4360. }
  4361. } else if (MayExitMaxBECount != getCouldNotCompute()) {
  4362. if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
  4363. MayExitMaxBECount = EL.Max;
  4364. else {
  4365. MayExitMaxBECount =
  4366. getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
  4367. }
  4368. }
  4369. }
  4370. const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
  4371. (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
  4372. return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
  4373. }
  4374. /// ComputeExitLimit - Compute the number of times the backedge of the specified
  4375. /// loop will execute if it exits via the specified block.
  4376. ScalarEvolution::ExitLimit
  4377. ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
  4378. // Okay, we've chosen an exiting block. See what condition causes us to
  4379. // exit at this block and remember the exit block and whether all other targets
  4380. // lead to the loop header.
  4381. bool MustExecuteLoopHeader = true;
  4382. BasicBlock *Exit = nullptr;
  4383. for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
  4384. SI != SE; ++SI)
  4385. if (!L->contains(*SI)) {
  4386. if (Exit) // Multiple exit successors.
  4387. return getCouldNotCompute();
  4388. Exit = *SI;
  4389. } else if (*SI != L->getHeader()) {
  4390. MustExecuteLoopHeader = false;
  4391. }
  4392. // At this point, we know we have a conditional branch that determines whether
  4393. // the loop is exited. However, we don't know if the branch is executed each
  4394. // time through the loop. If not, then the execution count of the branch will
  4395. // not be equal to the trip count of the loop.
  4396. //
  4397. // Currently we check for this by checking to see if the Exit branch goes to
  4398. // the loop header. If so, we know it will always execute the same number of
  4399. // times as the loop. We also handle the case where the exit block *is* the
  4400. // loop header. This is common for un-rotated loops.
  4401. //
  4402. // If both of those tests fail, walk up the unique predecessor chain to the
  4403. // header, stopping if there is an edge that doesn't exit the loop. If the
  4404. // header is reached, the execution count of the branch will be equal to the
  4405. // trip count of the loop.
  4406. //
  4407. // More extensive analysis could be done to handle more cases here.
  4408. //
  4409. if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
  4410. // The simple checks failed, try climbing the unique predecessor chain
  4411. // up to the header.
  4412. bool Ok = false;
  4413. for (BasicBlock *BB = ExitingBlock; BB; ) {
  4414. BasicBlock *Pred = BB->getUniquePredecessor();
  4415. if (!Pred)
  4416. return getCouldNotCompute();
  4417. TerminatorInst *PredTerm = Pred->getTerminator();
  4418. for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
  4419. BasicBlock *PredSucc = PredTerm->getSuccessor(i);
  4420. if (PredSucc == BB)
  4421. continue;
  4422. // If the predecessor has a successor that isn't BB and isn't
  4423. // outside the loop, assume the worst.
  4424. if (L->contains(PredSucc))
  4425. return getCouldNotCompute();
  4426. }
  4427. if (Pred == L->getHeader()) {
  4428. Ok = true;
  4429. break;
  4430. }
  4431. BB = Pred;
  4432. }
  4433. if (!Ok)
  4434. return getCouldNotCompute();
  4435. }
  4436. bool IsOnlyExit = (L->getExitingBlock() != nullptr);
  4437. TerminatorInst *Term = ExitingBlock->getTerminator();
  4438. if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
  4439. assert(BI->isConditional() && "If unconditional, it can't be in loop!");
  4440. // Proceed to the next level to examine the exit condition expression.
  4441. return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
  4442. BI->getSuccessor(1),
  4443. /*ControlsExit=*/IsOnlyExit);
  4444. }
  4445. if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
  4446. return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
  4447. /*ControlsExit=*/IsOnlyExit);
  4448. return getCouldNotCompute();
  4449. }
  4450. /// ComputeExitLimitFromCond - Compute the number of times the
  4451. /// backedge of the specified loop will execute if its exit condition
  4452. /// were a conditional branch of ExitCond, TBB, and FBB.
  4453. ///
  4454. /// @param ControlsExit is true if ExitCond directly controls the exit
  4455. /// branch. In this case, we can assume that the loop exits only if the
  4456. /// condition is true and can infer that failing to meet the condition prior to
  4457. /// integer wraparound results in undefined behavior.
  4458. ScalarEvolution::ExitLimit
  4459. ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
  4460. Value *ExitCond,
  4461. BasicBlock *TBB,
  4462. BasicBlock *FBB,
  4463. bool ControlsExit) {
  4464. // Check if the controlling expression for this loop is an And or Or.
  4465. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
  4466. if (BO->getOpcode() == Instruction::And) {
  4467. // Recurse on the operands of the and.
  4468. bool EitherMayExit = L->contains(TBB);
  4469. ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
  4470. ControlsExit && !EitherMayExit);
  4471. ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
  4472. ControlsExit && !EitherMayExit);
  4473. const SCEV *BECount = getCouldNotCompute();
  4474. const SCEV *MaxBECount = getCouldNotCompute();
  4475. if (EitherMayExit) {
  4476. // Both conditions must be true for the loop to continue executing.
  4477. // Choose the less conservative count.
  4478. if (EL0.Exact == getCouldNotCompute() ||
  4479. EL1.Exact == getCouldNotCompute())
  4480. BECount = getCouldNotCompute();
  4481. else
  4482. BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
  4483. if (EL0.Max == getCouldNotCompute())
  4484. MaxBECount = EL1.Max;
  4485. else if (EL1.Max == getCouldNotCompute())
  4486. MaxBECount = EL0.Max;
  4487. else
  4488. MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
  4489. } else {
  4490. // Both conditions must be true at the same time for the loop to exit.
  4491. // For now, be conservative.
  4492. assert(L->contains(FBB) && "Loop block has no successor in loop!");
  4493. if (EL0.Max == EL1.Max)
  4494. MaxBECount = EL0.Max;
  4495. if (EL0.Exact == EL1.Exact)
  4496. BECount = EL0.Exact;
  4497. }
  4498. return ExitLimit(BECount, MaxBECount);
  4499. }
  4500. if (BO->getOpcode() == Instruction::Or) {
  4501. // Recurse on the operands of the or.
  4502. bool EitherMayExit = L->contains(FBB);
  4503. ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
  4504. ControlsExit && !EitherMayExit);
  4505. ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
  4506. ControlsExit && !EitherMayExit);
  4507. const SCEV *BECount = getCouldNotCompute();
  4508. const SCEV *MaxBECount = getCouldNotCompute();
  4509. if (EitherMayExit) {
  4510. // Both conditions must be false for the loop to continue executing.
  4511. // Choose the less conservative count.
  4512. if (EL0.Exact == getCouldNotCompute() ||
  4513. EL1.Exact == getCouldNotCompute())
  4514. BECount = getCouldNotCompute();
  4515. else
  4516. BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
  4517. if (EL0.Max == getCouldNotCompute())
  4518. MaxBECount = EL1.Max;
  4519. else if (EL1.Max == getCouldNotCompute())
  4520. MaxBECount = EL0.Max;
  4521. else
  4522. MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
  4523. } else {
  4524. // Both conditions must be false at the same time for the loop to exit.
  4525. // For now, be conservative.
  4526. assert(L->contains(TBB) && "Loop block has no successor in loop!");
  4527. if (EL0.Max == EL1.Max)
  4528. MaxBECount = EL0.Max;
  4529. if (EL0.Exact == EL1.Exact)
  4530. BECount = EL0.Exact;
  4531. }
  4532. return ExitLimit(BECount, MaxBECount);
  4533. }
  4534. }
  4535. // With an icmp, it may be feasible to compute an exact backedge-taken count.
  4536. // Proceed to the next level to examine the icmp.
  4537. if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
  4538. return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
  4539. // Check for a constant condition. These are normally stripped out by
  4540. // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
  4541. // preserve the CFG and is temporarily leaving constant conditions
  4542. // in place.
  4543. if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
  4544. if (L->contains(FBB) == !CI->getZExtValue())
  4545. // The backedge is always taken.
  4546. return getCouldNotCompute();
  4547. else
  4548. // The backedge is never taken.
  4549. return getConstant(CI->getType(), 0);
  4550. }
  4551. // If it's not an integer or pointer comparison then compute it the hard way.
  4552. return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
  4553. }
  4554. /// ComputeExitLimitFromICmp - Compute the number of times the
  4555. /// backedge of the specified loop will execute if its exit condition
  4556. /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
  4557. ScalarEvolution::ExitLimit
  4558. ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
  4559. ICmpInst *ExitCond,
  4560. BasicBlock *TBB,
  4561. BasicBlock *FBB,
  4562. bool ControlsExit) {
  4563. // If the condition was exit on true, convert the condition to exit on false
  4564. ICmpInst::Predicate Cond;
  4565. if (!L->contains(FBB))
  4566. Cond = ExitCond->getPredicate();
  4567. else
  4568. Cond = ExitCond->getInversePredicate();
  4569. // Handle common loops like: for (X = "string"; *X; ++X)
  4570. if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
  4571. if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
  4572. ExitLimit ItCnt =
  4573. ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
  4574. if (ItCnt.hasAnyInfo())
  4575. return ItCnt;
  4576. }
  4577. const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
  4578. const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
  4579. // Try to evaluate any dependencies out of the loop.
  4580. LHS = getSCEVAtScope(LHS, L);
  4581. RHS = getSCEVAtScope(RHS, L);
  4582. // At this point, we would like to compute how many iterations of the
  4583. // loop the predicate will return true for these inputs.
  4584. if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
  4585. // If there is a loop-invariant, force it into the RHS.
  4586. std::swap(LHS, RHS);
  4587. Cond = ICmpInst::getSwappedPredicate(Cond);
  4588. }
  4589. // Simplify the operands before analyzing them.
  4590. (void)SimplifyICmpOperands(Cond, LHS, RHS);
  4591. // If we have a comparison of a chrec against a constant, try to use value
  4592. // ranges to answer this query.
  4593. if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
  4594. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
  4595. if (AddRec->getLoop() == L) {
  4596. // Form the constant range.
  4597. ConstantRange CompRange(
  4598. ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
  4599. const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
  4600. if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
  4601. }
  4602. switch (Cond) {
  4603. case ICmpInst::ICMP_NE: { // while (X != Y)
  4604. // Convert to: while (X-Y != 0)
  4605. ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
  4606. if (EL.hasAnyInfo()) return EL;
  4607. break;
  4608. }
  4609. case ICmpInst::ICMP_EQ: { // while (X == Y)
  4610. // Convert to: while (X-Y == 0)
  4611. ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
  4612. if (EL.hasAnyInfo()) return EL;
  4613. break;
  4614. }
  4615. case ICmpInst::ICMP_SLT:
  4616. case ICmpInst::ICMP_ULT: { // while (X < Y)
  4617. bool IsSigned = Cond == ICmpInst::ICMP_SLT;
  4618. ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
  4619. if (EL.hasAnyInfo()) return EL;
  4620. break;
  4621. }
  4622. case ICmpInst::ICMP_SGT:
  4623. case ICmpInst::ICMP_UGT: { // while (X > Y)
  4624. bool IsSigned = Cond == ICmpInst::ICMP_SGT;
  4625. ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
  4626. if (EL.hasAnyInfo()) return EL;
  4627. break;
  4628. }
  4629. default:
  4630. #if 0
  4631. dbgs() << "ComputeBackedgeTakenCount ";
  4632. if (ExitCond->getOperand(0)->getType()->isUnsigned())
  4633. dbgs() << "[unsigned] ";
  4634. dbgs() << *LHS << " "
  4635. << Instruction::getOpcodeName(Instruction::ICmp)
  4636. << " " << *RHS << "\n";
  4637. #endif
  4638. break;
  4639. }
  4640. return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
  4641. }
  4642. ScalarEvolution::ExitLimit
  4643. ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
  4644. SwitchInst *Switch,
  4645. BasicBlock *ExitingBlock,
  4646. bool ControlsExit) {
  4647. assert(!L->contains(ExitingBlock) && "Not an exiting block!");
  4648. // Give up if the exit is the default dest of a switch.
  4649. if (Switch->getDefaultDest() == ExitingBlock)
  4650. return getCouldNotCompute();
  4651. assert(L->contains(Switch->getDefaultDest()) &&
  4652. "Default case must not exit the loop!");
  4653. const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
  4654. const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
  4655. // while (X != Y) --> while (X-Y != 0)
  4656. ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
  4657. if (EL.hasAnyInfo())
  4658. return EL;
  4659. return getCouldNotCompute();
  4660. }
  4661. static ConstantInt *
  4662. EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
  4663. ScalarEvolution &SE) {
  4664. const SCEV *InVal = SE.getConstant(C);
  4665. const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
  4666. assert(isa<SCEVConstant>(Val) &&
  4667. "Evaluation of SCEV at constant didn't fold correctly?");
  4668. return cast<SCEVConstant>(Val)->getValue();
  4669. }
  4670. /// ComputeLoadConstantCompareExitLimit - Given an exit condition of
  4671. /// 'icmp op load X, cst', try to see if we can compute the backedge
  4672. /// execution count.
  4673. ScalarEvolution::ExitLimit
  4674. ScalarEvolution::ComputeLoadConstantCompareExitLimit(
  4675. LoadInst *LI,
  4676. Constant *RHS,
  4677. const Loop *L,
  4678. ICmpInst::Predicate predicate) {
  4679. if (LI->isVolatile()) return getCouldNotCompute();
  4680. // Check to see if the loaded pointer is a getelementptr of a global.
  4681. // TODO: Use SCEV instead of manually grubbing with GEPs.
  4682. GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
  4683. if (!GEP) return getCouldNotCompute();
  4684. // Make sure that it is really a constant global we are gepping, with an
  4685. // initializer, and make sure the first IDX is really 0.
  4686. GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
  4687. if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
  4688. GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
  4689. !cast<Constant>(GEP->getOperand(1))->isNullValue())
  4690. return getCouldNotCompute();
  4691. // Okay, we allow one non-constant index into the GEP instruction.
  4692. Value *VarIdx = nullptr;
  4693. std::vector<Constant*> Indexes;
  4694. unsigned VarIdxNum = 0;
  4695. for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
  4696. if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
  4697. Indexes.push_back(CI);
  4698. } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
  4699. if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
  4700. VarIdx = GEP->getOperand(i);
  4701. VarIdxNum = i-2;
  4702. Indexes.push_back(nullptr);
  4703. }
  4704. // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
  4705. if (!VarIdx)
  4706. return getCouldNotCompute();
  4707. // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
  4708. // Check to see if X is a loop variant variable value now.
  4709. const SCEV *Idx = getSCEV(VarIdx);
  4710. Idx = getSCEVAtScope(Idx, L);
  4711. // We can only recognize very limited forms of loop index expressions, in
  4712. // particular, only affine AddRec's like {C1,+,C2}.
  4713. const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
  4714. if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
  4715. !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
  4716. !isa<SCEVConstant>(IdxExpr->getOperand(1)))
  4717. return getCouldNotCompute();
  4718. unsigned MaxSteps = MaxBruteForceIterations;
  4719. for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
  4720. ConstantInt *ItCst = ConstantInt::get(
  4721. cast<IntegerType>(IdxExpr->getType()), IterationNum);
  4722. ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
  4723. // Form the GEP offset.
  4724. Indexes[VarIdxNum] = Val;
  4725. Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
  4726. Indexes);
  4727. if (!Result) break; // Cannot compute!
  4728. // Evaluate the condition for this iteration.
  4729. Result = ConstantExpr::getICmp(predicate, Result, RHS);
  4730. if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
  4731. if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
  4732. #if 0
  4733. dbgs() << "\n***\n*** Computed loop count " << *ItCst
  4734. << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
  4735. << "***\n";
  4736. #endif
  4737. ++NumArrayLenItCounts;
  4738. return getConstant(ItCst); // Found terminating iteration!
  4739. }
  4740. }
  4741. return getCouldNotCompute();
  4742. }
  4743. /// CanConstantFold - Return true if we can constant fold an instruction of the
  4744. /// specified type, assuming that all operands were constants.
  4745. static bool CanConstantFold(const Instruction *I) {
  4746. if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
  4747. isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
  4748. isa<LoadInst>(I))
  4749. return true;
  4750. if (const CallInst *CI = dyn_cast<CallInst>(I))
  4751. if (const Function *F = CI->getCalledFunction())
  4752. return canConstantFoldCallTo(F);
  4753. return false;
  4754. }
  4755. /// Determine whether this instruction can constant evolve within this loop
  4756. /// assuming its operands can all constant evolve.
  4757. static bool canConstantEvolve(Instruction *I, const Loop *L) {
  4758. // An instruction outside of the loop can't be derived from a loop PHI.
  4759. if (!L->contains(I)) return false;
  4760. if (isa<PHINode>(I)) {
  4761. // We don't currently keep track of the control flow needed to evaluate
  4762. // PHIs, so we cannot handle PHIs inside of loops.
  4763. return L->getHeader() == I->getParent();
  4764. }
  4765. // If we won't be able to constant fold this expression even if the operands
  4766. // are constants, bail early.
  4767. return CanConstantFold(I);
  4768. }
  4769. /// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
  4770. /// recursing through each instruction operand until reaching a loop header phi.
  4771. static PHINode *
  4772. getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
  4773. DenseMap<Instruction *, PHINode *> &PHIMap) {
  4774. // Otherwise, we can evaluate this instruction if all of its operands are
  4775. // constant or derived from a PHI node themselves.
  4776. PHINode *PHI = nullptr;
  4777. for (Instruction::op_iterator OpI = UseInst->op_begin(),
  4778. OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
  4779. if (isa<Constant>(*OpI)) continue;
  4780. Instruction *OpInst = dyn_cast<Instruction>(*OpI);
  4781. if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
  4782. PHINode *P = dyn_cast<PHINode>(OpInst);
  4783. if (!P)
  4784. // If this operand is already visited, reuse the prior result.
  4785. // We may have P != PHI if this is the deepest point at which the
  4786. // inconsistent paths meet.
  4787. P = PHIMap.lookup(OpInst);
  4788. if (!P) {
  4789. // Recurse and memoize the results, whether a phi is found or not.
  4790. // This recursive call invalidates pointers into PHIMap.
  4791. P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
  4792. PHIMap[OpInst] = P;
  4793. }
  4794. if (!P)
  4795. return nullptr; // Not evolving from PHI
  4796. if (PHI && PHI != P)
  4797. return nullptr; // Evolving from multiple different PHIs.
  4798. PHI = P;
  4799. }
  4800. // This is a expression evolving from a constant PHI!
  4801. return PHI;
  4802. }
  4803. /// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
  4804. /// in the loop that V is derived from. We allow arbitrary operations along the
  4805. /// way, but the operands of an operation must either be constants or a value
  4806. /// derived from a constant PHI. If this expression does not fit with these
  4807. /// constraints, return null.
  4808. static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
  4809. Instruction *I = dyn_cast<Instruction>(V);
  4810. if (!I || !canConstantEvolve(I, L)) return nullptr;
  4811. if (PHINode *PN = dyn_cast<PHINode>(I)) {
  4812. return PN;
  4813. }
  4814. // Record non-constant instructions contained by the loop.
  4815. DenseMap<Instruction *, PHINode *> PHIMap;
  4816. return getConstantEvolvingPHIOperands(I, L, PHIMap);
  4817. }
  4818. /// EvaluateExpression - Given an expression that passes the
  4819. /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
  4820. /// in the loop has the value PHIVal. If we can't fold this expression for some
  4821. /// reason, return null.
  4822. static Constant *EvaluateExpression(Value *V, const Loop *L,
  4823. DenseMap<Instruction *, Constant *> &Vals,
  4824. const DataLayout &DL,
  4825. const TargetLibraryInfo *TLI) {
  4826. // Convenient constant check, but redundant for recursive calls.
  4827. if (Constant *C = dyn_cast<Constant>(V)) return C;
  4828. Instruction *I = dyn_cast<Instruction>(V);
  4829. if (!I) return nullptr;
  4830. if (Constant *C = Vals.lookup(I)) return C;
  4831. // An instruction inside the loop depends on a value outside the loop that we
  4832. // weren't given a mapping for, or a value such as a call inside the loop.
  4833. if (!canConstantEvolve(I, L)) return nullptr;
  4834. // An unmapped PHI can be due to a branch or another loop inside this loop,
  4835. // or due to this not being the initial iteration through a loop where we
  4836. // couldn't compute the evolution of this particular PHI last time.
  4837. if (isa<PHINode>(I)) return nullptr;
  4838. std::vector<Constant*> Operands(I->getNumOperands());
  4839. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
  4840. Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
  4841. if (!Operand) {
  4842. Operands[i] = dyn_cast<Constant>(I->getOperand(i));
  4843. if (!Operands[i]) return nullptr;
  4844. continue;
  4845. }
  4846. Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
  4847. Vals[Operand] = C;
  4848. if (!C) return nullptr;
  4849. Operands[i] = C;
  4850. }
  4851. if (CmpInst *CI = dyn_cast<CmpInst>(I))
  4852. return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
  4853. Operands[1], DL, TLI);
  4854. if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
  4855. if (!LI->isVolatile())
  4856. return ConstantFoldLoadFromConstPtr(Operands[0], DL);
  4857. }
  4858. return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL,
  4859. TLI);
  4860. }
  4861. /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
  4862. /// in the header of its containing loop, we know the loop executes a
  4863. /// constant number of times, and the PHI node is just a recurrence
  4864. /// involving constants, fold it.
  4865. Constant *
  4866. ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
  4867. const APInt &BEs,
  4868. const Loop *L) {
  4869. DenseMap<PHINode*, Constant*>::const_iterator I =
  4870. ConstantEvolutionLoopExitValue.find(PN);
  4871. if (I != ConstantEvolutionLoopExitValue.end())
  4872. return I->second;
  4873. if (BEs.ugt(MaxBruteForceIterations))
  4874. return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it.
  4875. Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
  4876. DenseMap<Instruction *, Constant *> CurrentIterVals;
  4877. BasicBlock *Header = L->getHeader();
  4878. assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
  4879. // Since the loop is canonicalized, the PHI node must have two entries. One
  4880. // entry must be a constant (coming in from outside of the loop), and the
  4881. // second must be derived from the same PHI.
  4882. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
  4883. PHINode *PHI = nullptr;
  4884. for (BasicBlock::iterator I = Header->begin();
  4885. (PHI = dyn_cast<PHINode>(I)); ++I) {
  4886. Constant *StartCST =
  4887. dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
  4888. if (!StartCST) continue;
  4889. CurrentIterVals[PHI] = StartCST;
  4890. }
  4891. if (!CurrentIterVals.count(PN))
  4892. return RetVal = nullptr;
  4893. Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
  4894. // Execute the loop symbolically to determine the exit value.
  4895. if (BEs.getActiveBits() >= 32)
  4896. return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
  4897. unsigned NumIterations = BEs.getZExtValue(); // must be in range
  4898. unsigned IterationNum = 0;
  4899. const DataLayout &DL = F->getParent()->getDataLayout();
  4900. for (; ; ++IterationNum) {
  4901. if (IterationNum == NumIterations)
  4902. return RetVal = CurrentIterVals[PN]; // Got exit value!
  4903. // Compute the value of the PHIs for the next iteration.
  4904. // EvaluateExpression adds non-phi values to the CurrentIterVals map.
  4905. DenseMap<Instruction *, Constant *> NextIterVals;
  4906. Constant *NextPHI =
  4907. EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
  4908. if (!NextPHI)
  4909. return nullptr; // Couldn't evaluate!
  4910. NextIterVals[PN] = NextPHI;
  4911. bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
  4912. // Also evaluate the other PHI nodes. However, we don't get to stop if we
  4913. // cease to be able to evaluate one of them or if they stop evolving,
  4914. // because that doesn't necessarily prevent us from computing PN.
  4915. SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
  4916. for (DenseMap<Instruction *, Constant *>::const_iterator
  4917. I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
  4918. PHINode *PHI = dyn_cast<PHINode>(I->first);
  4919. if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
  4920. PHIsToCompute.push_back(std::make_pair(PHI, I->second));
  4921. }
  4922. // We use two distinct loops because EvaluateExpression may invalidate any
  4923. // iterators into CurrentIterVals.
  4924. for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
  4925. I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
  4926. PHINode *PHI = I->first;
  4927. Constant *&NextPHI = NextIterVals[PHI];
  4928. if (!NextPHI) { // Not already computed.
  4929. Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
  4930. NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
  4931. }
  4932. if (NextPHI != I->second)
  4933. StoppedEvolving = false;
  4934. }
  4935. // If all entries in CurrentIterVals == NextIterVals then we can stop
  4936. // iterating, the loop can't continue to change.
  4937. if (StoppedEvolving)
  4938. return RetVal = CurrentIterVals[PN];
  4939. CurrentIterVals.swap(NextIterVals);
  4940. }
  4941. }
  4942. /// ComputeExitCountExhaustively - If the loop is known to execute a
  4943. /// constant number of times (the condition evolves only from constants),
  4944. /// try to evaluate a few iterations of the loop until we get the exit
  4945. /// condition gets a value of ExitWhen (true or false). If we cannot
  4946. /// evaluate the trip count of the loop, return getCouldNotCompute().
  4947. const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
  4948. Value *Cond,
  4949. bool ExitWhen) {
  4950. PHINode *PN = getConstantEvolvingPHI(Cond, L);
  4951. if (!PN) return getCouldNotCompute();
  4952. // If the loop is canonicalized, the PHI will have exactly two entries.
  4953. // That's the only form we support here.
  4954. if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
  4955. DenseMap<Instruction *, Constant *> CurrentIterVals;
  4956. BasicBlock *Header = L->getHeader();
  4957. assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
  4958. // One entry must be a constant (coming in from outside of the loop), and the
  4959. // second must be derived from the same PHI.
  4960. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
  4961. PHINode *PHI = nullptr;
  4962. for (BasicBlock::iterator I = Header->begin();
  4963. (PHI = dyn_cast<PHINode>(I)); ++I) {
  4964. Constant *StartCST =
  4965. dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
  4966. if (!StartCST) continue;
  4967. CurrentIterVals[PHI] = StartCST;
  4968. }
  4969. if (!CurrentIterVals.count(PN))
  4970. return getCouldNotCompute();
  4971. // Okay, we find a PHI node that defines the trip count of this loop. Execute
  4972. // the loop symbolically to determine when the condition gets a value of
  4973. // "ExitWhen".
  4974. unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
  4975. const DataLayout &DL = F->getParent()->getDataLayout();
  4976. for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
  4977. ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
  4978. EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI));
  4979. // Couldn't symbolically evaluate.
  4980. if (!CondVal) return getCouldNotCompute();
  4981. if (CondVal->getValue() == uint64_t(ExitWhen)) {
  4982. ++NumBruteForceTripCountsComputed;
  4983. return getConstant(Type::getInt32Ty(getContext()), IterationNum);
  4984. }
  4985. // Update all the PHI nodes for the next iteration.
  4986. DenseMap<Instruction *, Constant *> NextIterVals;
  4987. // Create a list of which PHIs we need to compute. We want to do this before
  4988. // calling EvaluateExpression on them because that may invalidate iterators
  4989. // into CurrentIterVals.
  4990. SmallVector<PHINode *, 8> PHIsToCompute;
  4991. for (DenseMap<Instruction *, Constant *>::const_iterator
  4992. I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
  4993. PHINode *PHI = dyn_cast<PHINode>(I->first);
  4994. if (!PHI || PHI->getParent() != Header) continue;
  4995. PHIsToCompute.push_back(PHI);
  4996. }
  4997. for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
  4998. E = PHIsToCompute.end(); I != E; ++I) {
  4999. PHINode *PHI = *I;
  5000. Constant *&NextPHI = NextIterVals[PHI];
  5001. if (NextPHI) continue; // Already computed!
  5002. Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
  5003. NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
  5004. }
  5005. CurrentIterVals.swap(NextIterVals);
  5006. }
  5007. // Too many iterations were needed to evaluate.
  5008. return getCouldNotCompute();
  5009. }
  5010. /// getSCEVAtScope - Return a SCEV expression for the specified value
  5011. /// at the specified scope in the program. The L value specifies a loop
  5012. /// nest to evaluate the expression at, where null is the top-level or a
  5013. /// specified loop is immediately inside of the loop.
  5014. ///
  5015. /// This method can be used to compute the exit value for a variable defined
  5016. /// in a loop by querying what the value will hold in the parent loop.
  5017. ///
  5018. /// In the case that a relevant loop exit value cannot be computed, the
  5019. /// original value V is returned.
  5020. const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
  5021. // Check to see if we've folded this expression at this loop before.
  5022. SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V];
  5023. for (unsigned u = 0; u < Values.size(); u++) {
  5024. if (Values[u].first == L)
  5025. return Values[u].second ? Values[u].second : V;
  5026. }
  5027. Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
  5028. // Otherwise compute it.
  5029. const SCEV *C = computeSCEVAtScope(V, L);
  5030. SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
  5031. for (unsigned u = Values2.size(); u > 0; u--) {
  5032. if (Values2[u - 1].first == L) {
  5033. Values2[u - 1].second = C;
  5034. break;
  5035. }
  5036. }
  5037. return C;
  5038. }
  5039. /// This builds up a Constant using the ConstantExpr interface. That way, we
  5040. /// will return Constants for objects which aren't represented by a
  5041. /// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
  5042. /// Returns NULL if the SCEV isn't representable as a Constant.
  5043. static Constant *BuildConstantFromSCEV(const SCEV *V) {
  5044. switch (static_cast<SCEVTypes>(V->getSCEVType())) {
  5045. case scCouldNotCompute:
  5046. case scAddRecExpr:
  5047. break;
  5048. case scConstant:
  5049. return cast<SCEVConstant>(V)->getValue();
  5050. case scUnknown:
  5051. return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
  5052. case scSignExtend: {
  5053. const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
  5054. if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
  5055. return ConstantExpr::getSExt(CastOp, SS->getType());
  5056. break;
  5057. }
  5058. case scZeroExtend: {
  5059. const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
  5060. if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
  5061. return ConstantExpr::getZExt(CastOp, SZ->getType());
  5062. break;
  5063. }
  5064. case scTruncate: {
  5065. const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
  5066. if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
  5067. return ConstantExpr::getTrunc(CastOp, ST->getType());
  5068. break;
  5069. }
  5070. case scAddExpr: {
  5071. const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
  5072. if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
  5073. if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
  5074. unsigned AS = PTy->getAddressSpace();
  5075. Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
  5076. C = ConstantExpr::getBitCast(C, DestPtrTy);
  5077. }
  5078. for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
  5079. Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
  5080. if (!C2) return nullptr;
  5081. // First pointer!
  5082. if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
  5083. unsigned AS = C2->getType()->getPointerAddressSpace();
  5084. std::swap(C, C2);
  5085. Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
  5086. // The offsets have been converted to bytes. We can add bytes to an
  5087. // i8* by GEP with the byte count in the first index.
  5088. C = ConstantExpr::getBitCast(C, DestPtrTy);
  5089. }
  5090. // Don't bother trying to sum two pointers. We probably can't
  5091. // statically compute a load that results from it anyway.
  5092. if (C2->getType()->isPointerTy())
  5093. return nullptr;
  5094. if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
  5095. if (PTy->getElementType()->isStructTy())
  5096. C2 = ConstantExpr::getIntegerCast(
  5097. C2, Type::getInt32Ty(C->getContext()), true);
  5098. C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
  5099. } else
  5100. C = ConstantExpr::getAdd(C, C2);
  5101. }
  5102. return C;
  5103. }
  5104. break;
  5105. }
  5106. case scMulExpr: {
  5107. const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
  5108. if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
  5109. // Don't bother with pointers at all.
  5110. if (C->getType()->isPointerTy()) return nullptr;
  5111. for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
  5112. Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
  5113. if (!C2 || C2->getType()->isPointerTy()) return nullptr;
  5114. C = ConstantExpr::getMul(C, C2);
  5115. }
  5116. return C;
  5117. }
  5118. break;
  5119. }
  5120. case scUDivExpr: {
  5121. const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
  5122. if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
  5123. if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
  5124. if (LHS->getType() == RHS->getType())
  5125. return ConstantExpr::getUDiv(LHS, RHS);
  5126. break;
  5127. }
  5128. case scSMaxExpr:
  5129. case scUMaxExpr:
  5130. break; // TODO: smax, umax.
  5131. }
  5132. return nullptr;
  5133. }
  5134. const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  5135. if (isa<SCEVConstant>(V)) return V;
  5136. // If this instruction is evolved from a constant-evolving PHI, compute the
  5137. // exit value from the loop without using SCEVs.
  5138. if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
  5139. if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
  5140. const Loop *LI = (*this->LI)[I->getParent()];
  5141. if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
  5142. if (PHINode *PN = dyn_cast<PHINode>(I))
  5143. if (PN->getParent() == LI->getHeader()) {
  5144. // Okay, there is no closed form solution for the PHI node. Check
  5145. // to see if the loop that contains it has a known backedge-taken
  5146. // count. If so, we may be able to force computation of the exit
  5147. // value.
  5148. const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
  5149. if (const SCEVConstant *BTCC =
  5150. dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
  5151. // Okay, we know how many times the containing loop executes. If
  5152. // this is a constant evolving PHI node, get the final value at
  5153. // the specified iteration number.
  5154. Constant *RV = getConstantEvolutionLoopExitValue(PN,
  5155. BTCC->getValue()->getValue(),
  5156. LI);
  5157. if (RV) return getSCEV(RV);
  5158. }
  5159. }
  5160. // Okay, this is an expression that we cannot symbolically evaluate
  5161. // into a SCEV. Check to see if it's possible to symbolically evaluate
  5162. // the arguments into constants, and if so, try to constant propagate the
  5163. // result. This is particularly useful for computing loop exit values.
  5164. if (CanConstantFold(I)) {
  5165. SmallVector<Constant *, 4> Operands;
  5166. bool MadeImprovement = false;
  5167. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
  5168. Value *Op = I->getOperand(i);
  5169. if (Constant *C = dyn_cast<Constant>(Op)) {
  5170. Operands.push_back(C);
  5171. continue;
  5172. }
  5173. // If any of the operands is non-constant and if they are
  5174. // non-integer and non-pointer, don't even try to analyze them
  5175. // with scev techniques.
  5176. if (!isSCEVable(Op->getType()))
  5177. return V;
  5178. const SCEV *OrigV = getSCEV(Op);
  5179. const SCEV *OpV = getSCEVAtScope(OrigV, L);
  5180. MadeImprovement |= OrigV != OpV;
  5181. Constant *C = BuildConstantFromSCEV(OpV);
  5182. if (!C) return V;
  5183. if (C->getType() != Op->getType())
  5184. C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
  5185. Op->getType(),
  5186. false),
  5187. C, Op->getType());
  5188. Operands.push_back(C);
  5189. }
  5190. // Check to see if getSCEVAtScope actually made an improvement.
  5191. if (MadeImprovement) {
  5192. Constant *C = nullptr;
  5193. const DataLayout &DL = F->getParent()->getDataLayout();
  5194. if (const CmpInst *CI = dyn_cast<CmpInst>(I))
  5195. C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
  5196. Operands[1], DL, TLI);
  5197. else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
  5198. if (!LI->isVolatile())
  5199. C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
  5200. } else
  5201. C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
  5202. DL, TLI);
  5203. if (!C) return V;
  5204. return getSCEV(C);
  5205. }
  5206. }
  5207. }
  5208. // This is some other type of SCEVUnknown, just return it.
  5209. return V;
  5210. }
  5211. if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
  5212. // Avoid performing the look-up in the common case where the specified
  5213. // expression has no loop-variant portions.
  5214. for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
  5215. const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
  5216. if (OpAtScope != Comm->getOperand(i)) {
  5217. // Okay, at least one of these operands is loop variant but might be
  5218. // foldable. Build a new instance of the folded commutative expression.
  5219. SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
  5220. Comm->op_begin()+i);
  5221. NewOps.push_back(OpAtScope);
  5222. for (++i; i != e; ++i) {
  5223. OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
  5224. NewOps.push_back(OpAtScope);
  5225. }
  5226. if (isa<SCEVAddExpr>(Comm))
  5227. return getAddExpr(NewOps);
  5228. if (isa<SCEVMulExpr>(Comm))
  5229. return getMulExpr(NewOps);
  5230. if (isa<SCEVSMaxExpr>(Comm))
  5231. return getSMaxExpr(NewOps);
  5232. if (isa<SCEVUMaxExpr>(Comm))
  5233. return getUMaxExpr(NewOps);
  5234. llvm_unreachable("Unknown commutative SCEV type!");
  5235. }
  5236. }
  5237. // If we got here, all operands are loop invariant.
  5238. return Comm;
  5239. }
  5240. if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
  5241. const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
  5242. const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
  5243. if (LHS == Div->getLHS() && RHS == Div->getRHS())
  5244. return Div; // must be loop invariant
  5245. return getUDivExpr(LHS, RHS);
  5246. }
  5247. // If this is a loop recurrence for a loop that does not contain L, then we
  5248. // are dealing with the final value computed by the loop.
  5249. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
  5250. // First, attempt to evaluate each operand.
  5251. // Avoid performing the look-up in the common case where the specified
  5252. // expression has no loop-variant portions.
  5253. for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
  5254. const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
  5255. if (OpAtScope == AddRec->getOperand(i))
  5256. continue;
  5257. // Okay, at least one of these operands is loop variant but might be
  5258. // foldable. Build a new instance of the folded commutative expression.
  5259. SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
  5260. AddRec->op_begin()+i);
  5261. NewOps.push_back(OpAtScope);
  5262. for (++i; i != e; ++i)
  5263. NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
  5264. const SCEV *FoldedRec =
  5265. getAddRecExpr(NewOps, AddRec->getLoop(),
  5266. AddRec->getNoWrapFlags(SCEV::FlagNW));
  5267. AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
  5268. // The addrec may be folded to a nonrecurrence, for example, if the
  5269. // induction variable is multiplied by zero after constant folding. Go
  5270. // ahead and return the folded value.
  5271. if (!AddRec)
  5272. return FoldedRec;
  5273. break;
  5274. }
  5275. // If the scope is outside the addrec's loop, evaluate it by using the
  5276. // loop exit value of the addrec.
  5277. if (!AddRec->getLoop()->contains(L)) {
  5278. // To evaluate this recurrence, we need to know how many times the AddRec
  5279. // loop iterates. Compute this now.
  5280. const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
  5281. if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
  5282. // Then, evaluate the AddRec.
  5283. return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
  5284. }
  5285. return AddRec;
  5286. }
  5287. if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
  5288. const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
  5289. if (Op == Cast->getOperand())
  5290. return Cast; // must be loop invariant
  5291. return getZeroExtendExpr(Op, Cast->getType());
  5292. }
  5293. if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
  5294. const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
  5295. if (Op == Cast->getOperand())
  5296. return Cast; // must be loop invariant
  5297. return getSignExtendExpr(Op, Cast->getType());
  5298. }
  5299. if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
  5300. const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
  5301. if (Op == Cast->getOperand())
  5302. return Cast; // must be loop invariant
  5303. return getTruncateExpr(Op, Cast->getType());
  5304. }
  5305. llvm_unreachable("Unknown SCEV type!");
  5306. }
  5307. /// getSCEVAtScope - This is a convenience function which does
  5308. /// getSCEVAtScope(getSCEV(V), L).
  5309. const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
  5310. return getSCEVAtScope(getSCEV(V), L);
  5311. }
  5312. /// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
  5313. /// following equation:
  5314. ///
  5315. /// A * X = B (mod N)
  5316. ///
  5317. /// where N = 2^BW and BW is the common bit width of A and B. The signedness of
  5318. /// A and B isn't important.
  5319. ///
  5320. /// If the equation does not have a solution, SCEVCouldNotCompute is returned.
  5321. static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
  5322. ScalarEvolution &SE) {
  5323. uint32_t BW = A.getBitWidth();
  5324. assert(BW == B.getBitWidth() && "Bit widths must be the same.");
  5325. assert(A != 0 && "A must be non-zero.");
  5326. // 1. D = gcd(A, N)
  5327. //
  5328. // The gcd of A and N may have only one prime factor: 2. The number of
  5329. // trailing zeros in A is its multiplicity
  5330. uint32_t Mult2 = A.countTrailingZeros();
  5331. // D = 2^Mult2
  5332. // 2. Check if B is divisible by D.
  5333. //
  5334. // B is divisible by D if and only if the multiplicity of prime factor 2 for B
  5335. // is not less than multiplicity of this prime factor for D.
  5336. if (B.countTrailingZeros() < Mult2)
  5337. return SE.getCouldNotCompute();
  5338. // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
  5339. // modulo (N / D).
  5340. //
  5341. // (N / D) may need BW+1 bits in its representation. Hence, we'll use this
  5342. // bit width during computations.
  5343. APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
  5344. APInt Mod(BW + 1, 0);
  5345. Mod.setBit(BW - Mult2); // Mod = N / D
  5346. APInt I = AD.multiplicativeInverse(Mod);
  5347. // 4. Compute the minimum unsigned root of the equation:
  5348. // I * (B / D) mod (N / D)
  5349. APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
  5350. // The result is guaranteed to be less than 2^BW so we may truncate it to BW
  5351. // bits.
  5352. return SE.getConstant(Result.trunc(BW));
  5353. }
  5354. /// SolveQuadraticEquation - Find the roots of the quadratic equation for the
  5355. /// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
  5356. /// might be the same) or two SCEVCouldNotCompute objects.
  5357. ///
  5358. static std::pair<const SCEV *,const SCEV *>
  5359. SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  5360. assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
  5361. const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
  5362. const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
  5363. const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
  5364. // We currently can only solve this if the coefficients are constants.
  5365. if (!LC || !MC || !NC) {
  5366. const SCEV *CNC = SE.getCouldNotCompute();
  5367. return std::make_pair(CNC, CNC);
  5368. }
  5369. uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
  5370. const APInt &L = LC->getValue()->getValue();
  5371. const APInt &M = MC->getValue()->getValue();
  5372. const APInt &N = NC->getValue()->getValue();
  5373. APInt Two(BitWidth, 2);
  5374. APInt Four(BitWidth, 4);
  5375. {
  5376. using namespace APIntOps;
  5377. const APInt& C = L;
  5378. // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
  5379. // The B coefficient is M-N/2
  5380. APInt B(M);
  5381. B -= sdiv(N,Two);
  5382. // The A coefficient is N/2
  5383. APInt A(N.sdiv(Two));
  5384. // Compute the B^2-4ac term.
  5385. APInt SqrtTerm(B);
  5386. SqrtTerm *= B;
  5387. SqrtTerm -= Four * (A * C);
  5388. if (SqrtTerm.isNegative()) {
  5389. // The loop is provably infinite.
  5390. const SCEV *CNC = SE.getCouldNotCompute();
  5391. return std::make_pair(CNC, CNC);
  5392. }
  5393. // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
  5394. // integer value or else APInt::sqrt() will assert.
  5395. APInt SqrtVal(SqrtTerm.sqrt());
  5396. // Compute the two solutions for the quadratic formula.
  5397. // The divisions must be performed as signed divisions.
  5398. APInt NegB(-B);
  5399. APInt TwoA(A << 1);
  5400. if (TwoA.isMinValue()) {
  5401. const SCEV *CNC = SE.getCouldNotCompute();
  5402. return std::make_pair(CNC, CNC);
  5403. }
  5404. LLVMContext &Context = SE.getContext();
  5405. ConstantInt *Solution1 =
  5406. ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
  5407. ConstantInt *Solution2 =
  5408. ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
  5409. return std::make_pair(SE.getConstant(Solution1),
  5410. SE.getConstant(Solution2));
  5411. } // end APIntOps namespace
  5412. }
  5413. /// HowFarToZero - Return the number of times a backedge comparing the specified
  5414. /// value to zero will execute. If not computable, return CouldNotCompute.
  5415. ///
  5416. /// This is only used for loops with a "x != y" exit test. The exit condition is
  5417. /// now expressed as a single expression, V = x-y. So the exit test is
  5418. /// effectively V != 0. We know and take advantage of the fact that this
  5419. /// expression only being used in a comparison by zero context.
  5420. ScalarEvolution::ExitLimit
  5421. ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
  5422. // If the value is a constant
  5423. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
  5424. // If the value is already zero, the branch will execute zero times.
  5425. if (C->getValue()->isZero()) return C;
  5426. return getCouldNotCompute(); // Otherwise it will loop infinitely.
  5427. }
  5428. const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
  5429. if (!AddRec || AddRec->getLoop() != L)
  5430. return getCouldNotCompute();
  5431. // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
  5432. // the quadratic equation to solve it.
  5433. if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
  5434. std::pair<const SCEV *,const SCEV *> Roots =
  5435. SolveQuadraticEquation(AddRec, *this);
  5436. const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
  5437. const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
  5438. if (R1 && R2) {
  5439. #if 0
  5440. dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
  5441. << " sol#2: " << *R2 << "\n";
  5442. #endif
  5443. // Pick the smallest positive root value.
  5444. if (ConstantInt *CB =
  5445. dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
  5446. R1->getValue(),
  5447. R2->getValue()))) {
  5448. if (!CB->getZExtValue())
  5449. std::swap(R1, R2); // R1 is the minimum root now.
  5450. // We can only use this value if the chrec ends up with an exact zero
  5451. // value at this index. When solving for "X*X != 5", for example, we
  5452. // should not accept a root of 2.
  5453. const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
  5454. if (Val->isZero())
  5455. return R1; // We found a quadratic root!
  5456. }
  5457. }
  5458. return getCouldNotCompute();
  5459. }
  5460. // Otherwise we can only handle this if it is affine.
  5461. if (!AddRec->isAffine())
  5462. return getCouldNotCompute();
  5463. // If this is an affine expression, the execution count of this branch is
  5464. // the minimum unsigned root of the following equation:
  5465. //
  5466. // Start + Step*N = 0 (mod 2^BW)
  5467. //
  5468. // equivalent to:
  5469. //
  5470. // Step*N = -Start (mod 2^BW)
  5471. //
  5472. // where BW is the common bit width of Start and Step.
  5473. // Get the initial value for the loop.
  5474. const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
  5475. const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
  5476. // For now we handle only constant steps.
  5477. //
  5478. // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
  5479. // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
  5480. // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
  5481. // We have not yet seen any such cases.
  5482. const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
  5483. if (!StepC || StepC->getValue()->equalsInt(0))
  5484. return getCouldNotCompute();
  5485. // For positive steps (counting up until unsigned overflow):
  5486. // N = -Start/Step (as unsigned)
  5487. // For negative steps (counting down to zero):
  5488. // N = Start/-Step
  5489. // First compute the unsigned distance from zero in the direction of Step.
  5490. bool CountDown = StepC->getValue()->getValue().isNegative();
  5491. const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
  5492. // Handle unitary steps, which cannot wraparound.
  5493. // 1*N = -Start; -1*N = Start (mod 2^BW), so:
  5494. // N = Distance (as unsigned)
  5495. if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
  5496. ConstantRange CR = getUnsignedRange(Start);
  5497. const SCEV *MaxBECount;
  5498. if (!CountDown && CR.getUnsignedMin().isMinValue())
  5499. // When counting up, the worst starting value is 1, not 0.
  5500. MaxBECount = CR.getUnsignedMax().isMinValue()
  5501. ? getConstant(APInt::getMinValue(CR.getBitWidth()))
  5502. : getConstant(APInt::getMaxValue(CR.getBitWidth()));
  5503. else
  5504. MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
  5505. : -CR.getUnsignedMin());
  5506. return ExitLimit(Distance, MaxBECount);
  5507. }
  5508. // As a special case, handle the instance where Step is a positive power of
  5509. // two. In this case, determining whether Step divides Distance evenly can be
  5510. // done by counting and comparing the number of trailing zeros of Step and
  5511. // Distance.
  5512. if (!CountDown) {
  5513. const APInt &StepV = StepC->getValue()->getValue();
  5514. // StepV.isPowerOf2() returns true if StepV is an positive power of two. It
  5515. // also returns true if StepV is maximally negative (eg, INT_MIN), but that
  5516. // case is not handled as this code is guarded by !CountDown.
  5517. if (StepV.isPowerOf2() &&
  5518. GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros())
  5519. return getUDivExactExpr(Distance, Step);
  5520. }
  5521. // If the condition controls loop exit (the loop exits only if the expression
  5522. // is true) and the addition is no-wrap we can use unsigned divide to
  5523. // compute the backedge count. In this case, the step may not divide the
  5524. // distance, but we don't care because if the condition is "missed" the loop
  5525. // will have undefined behavior due to wrapping.
  5526. if (ControlsExit && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
  5527. const SCEV *Exact =
  5528. getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
  5529. return ExitLimit(Exact, Exact);
  5530. }
  5531. // Then, try to solve the above equation provided that Start is constant.
  5532. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
  5533. return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
  5534. -StartC->getValue()->getValue(),
  5535. *this);
  5536. return getCouldNotCompute();
  5537. }
  5538. /// HowFarToNonZero - Return the number of times a backedge checking the
  5539. /// specified value for nonzero will execute. If not computable, return
  5540. /// CouldNotCompute
  5541. ScalarEvolution::ExitLimit
  5542. ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
  5543. // Loops that look like: while (X == 0) are very strange indeed. We don't
  5544. // handle them yet except for the trivial case. This could be expanded in the
  5545. // future as needed.
  5546. // If the value is a constant, check to see if it is known to be non-zero
  5547. // already. If so, the backedge will execute zero times.
  5548. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
  5549. if (!C->getValue()->isNullValue())
  5550. return getConstant(C->getType(), 0);
  5551. return getCouldNotCompute(); // Otherwise it will loop infinitely.
  5552. }
  5553. // We could implement others, but I really doubt anyone writes loops like
  5554. // this, and if they did, they would already be constant folded.
  5555. return getCouldNotCompute();
  5556. }
  5557. /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
  5558. /// (which may not be an immediate predecessor) which has exactly one
  5559. /// successor from which BB is reachable, or null if no such block is
  5560. /// found.
  5561. ///
  5562. std::pair<BasicBlock *, BasicBlock *>
  5563. ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
  5564. // If the block has a unique predecessor, then there is no path from the
  5565. // predecessor to the block that does not go through the direct edge
  5566. // from the predecessor to the block.
  5567. if (BasicBlock *Pred = BB->getSinglePredecessor())
  5568. return std::make_pair(Pred, BB);
  5569. // A loop's header is defined to be a block that dominates the loop.
  5570. // If the header has a unique predecessor outside the loop, it must be
  5571. // a block that has exactly one successor that can reach the loop.
  5572. if (Loop *L = LI->getLoopFor(BB))
  5573. return std::make_pair(L->getLoopPredecessor(), L->getHeader());
  5574. return std::pair<BasicBlock *, BasicBlock *>();
  5575. }
  5576. /// HasSameValue - SCEV structural equivalence is usually sufficient for
  5577. /// testing whether two expressions are equal, however for the purposes of
  5578. /// looking for a condition guarding a loop, it can be useful to be a little
  5579. /// more general, since a front-end may have replicated the controlling
  5580. /// expression.
  5581. ///
  5582. static bool HasSameValue(const SCEV *A, const SCEV *B) {
  5583. // Quick check to see if they are the same SCEV.
  5584. if (A == B) return true;
  5585. // Otherwise, if they're both SCEVUnknown, it's possible that they hold
  5586. // two different instructions with the same value. Check for this case.
  5587. if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
  5588. if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
  5589. if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
  5590. if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
  5591. if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
  5592. return true;
  5593. // Otherwise assume they may have a different value.
  5594. return false;
  5595. }
  5596. /// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
  5597. /// predicate Pred. Return true iff any changes were made.
  5598. ///
  5599. bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
  5600. const SCEV *&LHS, const SCEV *&RHS,
  5601. unsigned Depth) {
  5602. bool Changed = false;
  5603. // If we hit the max recursion limit bail out.
  5604. if (Depth >= 3)
  5605. return false;
  5606. // Canonicalize a constant to the right side.
  5607. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
  5608. // Check for both operands constant.
  5609. if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
  5610. if (ConstantExpr::getICmp(Pred,
  5611. LHSC->getValue(),
  5612. RHSC->getValue())->isNullValue())
  5613. goto trivially_false;
  5614. else
  5615. goto trivially_true;
  5616. }
  5617. // Otherwise swap the operands to put the constant on the right.
  5618. std::swap(LHS, RHS);
  5619. Pred = ICmpInst::getSwappedPredicate(Pred);
  5620. Changed = true;
  5621. }
  5622. // If we're comparing an addrec with a value which is loop-invariant in the
  5623. // addrec's loop, put the addrec on the left. Also make a dominance check,
  5624. // as both operands could be addrecs loop-invariant in each other's loop.
  5625. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
  5626. const Loop *L = AR->getLoop();
  5627. if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
  5628. std::swap(LHS, RHS);
  5629. Pred = ICmpInst::getSwappedPredicate(Pred);
  5630. Changed = true;
  5631. }
  5632. }
  5633. // If there's a constant operand, canonicalize comparisons with boundary
  5634. // cases, and canonicalize *-or-equal comparisons to regular comparisons.
  5635. if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
  5636. const APInt &RA = RC->getValue()->getValue();
  5637. switch (Pred) {
  5638. default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
  5639. case ICmpInst::ICMP_EQ:
  5640. case ICmpInst::ICMP_NE:
  5641. // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
  5642. if (!RA)
  5643. if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
  5644. if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
  5645. if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
  5646. ME->getOperand(0)->isAllOnesValue()) {
  5647. RHS = AE->getOperand(1);
  5648. LHS = ME->getOperand(1);
  5649. Changed = true;
  5650. }
  5651. break;
  5652. case ICmpInst::ICMP_UGE:
  5653. if ((RA - 1).isMinValue()) {
  5654. Pred = ICmpInst::ICMP_NE;
  5655. RHS = getConstant(RA - 1);
  5656. Changed = true;
  5657. break;
  5658. }
  5659. if (RA.isMaxValue()) {
  5660. Pred = ICmpInst::ICMP_EQ;
  5661. Changed = true;
  5662. break;
  5663. }
  5664. if (RA.isMinValue()) goto trivially_true;
  5665. Pred = ICmpInst::ICMP_UGT;
  5666. RHS = getConstant(RA - 1);
  5667. Changed = true;
  5668. break;
  5669. case ICmpInst::ICMP_ULE:
  5670. if ((RA + 1).isMaxValue()) {
  5671. Pred = ICmpInst::ICMP_NE;
  5672. RHS = getConstant(RA + 1);
  5673. Changed = true;
  5674. break;
  5675. }
  5676. if (RA.isMinValue()) {
  5677. Pred = ICmpInst::ICMP_EQ;
  5678. Changed = true;
  5679. break;
  5680. }
  5681. if (RA.isMaxValue()) goto trivially_true;
  5682. Pred = ICmpInst::ICMP_ULT;
  5683. RHS = getConstant(RA + 1);
  5684. Changed = true;
  5685. break;
  5686. case ICmpInst::ICMP_SGE:
  5687. if ((RA - 1).isMinSignedValue()) {
  5688. Pred = ICmpInst::ICMP_NE;
  5689. RHS = getConstant(RA - 1);
  5690. Changed = true;
  5691. break;
  5692. }
  5693. if (RA.isMaxSignedValue()) {
  5694. Pred = ICmpInst::ICMP_EQ;
  5695. Changed = true;
  5696. break;
  5697. }
  5698. if (RA.isMinSignedValue()) goto trivially_true;
  5699. Pred = ICmpInst::ICMP_SGT;
  5700. RHS = getConstant(RA - 1);
  5701. Changed = true;
  5702. break;
  5703. case ICmpInst::ICMP_SLE:
  5704. if ((RA + 1).isMaxSignedValue()) {
  5705. Pred = ICmpInst::ICMP_NE;
  5706. RHS = getConstant(RA + 1);
  5707. Changed = true;
  5708. break;
  5709. }
  5710. if (RA.isMinSignedValue()) {
  5711. Pred = ICmpInst::ICMP_EQ;
  5712. Changed = true;
  5713. break;
  5714. }
  5715. if (RA.isMaxSignedValue()) goto trivially_true;
  5716. Pred = ICmpInst::ICMP_SLT;
  5717. RHS = getConstant(RA + 1);
  5718. Changed = true;
  5719. break;
  5720. case ICmpInst::ICMP_UGT:
  5721. if (RA.isMinValue()) {
  5722. Pred = ICmpInst::ICMP_NE;
  5723. Changed = true;
  5724. break;
  5725. }
  5726. if ((RA + 1).isMaxValue()) {
  5727. Pred = ICmpInst::ICMP_EQ;
  5728. RHS = getConstant(RA + 1);
  5729. Changed = true;
  5730. break;
  5731. }
  5732. if (RA.isMaxValue()) goto trivially_false;
  5733. break;
  5734. case ICmpInst::ICMP_ULT:
  5735. if (RA.isMaxValue()) {
  5736. Pred = ICmpInst::ICMP_NE;
  5737. Changed = true;
  5738. break;
  5739. }
  5740. if ((RA - 1).isMinValue()) {
  5741. Pred = ICmpInst::ICMP_EQ;
  5742. RHS = getConstant(RA - 1);
  5743. Changed = true;
  5744. break;
  5745. }
  5746. if (RA.isMinValue()) goto trivially_false;
  5747. break;
  5748. case ICmpInst::ICMP_SGT:
  5749. if (RA.isMinSignedValue()) {
  5750. Pred = ICmpInst::ICMP_NE;
  5751. Changed = true;
  5752. break;
  5753. }
  5754. if ((RA + 1).isMaxSignedValue()) {
  5755. Pred = ICmpInst::ICMP_EQ;
  5756. RHS = getConstant(RA + 1);
  5757. Changed = true;
  5758. break;
  5759. }
  5760. if (RA.isMaxSignedValue()) goto trivially_false;
  5761. break;
  5762. case ICmpInst::ICMP_SLT:
  5763. if (RA.isMaxSignedValue()) {
  5764. Pred = ICmpInst::ICMP_NE;
  5765. Changed = true;
  5766. break;
  5767. }
  5768. if ((RA - 1).isMinSignedValue()) {
  5769. Pred = ICmpInst::ICMP_EQ;
  5770. RHS = getConstant(RA - 1);
  5771. Changed = true;
  5772. break;
  5773. }
  5774. if (RA.isMinSignedValue()) goto trivially_false;
  5775. break;
  5776. }
  5777. }
  5778. // Check for obvious equality.
  5779. if (HasSameValue(LHS, RHS)) {
  5780. if (ICmpInst::isTrueWhenEqual(Pred))
  5781. goto trivially_true;
  5782. if (ICmpInst::isFalseWhenEqual(Pred))
  5783. goto trivially_false;
  5784. }
  5785. // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
  5786. // adding or subtracting 1 from one of the operands.
  5787. switch (Pred) {
  5788. case ICmpInst::ICMP_SLE:
  5789. if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
  5790. RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
  5791. SCEV::FlagNSW);
  5792. Pred = ICmpInst::ICMP_SLT;
  5793. Changed = true;
  5794. } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
  5795. LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
  5796. SCEV::FlagNSW);
  5797. Pred = ICmpInst::ICMP_SLT;
  5798. Changed = true;
  5799. }
  5800. break;
  5801. case ICmpInst::ICMP_SGE:
  5802. if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
  5803. RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
  5804. SCEV::FlagNSW);
  5805. Pred = ICmpInst::ICMP_SGT;
  5806. Changed = true;
  5807. } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
  5808. LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
  5809. SCEV::FlagNSW);
  5810. Pred = ICmpInst::ICMP_SGT;
  5811. Changed = true;
  5812. }
  5813. break;
  5814. case ICmpInst::ICMP_ULE:
  5815. if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
  5816. RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
  5817. SCEV::FlagNUW);
  5818. Pred = ICmpInst::ICMP_ULT;
  5819. Changed = true;
  5820. } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
  5821. LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
  5822. SCEV::FlagNUW);
  5823. Pred = ICmpInst::ICMP_ULT;
  5824. Changed = true;
  5825. }
  5826. break;
  5827. case ICmpInst::ICMP_UGE:
  5828. if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
  5829. RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
  5830. SCEV::FlagNUW);
  5831. Pred = ICmpInst::ICMP_UGT;
  5832. Changed = true;
  5833. } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
  5834. LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
  5835. SCEV::FlagNUW);
  5836. Pred = ICmpInst::ICMP_UGT;
  5837. Changed = true;
  5838. }
  5839. break;
  5840. default:
  5841. break;
  5842. }
  5843. // TODO: More simplifications are possible here.
  5844. // Recursively simplify until we either hit a recursion limit or nothing
  5845. // changes.
  5846. if (Changed)
  5847. return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
  5848. return Changed;
  5849. trivially_true:
  5850. // Return 0 == 0.
  5851. LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
  5852. Pred = ICmpInst::ICMP_EQ;
  5853. return true;
  5854. trivially_false:
  5855. // Return 0 != 0.
  5856. LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
  5857. Pred = ICmpInst::ICMP_NE;
  5858. return true;
  5859. }
  5860. bool ScalarEvolution::isKnownNegative(const SCEV *S) {
  5861. return getSignedRange(S).getSignedMax().isNegative();
  5862. }
  5863. bool ScalarEvolution::isKnownPositive(const SCEV *S) {
  5864. return getSignedRange(S).getSignedMin().isStrictlyPositive();
  5865. }
  5866. bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
  5867. return !getSignedRange(S).getSignedMin().isNegative();
  5868. }
  5869. bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
  5870. return !getSignedRange(S).getSignedMax().isStrictlyPositive();
  5871. }
  5872. bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
  5873. return isKnownNegative(S) || isKnownPositive(S);
  5874. }
  5875. bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
  5876. const SCEV *LHS, const SCEV *RHS) {
  5877. // Canonicalize the inputs first.
  5878. (void)SimplifyICmpOperands(Pred, LHS, RHS);
  5879. // If LHS or RHS is an addrec, check to see if the condition is true in
  5880. // every iteration of the loop.
  5881. // If LHS and RHS are both addrec, both conditions must be true in
  5882. // every iteration of the loop.
  5883. const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
  5884. const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
  5885. bool LeftGuarded = false;
  5886. bool RightGuarded = false;
  5887. if (LAR) {
  5888. const Loop *L = LAR->getLoop();
  5889. if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
  5890. isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
  5891. if (!RAR) return true;
  5892. LeftGuarded = true;
  5893. }
  5894. }
  5895. if (RAR) {
  5896. const Loop *L = RAR->getLoop();
  5897. if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
  5898. isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
  5899. if (!LAR) return true;
  5900. RightGuarded = true;
  5901. }
  5902. }
  5903. if (LeftGuarded && RightGuarded)
  5904. return true;
  5905. // Otherwise see what can be done with known constant ranges.
  5906. return isKnownPredicateWithRanges(Pred, LHS, RHS);
  5907. }
  5908. bool
  5909. ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
  5910. const SCEV *LHS, const SCEV *RHS) {
  5911. if (HasSameValue(LHS, RHS))
  5912. return ICmpInst::isTrueWhenEqual(Pred);
  5913. // This code is split out from isKnownPredicate because it is called from
  5914. // within isLoopEntryGuardedByCond.
  5915. switch (Pred) {
  5916. default:
  5917. llvm_unreachable("Unexpected ICmpInst::Predicate value!");
  5918. case ICmpInst::ICMP_SGT:
  5919. std::swap(LHS, RHS);
  5920. case ICmpInst::ICMP_SLT: {
  5921. ConstantRange LHSRange = getSignedRange(LHS);
  5922. ConstantRange RHSRange = getSignedRange(RHS);
  5923. if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
  5924. return true;
  5925. if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
  5926. return false;
  5927. break;
  5928. }
  5929. case ICmpInst::ICMP_SGE:
  5930. std::swap(LHS, RHS);
  5931. case ICmpInst::ICMP_SLE: {
  5932. ConstantRange LHSRange = getSignedRange(LHS);
  5933. ConstantRange RHSRange = getSignedRange(RHS);
  5934. if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
  5935. return true;
  5936. if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
  5937. return false;
  5938. break;
  5939. }
  5940. case ICmpInst::ICMP_UGT:
  5941. std::swap(LHS, RHS);
  5942. case ICmpInst::ICMP_ULT: {
  5943. ConstantRange LHSRange = getUnsignedRange(LHS);
  5944. ConstantRange RHSRange = getUnsignedRange(RHS);
  5945. if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
  5946. return true;
  5947. if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
  5948. return false;
  5949. break;
  5950. }
  5951. case ICmpInst::ICMP_UGE:
  5952. std::swap(LHS, RHS);
  5953. case ICmpInst::ICMP_ULE: {
  5954. ConstantRange LHSRange = getUnsignedRange(LHS);
  5955. ConstantRange RHSRange = getUnsignedRange(RHS);
  5956. if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
  5957. return true;
  5958. if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
  5959. return false;
  5960. break;
  5961. }
  5962. case ICmpInst::ICMP_NE: {
  5963. if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
  5964. return true;
  5965. if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
  5966. return true;
  5967. const SCEV *Diff = getMinusSCEV(LHS, RHS);
  5968. if (isKnownNonZero(Diff))
  5969. return true;
  5970. break;
  5971. }
  5972. case ICmpInst::ICMP_EQ:
  5973. // The check at the top of the function catches the case where
  5974. // the values are known to be equal.
  5975. break;
  5976. }
  5977. return false;
  5978. }
  5979. /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
  5980. /// protected by a conditional between LHS and RHS. This is used to
  5981. /// to eliminate casts.
  5982. bool
  5983. ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
  5984. ICmpInst::Predicate Pred,
  5985. const SCEV *LHS, const SCEV *RHS) {
  5986. // Interpret a null as meaning no loop, where there is obviously no guard
  5987. // (interprocedural conditions notwithstanding).
  5988. if (!L) return true;
  5989. if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
  5990. BasicBlock *Latch = L->getLoopLatch();
  5991. if (!Latch)
  5992. return false;
  5993. BranchInst *LoopContinuePredicate =
  5994. dyn_cast<BranchInst>(Latch->getTerminator());
  5995. if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
  5996. isImpliedCond(Pred, LHS, RHS,
  5997. LoopContinuePredicate->getCondition(),
  5998. LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
  5999. return true;
  6000. // Check conditions due to any @llvm.assume intrinsics.
  6001. for (auto &AssumeVH : AC->assumptions()) {
  6002. if (!AssumeVH)
  6003. continue;
  6004. auto *CI = cast<CallInst>(AssumeVH);
  6005. if (!DT->dominates(CI, Latch->getTerminator()))
  6006. continue;
  6007. if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
  6008. return true;
  6009. }
  6010. struct ClearWalkingBEDominatingCondsOnExit {
  6011. ScalarEvolution &SE;
  6012. explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE)
  6013. : SE(SE){};
  6014. ~ClearWalkingBEDominatingCondsOnExit() {
  6015. SE.WalkingBEDominatingConds = false;
  6016. }
  6017. };
  6018. // We don't want more than one activation of the following loop on the stack
  6019. // -- that can lead to O(n!) time complexity.
  6020. if (WalkingBEDominatingConds)
  6021. return false;
  6022. WalkingBEDominatingConds = true;
  6023. ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this);
  6024. // If the loop is not reachable from the entry block, we risk running into an
  6025. // infinite loop as we walk up into the dom tree. These loops do not matter
  6026. // anyway, so we just return a conservative answer when we see them.
  6027. if (!DT->isReachableFromEntry(L->getHeader()))
  6028. return false;
  6029. for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()];
  6030. DTN != HeaderDTN;
  6031. DTN = DTN->getIDom()) {
  6032. assert(DTN && "should reach the loop header before reaching the root!");
  6033. BasicBlock *BB = DTN->getBlock();
  6034. BasicBlock *PBB = BB->getSinglePredecessor();
  6035. if (!PBB)
  6036. continue;
  6037. BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
  6038. if (!ContinuePredicate || !ContinuePredicate->isConditional())
  6039. continue;
  6040. Value *Condition = ContinuePredicate->getCondition();
  6041. // If we have an edge `E` within the loop body that dominates the only
  6042. // latch, the condition guarding `E` also guards the backedge. This
  6043. // reasoning works only for loops with a single latch.
  6044. BasicBlockEdge DominatingEdge(PBB, BB);
  6045. if (DominatingEdge.isSingleEdge()) {
  6046. // We're constructively (and conservatively) enumerating edges within the
  6047. // loop body that dominate the latch. The dominator tree better agree
  6048. // with us on this:
  6049. assert(DT->dominates(DominatingEdge, Latch) && "should be!");
  6050. if (isImpliedCond(Pred, LHS, RHS, Condition,
  6051. BB != ContinuePredicate->getSuccessor(0)))
  6052. return true;
  6053. }
  6054. }
  6055. return false;
  6056. }
  6057. /// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
  6058. /// by a conditional between LHS and RHS. This is used to help avoid max
  6059. /// expressions in loop trip counts, and to eliminate casts.
  6060. bool
  6061. ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
  6062. ICmpInst::Predicate Pred,
  6063. const SCEV *LHS, const SCEV *RHS) {
  6064. // Interpret a null as meaning no loop, where there is obviously no guard
  6065. // (interprocedural conditions notwithstanding).
  6066. if (!L) return false;
  6067. if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
  6068. // Starting at the loop predecessor, climb up the predecessor chain, as long
  6069. // as there are predecessors that can be found that have unique successors
  6070. // leading to the original header.
  6071. for (std::pair<BasicBlock *, BasicBlock *>
  6072. Pair(L->getLoopPredecessor(), L->getHeader());
  6073. Pair.first;
  6074. Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
  6075. BranchInst *LoopEntryPredicate =
  6076. dyn_cast<BranchInst>(Pair.first->getTerminator());
  6077. if (!LoopEntryPredicate ||
  6078. LoopEntryPredicate->isUnconditional())
  6079. continue;
  6080. if (isImpliedCond(Pred, LHS, RHS,
  6081. LoopEntryPredicate->getCondition(),
  6082. LoopEntryPredicate->getSuccessor(0) != Pair.second))
  6083. return true;
  6084. }
  6085. // Check conditions due to any @llvm.assume intrinsics.
  6086. for (auto &AssumeVH : AC->assumptions()) {
  6087. if (!AssumeVH)
  6088. continue;
  6089. auto *CI = cast<CallInst>(AssumeVH);
  6090. if (!DT->dominates(CI, L->getHeader()))
  6091. continue;
  6092. if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
  6093. return true;
  6094. }
  6095. return false;
  6096. }
  6097. /// RAII wrapper to prevent recursive application of isImpliedCond.
  6098. /// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
  6099. /// currently evaluating isImpliedCond.
  6100. struct MarkPendingLoopPredicate {
  6101. Value *Cond;
  6102. DenseSet<Value*> &LoopPreds;
  6103. bool Pending;
  6104. MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
  6105. : Cond(C), LoopPreds(LP) {
  6106. Pending = !LoopPreds.insert(Cond).second;
  6107. }
  6108. ~MarkPendingLoopPredicate() {
  6109. if (!Pending)
  6110. LoopPreds.erase(Cond);
  6111. }
  6112. };
  6113. /// isImpliedCond - Test whether the condition described by Pred, LHS,
  6114. /// and RHS is true whenever the given Cond value evaluates to true.
  6115. bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
  6116. const SCEV *LHS, const SCEV *RHS,
  6117. Value *FoundCondValue,
  6118. bool Inverse) {
  6119. MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
  6120. if (Mark.Pending)
  6121. return false;
  6122. // Recursively handle And and Or conditions.
  6123. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
  6124. if (BO->getOpcode() == Instruction::And) {
  6125. if (!Inverse)
  6126. return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
  6127. isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
  6128. } else if (BO->getOpcode() == Instruction::Or) {
  6129. if (Inverse)
  6130. return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
  6131. isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
  6132. }
  6133. }
  6134. ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
  6135. if (!ICI) return false;
  6136. // Now that we found a conditional branch that dominates the loop or controls
  6137. // the loop latch. Check to see if it is the comparison we are looking for.
  6138. ICmpInst::Predicate FoundPred;
  6139. if (Inverse)
  6140. FoundPred = ICI->getInversePredicate();
  6141. else
  6142. FoundPred = ICI->getPredicate();
  6143. const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
  6144. const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
  6145. // Balance the types.
  6146. if (getTypeSizeInBits(LHS->getType()) <
  6147. getTypeSizeInBits(FoundLHS->getType())) {
  6148. if (CmpInst::isSigned(Pred)) {
  6149. LHS = getSignExtendExpr(LHS, FoundLHS->getType());
  6150. RHS = getSignExtendExpr(RHS, FoundLHS->getType());
  6151. } else {
  6152. LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
  6153. RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
  6154. }
  6155. } else if (getTypeSizeInBits(LHS->getType()) >
  6156. getTypeSizeInBits(FoundLHS->getType())) {
  6157. if (CmpInst::isSigned(FoundPred)) {
  6158. FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
  6159. FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
  6160. } else {
  6161. FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
  6162. FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
  6163. }
  6164. }
  6165. // Canonicalize the query to match the way instcombine will have
  6166. // canonicalized the comparison.
  6167. if (SimplifyICmpOperands(Pred, LHS, RHS))
  6168. if (LHS == RHS)
  6169. return CmpInst::isTrueWhenEqual(Pred);
  6170. if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
  6171. if (FoundLHS == FoundRHS)
  6172. return CmpInst::isFalseWhenEqual(FoundPred);
  6173. // Check to see if we can make the LHS or RHS match.
  6174. if (LHS == FoundRHS || RHS == FoundLHS) {
  6175. if (isa<SCEVConstant>(RHS)) {
  6176. std::swap(FoundLHS, FoundRHS);
  6177. FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
  6178. } else {
  6179. std::swap(LHS, RHS);
  6180. Pred = ICmpInst::getSwappedPredicate(Pred);
  6181. }
  6182. }
  6183. // Check whether the found predicate is the same as the desired predicate.
  6184. if (FoundPred == Pred)
  6185. return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
  6186. // Check whether swapping the found predicate makes it the same as the
  6187. // desired predicate.
  6188. if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
  6189. if (isa<SCEVConstant>(RHS))
  6190. return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
  6191. else
  6192. return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
  6193. RHS, LHS, FoundLHS, FoundRHS);
  6194. }
  6195. // Check if we can make progress by sharpening ranges.
  6196. if (FoundPred == ICmpInst::ICMP_NE &&
  6197. (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
  6198. const SCEVConstant *C = nullptr;
  6199. const SCEV *V = nullptr;
  6200. if (isa<SCEVConstant>(FoundLHS)) {
  6201. C = cast<SCEVConstant>(FoundLHS);
  6202. V = FoundRHS;
  6203. } else {
  6204. C = cast<SCEVConstant>(FoundRHS);
  6205. V = FoundLHS;
  6206. }
  6207. // The guarding predicate tells us that C != V. If the known range
  6208. // of V is [C, t), we can sharpen the range to [C + 1, t). The
  6209. // range we consider has to correspond to same signedness as the
  6210. // predicate we're interested in folding.
  6211. APInt Min = ICmpInst::isSigned(Pred) ?
  6212. getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
  6213. if (Min == C->getValue()->getValue()) {
  6214. // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
  6215. // This is true even if (Min + 1) wraps around -- in case of
  6216. // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
  6217. APInt SharperMin = Min + 1;
  6218. switch (Pred) {
  6219. case ICmpInst::ICMP_SGE:
  6220. case ICmpInst::ICMP_UGE:
  6221. // We know V `Pred` SharperMin. If this implies LHS `Pred`
  6222. // RHS, we're done.
  6223. if (isImpliedCondOperands(Pred, LHS, RHS, V,
  6224. getConstant(SharperMin)))
  6225. return true;
  6226. case ICmpInst::ICMP_SGT:
  6227. case ICmpInst::ICMP_UGT:
  6228. // We know from the range information that (V `Pred` Min ||
  6229. // V == Min). We know from the guarding condition that !(V
  6230. // == Min). This gives us
  6231. //
  6232. // V `Pred` Min || V == Min && !(V == Min)
  6233. // => V `Pred` Min
  6234. //
  6235. // If V `Pred` Min implies LHS `Pred` RHS, we're done.
  6236. if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
  6237. return true;
  6238. default:
  6239. // No change
  6240. break;
  6241. }
  6242. }
  6243. }
  6244. // Check whether the actual condition is beyond sufficient.
  6245. if (FoundPred == ICmpInst::ICMP_EQ)
  6246. if (ICmpInst::isTrueWhenEqual(Pred))
  6247. if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
  6248. return true;
  6249. if (Pred == ICmpInst::ICMP_NE)
  6250. if (!ICmpInst::isTrueWhenEqual(FoundPred))
  6251. if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
  6252. return true;
  6253. // Otherwise assume the worst.
  6254. return false;
  6255. }
  6256. /// isImpliedCondOperands - Test whether the condition described by Pred,
  6257. /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
  6258. /// and FoundRHS is true.
  6259. bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
  6260. const SCEV *LHS, const SCEV *RHS,
  6261. const SCEV *FoundLHS,
  6262. const SCEV *FoundRHS) {
  6263. if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
  6264. return true;
  6265. return isImpliedCondOperandsHelper(Pred, LHS, RHS,
  6266. FoundLHS, FoundRHS) ||
  6267. // ~x < ~y --> x > y
  6268. isImpliedCondOperandsHelper(Pred, LHS, RHS,
  6269. getNotSCEV(FoundRHS),
  6270. getNotSCEV(FoundLHS));
  6271. }
  6272. /// If Expr computes ~A, return A else return nullptr
  6273. static const SCEV *MatchNotExpr(const SCEV *Expr) {
  6274. const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
  6275. if (!Add || Add->getNumOperands() != 2) return nullptr;
  6276. const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0));
  6277. if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue()))
  6278. return nullptr;
  6279. const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
  6280. if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr;
  6281. const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0));
  6282. if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue()))
  6283. return nullptr;
  6284. return AddRHS->getOperand(1);
  6285. }
  6286. /// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
  6287. template<typename MaxExprType>
  6288. static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
  6289. const SCEV *Candidate) {
  6290. const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
  6291. if (!MaxExpr) return false;
  6292. auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate);
  6293. return It != MaxExpr->op_end();
  6294. }
  6295. /// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
  6296. template<typename MaxExprType>
  6297. static bool IsMinConsistingOf(ScalarEvolution &SE,
  6298. const SCEV *MaybeMinExpr,
  6299. const SCEV *Candidate) {
  6300. const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
  6301. if (!MaybeMaxExpr)
  6302. return false;
  6303. return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
  6304. }
  6305. /// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max
  6306. /// expression?
  6307. static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
  6308. ICmpInst::Predicate Pred,
  6309. const SCEV *LHS, const SCEV *RHS) {
  6310. switch (Pred) {
  6311. default:
  6312. return false;
  6313. case ICmpInst::ICMP_SGE:
  6314. std::swap(LHS, RHS);
  6315. // fall through
  6316. case ICmpInst::ICMP_SLE:
  6317. return
  6318. // min(A, ...) <= A
  6319. IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
  6320. // A <= max(A, ...)
  6321. IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
  6322. case ICmpInst::ICMP_UGE:
  6323. std::swap(LHS, RHS);
  6324. // fall through
  6325. case ICmpInst::ICMP_ULE:
  6326. return
  6327. // min(A, ...) <= A
  6328. IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
  6329. // A <= max(A, ...)
  6330. IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
  6331. }
  6332. llvm_unreachable("covered switch fell through?!");
  6333. }
  6334. /// isImpliedCondOperandsHelper - Test whether the condition described by
  6335. /// Pred, LHS, and RHS is true whenever the condition described by Pred,
  6336. /// FoundLHS, and FoundRHS is true.
  6337. bool
  6338. ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
  6339. const SCEV *LHS, const SCEV *RHS,
  6340. const SCEV *FoundLHS,
  6341. const SCEV *FoundRHS) {
  6342. auto IsKnownPredicateFull =
  6343. [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
  6344. return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
  6345. IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS);
  6346. };
  6347. switch (Pred) {
  6348. default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
  6349. case ICmpInst::ICMP_EQ:
  6350. case ICmpInst::ICMP_NE:
  6351. if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
  6352. return true;
  6353. break;
  6354. case ICmpInst::ICMP_SLT:
  6355. case ICmpInst::ICMP_SLE:
  6356. if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
  6357. IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS))
  6358. return true;
  6359. break;
  6360. case ICmpInst::ICMP_SGT:
  6361. case ICmpInst::ICMP_SGE:
  6362. if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
  6363. IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS))
  6364. return true;
  6365. break;
  6366. case ICmpInst::ICMP_ULT:
  6367. case ICmpInst::ICMP_ULE:
  6368. if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
  6369. IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS))
  6370. return true;
  6371. break;
  6372. case ICmpInst::ICMP_UGT:
  6373. case ICmpInst::ICMP_UGE:
  6374. if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
  6375. IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS))
  6376. return true;
  6377. break;
  6378. }
  6379. return false;
  6380. }
  6381. /// isImpliedCondOperandsViaRanges - helper function for isImpliedCondOperands.
  6382. /// Tries to get cases like "X `sgt` 0 => X - 1 `sgt` -1".
  6383. bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
  6384. const SCEV *LHS,
  6385. const SCEV *RHS,
  6386. const SCEV *FoundLHS,
  6387. const SCEV *FoundRHS) {
  6388. if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
  6389. // The restriction on `FoundRHS` be lifted easily -- it exists only to
  6390. // reduce the compile time impact of this optimization.
  6391. return false;
  6392. const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
  6393. if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
  6394. !isa<SCEVConstant>(AddLHS->getOperand(0)))
  6395. return false;
  6396. APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
  6397. // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
  6398. // antecedent "`FoundLHS` `Pred` `FoundRHS`".
  6399. ConstantRange FoundLHSRange =
  6400. ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
  6401. // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
  6402. // for `LHS`:
  6403. APInt Addend =
  6404. cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
  6405. ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
  6406. // We can also compute the range of values for `LHS` that satisfy the
  6407. // consequent, "`LHS` `Pred` `RHS`":
  6408. APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
  6409. ConstantRange SatisfyingLHSRange =
  6410. ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
  6411. // The antecedent implies the consequent if every value of `LHS` that
  6412. // satisfies the antecedent also satisfies the consequent.
  6413. return SatisfyingLHSRange.contains(LHSRange);
  6414. }
  6415. // Verify if an linear IV with positive stride can overflow when in a
  6416. // less-than comparison, knowing the invariant term of the comparison, the
  6417. // stride and the knowledge of NSW/NUW flags on the recurrence.
  6418. bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
  6419. bool IsSigned, bool NoWrap) {
  6420. if (NoWrap) return false;
  6421. unsigned BitWidth = getTypeSizeInBits(RHS->getType());
  6422. const SCEV *One = getConstant(Stride->getType(), 1);
  6423. if (IsSigned) {
  6424. APInt MaxRHS = getSignedRange(RHS).getSignedMax();
  6425. APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
  6426. APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
  6427. .getSignedMax();
  6428. // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
  6429. return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
  6430. }
  6431. APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
  6432. APInt MaxValue = APInt::getMaxValue(BitWidth);
  6433. APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
  6434. .getUnsignedMax();
  6435. // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
  6436. return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
  6437. }
  6438. // Verify if an linear IV with negative stride can overflow when in a
  6439. // greater-than comparison, knowing the invariant term of the comparison,
  6440. // the stride and the knowledge of NSW/NUW flags on the recurrence.
  6441. bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
  6442. bool IsSigned, bool NoWrap) {
  6443. if (NoWrap) return false;
  6444. unsigned BitWidth = getTypeSizeInBits(RHS->getType());
  6445. const SCEV *One = getConstant(Stride->getType(), 1);
  6446. if (IsSigned) {
  6447. APInt MinRHS = getSignedRange(RHS).getSignedMin();
  6448. APInt MinValue = APInt::getSignedMinValue(BitWidth);
  6449. APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
  6450. .getSignedMax();
  6451. // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
  6452. return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
  6453. }
  6454. APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
  6455. APInt MinValue = APInt::getMinValue(BitWidth);
  6456. APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
  6457. .getUnsignedMax();
  6458. // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
  6459. return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
  6460. }
  6461. // Compute the backedge taken count knowing the interval difference, the
  6462. // stride and presence of the equality in the comparison.
  6463. const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
  6464. bool Equality) {
  6465. const SCEV *One = getConstant(Step->getType(), 1);
  6466. Delta = Equality ? getAddExpr(Delta, Step)
  6467. : getAddExpr(Delta, getMinusSCEV(Step, One));
  6468. return getUDivExpr(Delta, Step);
  6469. }
  6470. /// HowManyLessThans - Return the number of times a backedge containing the
  6471. /// specified less-than comparison will execute. If not computable, return
  6472. /// CouldNotCompute.
  6473. ///
  6474. /// @param ControlsExit is true when the LHS < RHS condition directly controls
  6475. /// the branch (loops exits only if condition is true). In this case, we can use
  6476. /// NoWrapFlags to skip overflow checks.
  6477. ScalarEvolution::ExitLimit
  6478. ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
  6479. const Loop *L, bool IsSigned,
  6480. bool ControlsExit) {
  6481. // We handle only IV < Invariant
  6482. if (!isLoopInvariant(RHS, L))
  6483. return getCouldNotCompute();
  6484. const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
  6485. // Avoid weird loops
  6486. if (!IV || IV->getLoop() != L || !IV->isAffine())
  6487. return getCouldNotCompute();
  6488. bool NoWrap = ControlsExit &&
  6489. IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
  6490. const SCEV *Stride = IV->getStepRecurrence(*this);
  6491. // Avoid negative or zero stride values
  6492. if (!isKnownPositive(Stride))
  6493. return getCouldNotCompute();
  6494. // Avoid proven overflow cases: this will ensure that the backedge taken count
  6495. // will not generate any unsigned overflow. Relaxed no-overflow conditions
  6496. // exploit NoWrapFlags, allowing to optimize in presence of undefined
  6497. // behaviors like the case of C language.
  6498. if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
  6499. return getCouldNotCompute();
  6500. ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
  6501. : ICmpInst::ICMP_ULT;
  6502. const SCEV *Start = IV->getStart();
  6503. const SCEV *End = RHS;
  6504. if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) {
  6505. const SCEV *Diff = getMinusSCEV(RHS, Start);
  6506. // If we have NoWrap set, then we can assume that the increment won't
  6507. // overflow, in which case if RHS - Start is a constant, we don't need to
  6508. // do a max operation since we can just figure it out statically
  6509. if (NoWrap && isa<SCEVConstant>(Diff)) {
  6510. APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
  6511. if (D.isNegative())
  6512. End = Start;
  6513. } else
  6514. End = IsSigned ? getSMaxExpr(RHS, Start)
  6515. : getUMaxExpr(RHS, Start);
  6516. }
  6517. const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
  6518. APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
  6519. : getUnsignedRange(Start).getUnsignedMin();
  6520. APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
  6521. : getUnsignedRange(Stride).getUnsignedMin();
  6522. unsigned BitWidth = getTypeSizeInBits(LHS->getType());
  6523. APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
  6524. : APInt::getMaxValue(BitWidth) - (MinStride - 1);
  6525. // Although End can be a MAX expression we estimate MaxEnd considering only
  6526. // the case End = RHS. This is safe because in the other case (End - Start)
  6527. // is zero, leading to a zero maximum backedge taken count.
  6528. APInt MaxEnd =
  6529. IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
  6530. : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
  6531. const SCEV *MaxBECount;
  6532. if (isa<SCEVConstant>(BECount))
  6533. MaxBECount = BECount;
  6534. else
  6535. MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
  6536. getConstant(MinStride), false);
  6537. if (isa<SCEVCouldNotCompute>(MaxBECount))
  6538. MaxBECount = BECount;
  6539. return ExitLimit(BECount, MaxBECount);
  6540. }
  6541. ScalarEvolution::ExitLimit
  6542. ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
  6543. const Loop *L, bool IsSigned,
  6544. bool ControlsExit) {
  6545. // We handle only IV > Invariant
  6546. if (!isLoopInvariant(RHS, L))
  6547. return getCouldNotCompute();
  6548. const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
  6549. // Avoid weird loops
  6550. if (!IV || IV->getLoop() != L || !IV->isAffine())
  6551. return getCouldNotCompute();
  6552. bool NoWrap = ControlsExit &&
  6553. IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
  6554. const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
  6555. // Avoid negative or zero stride values
  6556. if (!isKnownPositive(Stride))
  6557. return getCouldNotCompute();
  6558. // Avoid proven overflow cases: this will ensure that the backedge taken count
  6559. // will not generate any unsigned overflow. Relaxed no-overflow conditions
  6560. // exploit NoWrapFlags, allowing to optimize in presence of undefined
  6561. // behaviors like the case of C language.
  6562. if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
  6563. return getCouldNotCompute();
  6564. ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
  6565. : ICmpInst::ICMP_UGT;
  6566. const SCEV *Start = IV->getStart();
  6567. const SCEV *End = RHS;
  6568. if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
  6569. const SCEV *Diff = getMinusSCEV(RHS, Start);
  6570. // If we have NoWrap set, then we can assume that the increment won't
  6571. // overflow, in which case if RHS - Start is a constant, we don't need to
  6572. // do a max operation since we can just figure it out statically
  6573. if (NoWrap && isa<SCEVConstant>(Diff)) {
  6574. APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
  6575. if (!D.isNegative())
  6576. End = Start;
  6577. } else
  6578. End = IsSigned ? getSMinExpr(RHS, Start)
  6579. : getUMinExpr(RHS, Start);
  6580. }
  6581. const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
  6582. APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
  6583. : getUnsignedRange(Start).getUnsignedMax();
  6584. APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
  6585. : getUnsignedRange(Stride).getUnsignedMin();
  6586. unsigned BitWidth = getTypeSizeInBits(LHS->getType());
  6587. APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
  6588. : APInt::getMinValue(BitWidth) + (MinStride - 1);
  6589. // Although End can be a MIN expression we estimate MinEnd considering only
  6590. // the case End = RHS. This is safe because in the other case (Start - End)
  6591. // is zero, leading to a zero maximum backedge taken count.
  6592. APInt MinEnd =
  6593. IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
  6594. : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
  6595. const SCEV *MaxBECount = getCouldNotCompute();
  6596. if (isa<SCEVConstant>(BECount))
  6597. MaxBECount = BECount;
  6598. else
  6599. MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
  6600. getConstant(MinStride), false);
  6601. if (isa<SCEVCouldNotCompute>(MaxBECount))
  6602. MaxBECount = BECount;
  6603. return ExitLimit(BECount, MaxBECount);
  6604. }
  6605. /// getNumIterationsInRange - Return the number of iterations of this loop that
  6606. /// produce values in the specified constant range. Another way of looking at
  6607. /// this is that it returns the first iteration number where the value is not in
  6608. /// the condition, thus computing the exit count. If the iteration count can't
  6609. /// be computed, an instance of SCEVCouldNotCompute is returned.
  6610. const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
  6611. ScalarEvolution &SE) const {
  6612. if (Range.isFullSet()) // Infinite loop.
  6613. return SE.getCouldNotCompute();
  6614. // If the start is a non-zero constant, shift the range to simplify things.
  6615. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
  6616. if (!SC->getValue()->isZero()) {
  6617. SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
  6618. Operands[0] = SE.getConstant(SC->getType(), 0);
  6619. const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
  6620. getNoWrapFlags(FlagNW));
  6621. if (const SCEVAddRecExpr *ShiftedAddRec =
  6622. dyn_cast<SCEVAddRecExpr>(Shifted))
  6623. return ShiftedAddRec->getNumIterationsInRange(
  6624. Range.subtract(SC->getValue()->getValue()), SE);
  6625. // This is strange and shouldn't happen.
  6626. return SE.getCouldNotCompute();
  6627. }
  6628. // The only time we can solve this is when we have all constant indices.
  6629. // Otherwise, we cannot determine the overflow conditions.
  6630. for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
  6631. if (!isa<SCEVConstant>(getOperand(i)))
  6632. return SE.getCouldNotCompute();
  6633. // Okay at this point we know that all elements of the chrec are constants and
  6634. // that the start element is zero.
  6635. // First check to see if the range contains zero. If not, the first
  6636. // iteration exits.
  6637. unsigned BitWidth = SE.getTypeSizeInBits(getType());
  6638. if (!Range.contains(APInt(BitWidth, 0)))
  6639. return SE.getConstant(getType(), 0);
  6640. if (isAffine()) {
  6641. // If this is an affine expression then we have this situation:
  6642. // Solve {0,+,A} in Range === Ax in Range
  6643. // We know that zero is in the range. If A is positive then we know that
  6644. // the upper value of the range must be the first possible exit value.
  6645. // If A is negative then the lower of the range is the last possible loop
  6646. // value. Also note that we already checked for a full range.
  6647. APInt One(BitWidth,1);
  6648. APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
  6649. APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
  6650. // The exit value should be (End+A)/A.
  6651. APInt ExitVal = (End + A).udiv(A);
  6652. ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
  6653. // Evaluate at the exit value. If we really did fall out of the valid
  6654. // range, then we computed our trip count, otherwise wrap around or other
  6655. // things must have happened.
  6656. ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
  6657. if (Range.contains(Val->getValue()))
  6658. return SE.getCouldNotCompute(); // Something strange happened
  6659. // Ensure that the previous value is in the range. This is a sanity check.
  6660. assert(Range.contains(
  6661. EvaluateConstantChrecAtConstant(this,
  6662. ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
  6663. "Linear scev computation is off in a bad way!");
  6664. return SE.getConstant(ExitValue);
  6665. } else if (isQuadratic()) {
  6666. // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
  6667. // quadratic equation to solve it. To do this, we must frame our problem in
  6668. // terms of figuring out when zero is crossed, instead of when
  6669. // Range.getUpper() is crossed.
  6670. SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
  6671. NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
  6672. const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
  6673. // getNoWrapFlags(FlagNW)
  6674. FlagAnyWrap);
  6675. // Next, solve the constructed addrec
  6676. std::pair<const SCEV *,const SCEV *> Roots =
  6677. SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
  6678. const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
  6679. const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
  6680. if (R1) {
  6681. // Pick the smallest positive root value.
  6682. if (ConstantInt *CB =
  6683. dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
  6684. R1->getValue(), R2->getValue()))) {
  6685. if (!CB->getZExtValue())
  6686. std::swap(R1, R2); // R1 is the minimum root now.
  6687. // Make sure the root is not off by one. The returned iteration should
  6688. // not be in the range, but the previous one should be. When solving
  6689. // for "X*X < 5", for example, we should not return a root of 2.
  6690. ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
  6691. R1->getValue(),
  6692. SE);
  6693. if (Range.contains(R1Val->getValue())) {
  6694. // The next iteration must be out of the range...
  6695. ConstantInt *NextVal =
  6696. ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
  6697. R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
  6698. if (!Range.contains(R1Val->getValue()))
  6699. return SE.getConstant(NextVal);
  6700. return SE.getCouldNotCompute(); // Something strange happened
  6701. }
  6702. // If R1 was not in the range, then it is a good return value. Make
  6703. // sure that R1-1 WAS in the range though, just in case.
  6704. ConstantInt *NextVal =
  6705. ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
  6706. R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
  6707. if (Range.contains(R1Val->getValue()))
  6708. return R1;
  6709. return SE.getCouldNotCompute(); // Something strange happened
  6710. }
  6711. }
  6712. }
  6713. return SE.getCouldNotCompute();
  6714. }
  6715. namespace {
  6716. struct FindUndefs {
  6717. bool Found;
  6718. FindUndefs() : Found(false) {}
  6719. bool follow(const SCEV *S) {
  6720. if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) {
  6721. if (isa<UndefValue>(C->getValue()))
  6722. Found = true;
  6723. } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
  6724. if (isa<UndefValue>(C->getValue()))
  6725. Found = true;
  6726. }
  6727. // Keep looking if we haven't found it yet.
  6728. return !Found;
  6729. }
  6730. bool isDone() const {
  6731. // Stop recursion if we have found an undef.
  6732. return Found;
  6733. }
  6734. };
  6735. }
  6736. // Return true when S contains at least an undef value.
  6737. static inline bool
  6738. containsUndefs(const SCEV *S) {
  6739. FindUndefs F;
  6740. SCEVTraversal<FindUndefs> ST(F);
  6741. ST.visitAll(S);
  6742. return F.Found;
  6743. }
  6744. namespace {
  6745. // Collect all steps of SCEV expressions.
  6746. struct SCEVCollectStrides {
  6747. ScalarEvolution &SE;
  6748. SmallVectorImpl<const SCEV *> &Strides;
  6749. SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
  6750. : SE(SE), Strides(S) {}
  6751. bool follow(const SCEV *S) {
  6752. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
  6753. Strides.push_back(AR->getStepRecurrence(SE));
  6754. return true;
  6755. }
  6756. bool isDone() const { return false; }
  6757. };
  6758. // Collect all SCEVUnknown and SCEVMulExpr expressions.
  6759. struct SCEVCollectTerms {
  6760. SmallVectorImpl<const SCEV *> &Terms;
  6761. SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T)
  6762. : Terms(T) {}
  6763. bool follow(const SCEV *S) {
  6764. if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
  6765. if (!containsUndefs(S))
  6766. Terms.push_back(S);
  6767. // Stop recursion: once we collected a term, do not walk its operands.
  6768. return false;
  6769. }
  6770. // Keep looking.
  6771. return true;
  6772. }
  6773. bool isDone() const { return false; }
  6774. };
  6775. }
  6776. /// Find parametric terms in this SCEVAddRecExpr.
  6777. void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
  6778. SmallVectorImpl<const SCEV *> &Terms) {
  6779. SmallVector<const SCEV *, 4> Strides;
  6780. SCEVCollectStrides StrideCollector(*this, Strides);
  6781. visitAll(Expr, StrideCollector);
  6782. DEBUG({
  6783. dbgs() << "Strides:\n";
  6784. for (const SCEV *S : Strides)
  6785. dbgs() << *S << "\n";
  6786. });
  6787. for (const SCEV *S : Strides) {
  6788. SCEVCollectTerms TermCollector(Terms);
  6789. visitAll(S, TermCollector);
  6790. }
  6791. DEBUG({
  6792. dbgs() << "Terms:\n";
  6793. for (const SCEV *T : Terms)
  6794. dbgs() << *T << "\n";
  6795. });
  6796. }
  6797. static bool findArrayDimensionsRec(ScalarEvolution &SE,
  6798. SmallVectorImpl<const SCEV *> &Terms,
  6799. SmallVectorImpl<const SCEV *> &Sizes) {
  6800. int Last = Terms.size() - 1;
  6801. const SCEV *Step = Terms[Last];
  6802. // End of recursion.
  6803. if (Last == 0) {
  6804. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
  6805. SmallVector<const SCEV *, 2> Qs;
  6806. for (const SCEV *Op : M->operands())
  6807. if (!isa<SCEVConstant>(Op))
  6808. Qs.push_back(Op);
  6809. Step = SE.getMulExpr(Qs);
  6810. }
  6811. Sizes.push_back(Step);
  6812. return true;
  6813. }
  6814. for (const SCEV *&Term : Terms) {
  6815. // Normalize the terms before the next call to findArrayDimensionsRec.
  6816. const SCEV *Q, *R;
  6817. SCEVDivision::divide(SE, Term, Step, &Q, &R);
  6818. // Bail out when GCD does not evenly divide one of the terms.
  6819. if (!R->isZero())
  6820. return false;
  6821. Term = Q;
  6822. }
  6823. // Remove all SCEVConstants.
  6824. Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) {
  6825. return isa<SCEVConstant>(E);
  6826. }),
  6827. Terms.end());
  6828. if (Terms.size() > 0)
  6829. if (!findArrayDimensionsRec(SE, Terms, Sizes))
  6830. return false;
  6831. Sizes.push_back(Step);
  6832. return true;
  6833. }
  6834. namespace {
  6835. struct FindParameter {
  6836. bool FoundParameter;
  6837. FindParameter() : FoundParameter(false) {}
  6838. bool follow(const SCEV *S) {
  6839. if (isa<SCEVUnknown>(S)) {
  6840. FoundParameter = true;
  6841. // Stop recursion: we found a parameter.
  6842. return false;
  6843. }
  6844. // Keep looking.
  6845. return true;
  6846. }
  6847. bool isDone() const {
  6848. // Stop recursion if we have found a parameter.
  6849. return FoundParameter;
  6850. }
  6851. };
  6852. }
  6853. // Returns true when S contains at least a SCEVUnknown parameter.
  6854. static inline bool
  6855. containsParameters(const SCEV *S) {
  6856. FindParameter F;
  6857. SCEVTraversal<FindParameter> ST(F);
  6858. ST.visitAll(S);
  6859. return F.FoundParameter;
  6860. }
  6861. // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
  6862. static inline bool
  6863. containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
  6864. for (const SCEV *T : Terms)
  6865. if (containsParameters(T))
  6866. return true;
  6867. return false;
  6868. }
  6869. // Return the number of product terms in S.
  6870. static inline int numberOfTerms(const SCEV *S) {
  6871. if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
  6872. return Expr->getNumOperands();
  6873. return 1;
  6874. }
  6875. static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
  6876. if (isa<SCEVConstant>(T))
  6877. return nullptr;
  6878. if (isa<SCEVUnknown>(T))
  6879. return T;
  6880. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
  6881. SmallVector<const SCEV *, 2> Factors;
  6882. for (const SCEV *Op : M->operands())
  6883. if (!isa<SCEVConstant>(Op))
  6884. Factors.push_back(Op);
  6885. return SE.getMulExpr(Factors);
  6886. }
  6887. return T;
  6888. }
  6889. /// Return the size of an element read or written by Inst.
  6890. const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
  6891. Type *Ty;
  6892. if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
  6893. Ty = Store->getValueOperand()->getType();
  6894. else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
  6895. Ty = Load->getType();
  6896. else
  6897. return nullptr;
  6898. Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
  6899. return getSizeOfExpr(ETy, Ty);
  6900. }
  6901. /// Second step of delinearization: compute the array dimensions Sizes from the
  6902. /// set of Terms extracted from the memory access function of this SCEVAddRec.
  6903. void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
  6904. SmallVectorImpl<const SCEV *> &Sizes,
  6905. const SCEV *ElementSize) const {
  6906. if (Terms.size() < 1 || !ElementSize)
  6907. return;
  6908. // Early return when Terms do not contain parameters: we do not delinearize
  6909. // non parametric SCEVs.
  6910. if (!containsParameters(Terms))
  6911. return;
  6912. DEBUG({
  6913. dbgs() << "Terms:\n";
  6914. for (const SCEV *T : Terms)
  6915. dbgs() << *T << "\n";
  6916. });
  6917. // Remove duplicates.
  6918. std::sort(Terms.begin(), Terms.end());
  6919. Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
  6920. // Put larger terms first.
  6921. std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
  6922. return numberOfTerms(LHS) > numberOfTerms(RHS);
  6923. });
  6924. ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
  6925. // Divide all terms by the element size.
  6926. for (const SCEV *&Term : Terms) {
  6927. const SCEV *Q, *R;
  6928. SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
  6929. Term = Q;
  6930. }
  6931. SmallVector<const SCEV *, 4> NewTerms;
  6932. // Remove constant factors.
  6933. for (const SCEV *T : Terms)
  6934. if (const SCEV *NewT = removeConstantFactors(SE, T))
  6935. NewTerms.push_back(NewT);
  6936. DEBUG({
  6937. dbgs() << "Terms after sorting:\n";
  6938. for (const SCEV *T : NewTerms)
  6939. dbgs() << *T << "\n";
  6940. });
  6941. if (NewTerms.empty() ||
  6942. !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
  6943. Sizes.clear();
  6944. return;
  6945. }
  6946. // The last element to be pushed into Sizes is the size of an element.
  6947. Sizes.push_back(ElementSize);
  6948. DEBUG({
  6949. dbgs() << "Sizes:\n";
  6950. for (const SCEV *S : Sizes)
  6951. dbgs() << *S << "\n";
  6952. });
  6953. }
  6954. /// Third step of delinearization: compute the access functions for the
  6955. /// Subscripts based on the dimensions in Sizes.
  6956. void ScalarEvolution::computeAccessFunctions(
  6957. const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
  6958. SmallVectorImpl<const SCEV *> &Sizes) {
  6959. // Early exit in case this SCEV is not an affine multivariate function.
  6960. if (Sizes.empty())
  6961. return;
  6962. if (auto AR = dyn_cast<SCEVAddRecExpr>(Expr))
  6963. if (!AR->isAffine())
  6964. return;
  6965. const SCEV *Res = Expr;
  6966. int Last = Sizes.size() - 1;
  6967. for (int i = Last; i >= 0; i--) {
  6968. const SCEV *Q, *R;
  6969. SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
  6970. DEBUG({
  6971. dbgs() << "Res: " << *Res << "\n";
  6972. dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
  6973. dbgs() << "Res divided by Sizes[i]:\n";
  6974. dbgs() << "Quotient: " << *Q << "\n";
  6975. dbgs() << "Remainder: " << *R << "\n";
  6976. });
  6977. Res = Q;
  6978. // Do not record the last subscript corresponding to the size of elements in
  6979. // the array.
  6980. if (i == Last) {
  6981. // Bail out if the remainder is too complex.
  6982. if (isa<SCEVAddRecExpr>(R)) {
  6983. Subscripts.clear();
  6984. Sizes.clear();
  6985. return;
  6986. }
  6987. continue;
  6988. }
  6989. // Record the access function for the current subscript.
  6990. Subscripts.push_back(R);
  6991. }
  6992. // Also push in last position the remainder of the last division: it will be
  6993. // the access function of the innermost dimension.
  6994. Subscripts.push_back(Res);
  6995. std::reverse(Subscripts.begin(), Subscripts.end());
  6996. DEBUG({
  6997. dbgs() << "Subscripts:\n";
  6998. for (const SCEV *S : Subscripts)
  6999. dbgs() << *S << "\n";
  7000. });
  7001. }
  7002. /// Splits the SCEV into two vectors of SCEVs representing the subscripts and
  7003. /// sizes of an array access. Returns the remainder of the delinearization that
  7004. /// is the offset start of the array. The SCEV->delinearize algorithm computes
  7005. /// the multiples of SCEV coefficients: that is a pattern matching of sub
  7006. /// expressions in the stride and base of a SCEV corresponding to the
  7007. /// computation of a GCD (greatest common divisor) of base and stride. When
  7008. /// SCEV->delinearize fails, it returns the SCEV unchanged.
  7009. ///
  7010. /// For example: when analyzing the memory access A[i][j][k] in this loop nest
  7011. ///
  7012. /// void foo(long n, long m, long o, double A[n][m][o]) {
  7013. ///
  7014. /// for (long i = 0; i < n; i++)
  7015. /// for (long j = 0; j < m; j++)
  7016. /// for (long k = 0; k < o; k++)
  7017. /// A[i][j][k] = 1.0;
  7018. /// }
  7019. ///
  7020. /// the delinearization input is the following AddRec SCEV:
  7021. ///
  7022. /// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
  7023. ///
  7024. /// From this SCEV, we are able to say that the base offset of the access is %A
  7025. /// because it appears as an offset that does not divide any of the strides in
  7026. /// the loops:
  7027. ///
  7028. /// CHECK: Base offset: %A
  7029. ///
  7030. /// and then SCEV->delinearize determines the size of some of the dimensions of
  7031. /// the array as these are the multiples by which the strides are happening:
  7032. ///
  7033. /// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
  7034. ///
  7035. /// Note that the outermost dimension remains of UnknownSize because there are
  7036. /// no strides that would help identifying the size of the last dimension: when
  7037. /// the array has been statically allocated, one could compute the size of that
  7038. /// dimension by dividing the overall size of the array by the size of the known
  7039. /// dimensions: %m * %o * 8.
  7040. ///
  7041. /// Finally delinearize provides the access functions for the array reference
  7042. /// that does correspond to A[i][j][k] of the above C testcase:
  7043. ///
  7044. /// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
  7045. ///
  7046. /// The testcases are checking the output of a function pass:
  7047. /// DelinearizationPass that walks through all loads and stores of a function
  7048. /// asking for the SCEV of the memory access with respect to all enclosing
  7049. /// loops, calling SCEV->delinearize on that and printing the results.
  7050. void ScalarEvolution::delinearize(const SCEV *Expr,
  7051. SmallVectorImpl<const SCEV *> &Subscripts,
  7052. SmallVectorImpl<const SCEV *> &Sizes,
  7053. const SCEV *ElementSize) {
  7054. // First step: collect parametric terms.
  7055. SmallVector<const SCEV *, 4> Terms;
  7056. collectParametricTerms(Expr, Terms);
  7057. if (Terms.empty())
  7058. return;
  7059. // Second step: find subscript sizes.
  7060. findArrayDimensions(Terms, Sizes, ElementSize);
  7061. if (Sizes.empty())
  7062. return;
  7063. // Third step: compute the access functions for each subscript.
  7064. computeAccessFunctions(Expr, Subscripts, Sizes);
  7065. if (Subscripts.empty())
  7066. return;
  7067. DEBUG({
  7068. dbgs() << "succeeded to delinearize " << *Expr << "\n";
  7069. dbgs() << "ArrayDecl[UnknownSize]";
  7070. for (const SCEV *S : Sizes)
  7071. dbgs() << "[" << *S << "]";
  7072. dbgs() << "\nArrayRef";
  7073. for (const SCEV *S : Subscripts)
  7074. dbgs() << "[" << *S << "]";
  7075. dbgs() << "\n";
  7076. });
  7077. }
  7078. //===----------------------------------------------------------------------===//
  7079. // SCEVCallbackVH Class Implementation
  7080. //===----------------------------------------------------------------------===//
  7081. void ScalarEvolution::SCEVCallbackVH::deleted() {
  7082. assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
  7083. if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
  7084. SE->ConstantEvolutionLoopExitValue.erase(PN);
  7085. SE->ValueExprMap.erase(getValPtr());
  7086. // this now dangles!
  7087. }
  7088. void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
  7089. assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
  7090. // Forget all the expressions associated with users of the old value,
  7091. // so that future queries will recompute the expressions using the new
  7092. // value.
  7093. Value *Old = getValPtr();
  7094. SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
  7095. SmallPtrSet<User *, 8> Visited;
  7096. while (!Worklist.empty()) {
  7097. User *U = Worklist.pop_back_val();
  7098. // Deleting the Old value will cause this to dangle. Postpone
  7099. // that until everything else is done.
  7100. if (U == Old)
  7101. continue;
  7102. if (!Visited.insert(U).second)
  7103. continue;
  7104. if (PHINode *PN = dyn_cast<PHINode>(U))
  7105. SE->ConstantEvolutionLoopExitValue.erase(PN);
  7106. SE->ValueExprMap.erase(U);
  7107. Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
  7108. }
  7109. // Delete the Old value.
  7110. if (PHINode *PN = dyn_cast<PHINode>(Old))
  7111. SE->ConstantEvolutionLoopExitValue.erase(PN);
  7112. SE->ValueExprMap.erase(Old);
  7113. // this now dangles!
  7114. }
  7115. ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
  7116. : CallbackVH(V), SE(se) {}
  7117. //===----------------------------------------------------------------------===//
  7118. // ScalarEvolution Class Implementation
  7119. //===----------------------------------------------------------------------===//
  7120. ScalarEvolution::ScalarEvolution()
  7121. : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
  7122. LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
  7123. initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
  7124. }
  7125. bool ScalarEvolution::runOnFunction(Function &F) {
  7126. this->F = &F;
  7127. AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  7128. LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  7129. TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  7130. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  7131. return false;
  7132. }
  7133. void ScalarEvolution::releaseMemory() {
  7134. // Iterate through all the SCEVUnknown instances and call their
  7135. // destructors, so that they release their references to their values.
  7136. for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
  7137. U->~SCEVUnknown();
  7138. FirstUnknown = nullptr;
  7139. ValueExprMap.clear();
  7140. // Free any extra memory created for ExitNotTakenInfo in the unlikely event
  7141. // that a loop had multiple computable exits.
  7142. for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
  7143. BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
  7144. I != E; ++I) {
  7145. I->second.clear();
  7146. }
  7147. assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
  7148. assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
  7149. BackedgeTakenCounts.clear();
  7150. ConstantEvolutionLoopExitValue.clear();
  7151. ValuesAtScopes.clear();
  7152. LoopDispositions.clear();
  7153. BlockDispositions.clear();
  7154. UnsignedRanges.clear();
  7155. SignedRanges.clear();
  7156. UniqueSCEVs.clear();
  7157. SCEVAllocator.Reset();
  7158. }
  7159. void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
  7160. AU.setPreservesAll();
  7161. AU.addRequired<AssumptionCacheTracker>();
  7162. AU.addRequiredTransitive<LoopInfoWrapperPass>();
  7163. AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  7164. AU.addRequired<TargetLibraryInfoWrapperPass>();
  7165. }
  7166. bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
  7167. return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
  7168. }
  7169. static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
  7170. const Loop *L) {
  7171. // Print all inner loops first
  7172. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
  7173. PrintLoopInfo(OS, SE, *I);
  7174. OS << "Loop ";
  7175. L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  7176. OS << ": ";
  7177. SmallVector<BasicBlock *, 8> ExitBlocks;
  7178. L->getExitBlocks(ExitBlocks);
  7179. if (ExitBlocks.size() != 1)
  7180. OS << "<multiple exits> ";
  7181. if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
  7182. OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
  7183. } else {
  7184. OS << "Unpredictable backedge-taken count. ";
  7185. }
  7186. OS << "\n"
  7187. "Loop ";
  7188. L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  7189. OS << ": ";
  7190. if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
  7191. OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
  7192. } else {
  7193. OS << "Unpredictable max backedge-taken count. ";
  7194. }
  7195. OS << "\n";
  7196. }
  7197. void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
  7198. // ScalarEvolution's implementation of the print method is to print
  7199. // out SCEV values of all instructions that are interesting. Doing
  7200. // this potentially causes it to create new SCEV objects though,
  7201. // which technically conflicts with the const qualifier. This isn't
  7202. // observable from outside the class though, so casting away the
  7203. // const isn't dangerous.
  7204. ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
  7205. OS << "Classifying expressions for: ";
  7206. F->printAsOperand(OS, /*PrintType=*/false);
  7207. OS << "\n";
  7208. for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
  7209. if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
  7210. OS << *I << '\n';
  7211. OS << " --> ";
  7212. const SCEV *SV = SE.getSCEV(&*I);
  7213. SV->print(OS);
  7214. if (!isa<SCEVCouldNotCompute>(SV)) {
  7215. OS << " U: ";
  7216. SE.getUnsignedRange(SV).print(OS);
  7217. OS << " S: ";
  7218. SE.getSignedRange(SV).print(OS);
  7219. }
  7220. const Loop *L = LI->getLoopFor((*I).getParent());
  7221. const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
  7222. if (AtUse != SV) {
  7223. OS << " --> ";
  7224. AtUse->print(OS);
  7225. if (!isa<SCEVCouldNotCompute>(AtUse)) {
  7226. OS << " U: ";
  7227. SE.getUnsignedRange(AtUse).print(OS);
  7228. OS << " S: ";
  7229. SE.getSignedRange(AtUse).print(OS);
  7230. }
  7231. }
  7232. if (L) {
  7233. OS << "\t\t" "Exits: ";
  7234. const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
  7235. if (!SE.isLoopInvariant(ExitValue, L)) {
  7236. OS << "<<Unknown>>";
  7237. } else {
  7238. OS << *ExitValue;
  7239. }
  7240. }
  7241. OS << "\n";
  7242. }
  7243. OS << "Determining loop execution counts for: ";
  7244. F->printAsOperand(OS, /*PrintType=*/false);
  7245. OS << "\n";
  7246. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
  7247. PrintLoopInfo(OS, &SE, *I);
  7248. }
  7249. ScalarEvolution::LoopDisposition
  7250. ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
  7251. auto &Values = LoopDispositions[S];
  7252. for (auto &V : Values) {
  7253. if (V.getPointer() == L)
  7254. return V.getInt();
  7255. }
  7256. Values.emplace_back(L, LoopVariant);
  7257. LoopDisposition D = computeLoopDisposition(S, L);
  7258. auto &Values2 = LoopDispositions[S];
  7259. for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
  7260. if (V.getPointer() == L) {
  7261. V.setInt(D);
  7262. break;
  7263. }
  7264. }
  7265. return D;
  7266. }
  7267. ScalarEvolution::LoopDisposition
  7268. ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
  7269. switch (static_cast<SCEVTypes>(S->getSCEVType())) {
  7270. case scConstant:
  7271. return LoopInvariant;
  7272. case scTruncate:
  7273. case scZeroExtend:
  7274. case scSignExtend:
  7275. return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
  7276. case scAddRecExpr: {
  7277. const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
  7278. // If L is the addrec's loop, it's computable.
  7279. if (AR->getLoop() == L)
  7280. return LoopComputable;
  7281. // Add recurrences are never invariant in the function-body (null loop).
  7282. if (!L)
  7283. return LoopVariant;
  7284. // This recurrence is variant w.r.t. L if L contains AR's loop.
  7285. if (L->contains(AR->getLoop()))
  7286. return LoopVariant;
  7287. // This recurrence is invariant w.r.t. L if AR's loop contains L.
  7288. if (AR->getLoop()->contains(L))
  7289. return LoopInvariant;
  7290. // This recurrence is variant w.r.t. L if any of its operands
  7291. // are variant.
  7292. for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
  7293. I != E; ++I)
  7294. if (!isLoopInvariant(*I, L))
  7295. return LoopVariant;
  7296. // Otherwise it's loop-invariant.
  7297. return LoopInvariant;
  7298. }
  7299. case scAddExpr:
  7300. case scMulExpr:
  7301. case scUMaxExpr:
  7302. case scSMaxExpr: {
  7303. const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
  7304. bool HasVarying = false;
  7305. for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
  7306. I != E; ++I) {
  7307. LoopDisposition D = getLoopDisposition(*I, L);
  7308. if (D == LoopVariant)
  7309. return LoopVariant;
  7310. if (D == LoopComputable)
  7311. HasVarying = true;
  7312. }
  7313. return HasVarying ? LoopComputable : LoopInvariant;
  7314. }
  7315. case scUDivExpr: {
  7316. const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
  7317. LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
  7318. if (LD == LoopVariant)
  7319. return LoopVariant;
  7320. LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
  7321. if (RD == LoopVariant)
  7322. return LoopVariant;
  7323. return (LD == LoopInvariant && RD == LoopInvariant) ?
  7324. LoopInvariant : LoopComputable;
  7325. }
  7326. case scUnknown:
  7327. // All non-instruction values are loop invariant. All instructions are loop
  7328. // invariant if they are not contained in the specified loop.
  7329. // Instructions are never considered invariant in the function body
  7330. // (null loop) because they are defined within the "loop".
  7331. if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
  7332. return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
  7333. return LoopInvariant;
  7334. case scCouldNotCompute:
  7335. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  7336. }
  7337. llvm_unreachable("Unknown SCEV kind!");
  7338. }
  7339. bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
  7340. return getLoopDisposition(S, L) == LoopInvariant;
  7341. }
  7342. bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
  7343. return getLoopDisposition(S, L) == LoopComputable;
  7344. }
  7345. ScalarEvolution::BlockDisposition
  7346. ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  7347. auto &Values = BlockDispositions[S];
  7348. for (auto &V : Values) {
  7349. if (V.getPointer() == BB)
  7350. return V.getInt();
  7351. }
  7352. Values.emplace_back(BB, DoesNotDominateBlock);
  7353. BlockDisposition D = computeBlockDisposition(S, BB);
  7354. auto &Values2 = BlockDispositions[S];
  7355. for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
  7356. if (V.getPointer() == BB) {
  7357. V.setInt(D);
  7358. break;
  7359. }
  7360. }
  7361. return D;
  7362. }
  7363. ScalarEvolution::BlockDisposition
  7364. ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  7365. switch (static_cast<SCEVTypes>(S->getSCEVType())) {
  7366. case scConstant:
  7367. return ProperlyDominatesBlock;
  7368. case scTruncate:
  7369. case scZeroExtend:
  7370. case scSignExtend:
  7371. return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
  7372. case scAddRecExpr: {
  7373. // This uses a "dominates" query instead of "properly dominates" query
  7374. // to test for proper dominance too, because the instruction which
  7375. // produces the addrec's value is a PHI, and a PHI effectively properly
  7376. // dominates its entire containing block.
  7377. const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
  7378. if (!DT->dominates(AR->getLoop()->getHeader(), BB))
  7379. return DoesNotDominateBlock;
  7380. }
  7381. // FALL THROUGH into SCEVNAryExpr handling.
  7382. case scAddExpr:
  7383. case scMulExpr:
  7384. case scUMaxExpr:
  7385. case scSMaxExpr: {
  7386. const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
  7387. bool Proper = true;
  7388. for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
  7389. I != E; ++I) {
  7390. BlockDisposition D = getBlockDisposition(*I, BB);
  7391. if (D == DoesNotDominateBlock)
  7392. return DoesNotDominateBlock;
  7393. if (D == DominatesBlock)
  7394. Proper = false;
  7395. }
  7396. return Proper ? ProperlyDominatesBlock : DominatesBlock;
  7397. }
  7398. case scUDivExpr: {
  7399. const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
  7400. const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
  7401. BlockDisposition LD = getBlockDisposition(LHS, BB);
  7402. if (LD == DoesNotDominateBlock)
  7403. return DoesNotDominateBlock;
  7404. BlockDisposition RD = getBlockDisposition(RHS, BB);
  7405. if (RD == DoesNotDominateBlock)
  7406. return DoesNotDominateBlock;
  7407. return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
  7408. ProperlyDominatesBlock : DominatesBlock;
  7409. }
  7410. case scUnknown:
  7411. if (Instruction *I =
  7412. dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
  7413. if (I->getParent() == BB)
  7414. return DominatesBlock;
  7415. if (DT->properlyDominates(I->getParent(), BB))
  7416. return ProperlyDominatesBlock;
  7417. return DoesNotDominateBlock;
  7418. }
  7419. return ProperlyDominatesBlock;
  7420. case scCouldNotCompute:
  7421. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  7422. }
  7423. llvm_unreachable("Unknown SCEV kind!");
  7424. }
  7425. bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
  7426. return getBlockDisposition(S, BB) >= DominatesBlock;
  7427. }
  7428. bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
  7429. return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
  7430. }
  7431. namespace {
  7432. // Search for a SCEV expression node within an expression tree.
  7433. // Implements SCEVTraversal::Visitor.
  7434. struct SCEVSearch {
  7435. const SCEV *Node;
  7436. bool IsFound;
  7437. SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
  7438. bool follow(const SCEV *S) {
  7439. IsFound |= (S == Node);
  7440. return !IsFound;
  7441. }
  7442. bool isDone() const { return IsFound; }
  7443. };
  7444. }
  7445. bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
  7446. SCEVSearch Search(Op);
  7447. visitAll(S, Search);
  7448. return Search.IsFound;
  7449. }
  7450. void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
  7451. ValuesAtScopes.erase(S);
  7452. LoopDispositions.erase(S);
  7453. BlockDispositions.erase(S);
  7454. UnsignedRanges.erase(S);
  7455. SignedRanges.erase(S);
  7456. for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
  7457. BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
  7458. BackedgeTakenInfo &BEInfo = I->second;
  7459. if (BEInfo.hasOperand(S, this)) {
  7460. BEInfo.clear();
  7461. BackedgeTakenCounts.erase(I++);
  7462. }
  7463. else
  7464. ++I;
  7465. }
  7466. }
  7467. typedef DenseMap<const Loop *, std::string> VerifyMap;
  7468. /// replaceSubString - Replaces all occurrences of From in Str with To.
  7469. static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
  7470. size_t Pos = 0;
  7471. while ((Pos = Str.find(From, Pos)) != std::string::npos) {
  7472. Str.replace(Pos, From.size(), To.data(), To.size());
  7473. Pos += To.size();
  7474. }
  7475. }
  7476. /// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
  7477. static void
  7478. getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
  7479. for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
  7480. getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
  7481. std::string &S = Map[L];
  7482. if (S.empty()) {
  7483. raw_string_ostream OS(S);
  7484. SE.getBackedgeTakenCount(L)->print(OS);
  7485. // false and 0 are semantically equivalent. This can happen in dead loops.
  7486. replaceSubString(OS.str(), "false", "0");
  7487. // Remove wrap flags, their use in SCEV is highly fragile.
  7488. // FIXME: Remove this when SCEV gets smarter about them.
  7489. replaceSubString(OS.str(), "<nw>", "");
  7490. replaceSubString(OS.str(), "<nsw>", "");
  7491. replaceSubString(OS.str(), "<nuw>", "");
  7492. }
  7493. }
  7494. }
  7495. void ScalarEvolution::verifyAnalysis() const {
  7496. if (!VerifySCEV)
  7497. return;
  7498. ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
  7499. // Gather stringified backedge taken counts for all loops using SCEV's caches.
  7500. // FIXME: It would be much better to store actual values instead of strings,
  7501. // but SCEV pointers will change if we drop the caches.
  7502. VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
  7503. for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
  7504. getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
  7505. // Gather stringified backedge taken counts for all loops without using
  7506. // SCEV's caches.
  7507. SE.releaseMemory();
  7508. for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
  7509. getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
  7510. // Now compare whether they're the same with and without caches. This allows
  7511. // verifying that no pass changed the cache.
  7512. assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
  7513. "New loops suddenly appeared!");
  7514. for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
  7515. OldE = BackedgeDumpsOld.end(),
  7516. NewI = BackedgeDumpsNew.begin();
  7517. OldI != OldE; ++OldI, ++NewI) {
  7518. assert(OldI->first == NewI->first && "Loop order changed!");
  7519. // Compare the stringified SCEVs. We don't care if undef backedgetaken count
  7520. // changes.
  7521. // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
  7522. // means that a pass is buggy or SCEV has to learn a new pattern but is
  7523. // usually not harmful.
  7524. if (OldI->second != NewI->second &&
  7525. OldI->second.find("undef") == std::string::npos &&
  7526. NewI->second.find("undef") == std::string::npos &&
  7527. OldI->second != "***COULDNOTCOMPUTE***" &&
  7528. NewI->second != "***COULDNOTCOMPUTE***") {
  7529. dbgs() << "SCEVValidator: SCEV for loop '"
  7530. << OldI->first->getHeader()->getName()
  7531. << "' changed from '" << OldI->second
  7532. << "' to '" << NewI->second << "'!\n";
  7533. std::abort();
  7534. }
  7535. }
  7536. // TODO: Verify more things.
  7537. }