// This header created by issuing: `echo "// This header created by issuing: \`$BASH_COMMAND\` $(echo "" | cat - LICENSE README.md | sed -e "s#^..*#\/\/ &#") $(echo "" | cat - SYS_Types.h SYS_Math.h VM_SSEFunc.h VM_SIMDFunc.h VM_SIMD.h UT_Array.h UT_ArrayImpl.h UT_SmallArray.h UT_FixedVector.h UT_ParallelUtil.h UT_BVH.h UT_BVHImpl.h UT_SolidAngle.h UT_Array.cpp UT_SolidAngle.cpp | sed -e "s/^#.*include *\".*$//g")" > ~/Repos/libigl/include/igl/FastWindingNumberForSoups.h`
// MIT License
// Copyright (c) 2018 Side Effects Software Inc.
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// # Fast Winding Numbers for Soups
// https://github.com/alecjacobson/WindingNumber
// Implementation of the _ACM SIGGRAPH_ 2018 paper,
// "Fast Winding Numbers for Soups and Clouds"
// Gavin Barill¹, Neil Dickson², Ryan Schmidt³, David I.W. Levin¹, Alec Jacobson¹
// ¹University of Toronto, ²SideFX, ³Gradient Space
// _Note: this implementation is for triangle soups only, not point clouds._
// This version does _not_ depend on Intel TBB. Instead it depends on
// [libigl](https://github.com/libigl/libigl)'s simpler `igl::parallel_for` (which
// uses `std::thread`)
// <del>This code, as written, depends on Intel's Threading Building Blocks (TBB) library for parallelism, but it should be fairly easy to change it to use any other means of threading, since it only uses parallel for loops with simple partitioning.</del>
// The main class of interest is UT_SolidAngle and its init and computeSolidAngle functions, which you can use by including UT_SolidAngle.h, and whose implementation is mostly in UT_SolidAngle.cpp, using a 4-way bounding volume hierarchy (BVH) implemented in the UT_BVH.h and UT_BVHImpl.h headers. The rest of the files are mostly various supporting code. UT_SubtendedAngle, for computing angles subtended by 2D curves, can also be found in UT_SolidAngle.h and UT_SolidAngle.cpp.
// An example of very similar code and how to use it to create a geometry operator (SOP) in Houdini can be found in the HDK examples (toolkit/samples/SOP/SOP_WindingNumber) for Houdini 16.5.121 and later. Query points go in the first input and the mesh geometry goes in the second input.
// Create a single header using:
// echo "// This header created by issuing: \`$BASH_COMMAND\` $(echo "" | cat - LICENSE README.md | sed -e "s#^..*#\/\/ &#") $(echo "" | cat - SYS_Types.h SYS_Math.h VM_SSEFunc.h VM_SIMD.h UT_Array.h UT_ArrayImpl.h UT_SmallArray.h UT_FixedVector.h UT_ParallelUtil.h UT_BVH.h UT_BVHImpl.h UT_SolidAngle.h UT_Array.cpp UT_SolidAngle.cpp | sed -e "s/^#.*include *\".*$//g")"
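//
// Added usage sketch (not part of the original header). It relies only on the
// entry points named above, UT_SolidAngle::init and computeSolidAngle, which
// are declared later in this header; the surrounding setup and variable names
// are illustrative assumptions, not the library's prescribed workflow:
//
//   using namespace igl::FastWindingNumber;
//
//   // Mesh: `npoints` positions and `ntriangles` vertex-index triples,
//   // owned by the caller and kept alive while the structure is in use.
//   UT_SolidAngle<float, float> solid_angle;
//   solid_angle.init(ntriangles, triangle_points, npoints, positions);
//
//   // One query point; the winding number is the solid angle over 4*pi.
//   float w = solid_angle.computeSolidAngle(query) / (4.0f * float(M_PI));
//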
/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *    Common type definitions.
 */

#pragma once

#ifndef __SYS_Types__
#define __SYS_Types__

/* Include system types */
#include <limits>
#include <type_traits>
#include <sys/types.h>
#include <stdint.h>
#include <cassert>

namespace igl {
/// @private
namespace FastWindingNumber {

/*
 * Integer types
 */
typedef signed char int8;
typedef unsigned char uint8;
typedef short int16;
typedef unsigned short uint16;
typedef int int32;
typedef unsigned int uint32;

#ifndef MBSD
typedef unsigned int uint;
#endif

/*
 * Avoid using uint64.
 * The extra bit of precision is NOT worth the cost in pain and suffering
 * induced by use of unsigned.
 */
#if defined(_WIN32)
typedef __int64 int64;
typedef unsigned __int64 uint64;
#elif defined(MBSD)
// On MBSD, int64/uint64 are also defined in the system headers so we must
// declare these in the same way or else we get conflicts.
typedef int64_t int64;
typedef uint64_t uint64;
#elif defined(AMD64)
typedef long int64;
typedef unsigned long uint64;
#else
typedef long long int64;
typedef unsigned long long uint64;
#endif

/// The problem with int64 is that it implies that it is a fixed 64-bit quantity
/// that is saved to disk. Therefore, we need another integral type for
/// indexing our arrays.
typedef int64 exint;

/// Mark function to be inlined. If this is done, taking the address of such
/// a function is not allowed.
#if defined(__GNUC__) || defined(__clang__)
#define SYS_FORCE_INLINE __attribute__ ((always_inline)) inline
#elif defined(_MSC_VER)
#define SYS_FORCE_INLINE __forceinline
#else
#define SYS_FORCE_INLINE inline
#endif

/// Floating Point Types
typedef float fpreal32;
typedef double fpreal64;

/// SYS_FPRealUnionT for type-safe casting with integral types
template <typename T>
union SYS_FPRealUnionT;

template <>
union SYS_FPRealUnionT<fpreal32>
{
    typedef int32 int_type;
    typedef uint32 uint_type;
    typedef fpreal32 fpreal_type;
    enum {
        EXPONENT_BITS = 8,
        MANTISSA_BITS = 23,
        EXPONENT_BIAS = 127 };
    int_type ival;
    uint_type uval;
    fpreal_type fval;
    struct
    {
        uint_type mantissa_val: 23;
        uint_type exponent_val: 8;
        uint_type sign_val: 1;
    };
};

template <>
union SYS_FPRealUnionT<fpreal64>
{
    typedef int64 int_type;
    typedef uint64 uint_type;
    typedef fpreal64 fpreal_type;
    enum {
        EXPONENT_BITS = 11,
        MANTISSA_BITS = 52,
        EXPONENT_BIAS = 1023 };
    int_type ival;
    uint_type uval;
    fpreal_type fval;
    struct
    {
        uint_type mantissa_val: 52;
        uint_type exponent_val: 11;
        uint_type sign_val: 1;
    };
};

typedef union SYS_FPRealUnionT<fpreal32> SYS_FPRealUnionF;
typedef union SYS_FPRealUnionT<fpreal64> SYS_FPRealUnionD;
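
// Added illustrative note (not in the original source): the unions above give
// bit-level access to a float's fields, e.g.
//
//   SYS_FPRealUnionF u;
//   u.fval = 1.0f;            // bit pattern 0x3F800000
//   // u.uval == 0x3F800000u, u.exponent_val == 127 (the bias),
//   // u.mantissa_val == 0, u.sign_val == 0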
/// Asserts are disabled
/// @{
#define UT_IGL_ASSERT_P(ZZ) ((void)0)
#define UT_IGL_ASSERT(ZZ) ((void)0)
#define UT_IGL_ASSERT_MSG_P(ZZ, MM) ((void)0)
#define UT_IGL_ASSERT_MSG(ZZ, MM) ((void)0)
/// @}

}}
#endif

/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *    Miscellaneous math functions.
 */

#pragma once

#ifndef __SYS_Math__
#define __SYS_Math__

#include <float.h>
#include <limits>
#include <math.h>

namespace igl {
/// @private
namespace FastWindingNumber {

// NOTE:
// These have been carefully written so that in the case of equality
// we always return the first parameter. This is so that NANs in
// the second parameter are suppressed.
#define h_min(a, b) (((a) > (b)) ? (b) : (a))
#define h_max(a, b) (((a) < (b)) ? (b) : (a))
// DO NOT CHANGE THE ABOVE WITHOUT READING THE COMMENT
#define h_abs(a) (((a) > 0) ? (a) : -(a))
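
// Added illustrative note (not in the original source): because comparisons
// with NaN are false, equality and a NaN in the second argument both return
// the first argument, e.g.
//
//   SYSmin(1.0f, NAN)  // (1.0f > NAN) is false -> returns 1.0f
//   SYSmax(1.0f, NAN)  // (1.0f < NAN) is false -> returns 1.0f
//
// so a NaN in `b` is suppressed, while a NaN in `a` propagates.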
static constexpr inline int16 SYSmin(int16 a, int16 b) { return h_min(a,b); }
static constexpr inline int16 SYSmax(int16 a, int16 b) { return h_max(a,b); }
static constexpr inline int16 SYSabs(int16 a) { return h_abs(a); }
static constexpr inline int32 SYSmin(int32 a, int32 b) { return h_min(a,b); }
static constexpr inline int32 SYSmax(int32 a, int32 b) { return h_max(a,b); }
static constexpr inline int32 SYSabs(int32 a) { return h_abs(a); }
static constexpr inline int64 SYSmin(int64 a, int64 b) { return h_min(a,b); }
static constexpr inline int64 SYSmax(int64 a, int64 b) { return h_max(a,b); }
static constexpr inline int64 SYSmin(int32 a, int64 b) { return h_min(a,b); }
static constexpr inline int64 SYSmax(int32 a, int64 b) { return h_max(a,b); }
static constexpr inline int64 SYSmin(int64 a, int32 b) { return h_min(a,b); }
static constexpr inline int64 SYSmax(int64 a, int32 b) { return h_max(a,b); }
static constexpr inline int64 SYSabs(int64 a) { return h_abs(a); }
static constexpr inline uint16 SYSmin(uint16 a, uint16 b) { return h_min(a,b); }
static constexpr inline uint16 SYSmax(uint16 a, uint16 b) { return h_max(a,b); }
static constexpr inline uint32 SYSmin(uint32 a, uint32 b) { return h_min(a,b); }
static constexpr inline uint32 SYSmax(uint32 a, uint32 b) { return h_max(a,b); }
static constexpr inline uint64 SYSmin(uint64 a, uint64 b) { return h_min(a,b); }
static constexpr inline uint64 SYSmax(uint64 a, uint64 b) { return h_max(a,b); }
static constexpr inline fpreal32 SYSmin(fpreal32 a, fpreal32 b) { return h_min(a,b); }
static constexpr inline fpreal32 SYSmax(fpreal32 a, fpreal32 b) { return h_max(a,b); }
static constexpr inline fpreal64 SYSmin(fpreal64 a, fpreal64 b) { return h_min(a,b); }
static constexpr inline fpreal64 SYSmax(fpreal64 a, fpreal64 b) { return h_max(a,b); }

// Some systems have size_t as a separate type from uint. Some don't.
#if (defined(LINUX) && defined(IA64)) || defined(MBSD)
static constexpr inline size_t SYSmin(size_t a, size_t b) { return h_min(a,b); }
static constexpr inline size_t SYSmax(size_t a, size_t b) { return h_max(a,b); }
#endif

#undef h_min
#undef h_max
#undef h_abs

#define h_clamp(val, min, max, tol) \
    ((val <= min+tol) ? min : ((val >= max-tol) ? max : val))

static constexpr inline int
SYSclamp(int v, int min, int max)
    { return h_clamp(v, min, max, 0); }
static constexpr inline uint
SYSclamp(uint v, uint min, uint max)
    { return h_clamp(v, min, max, 0); }
static constexpr inline int64
SYSclamp(int64 v, int64 min, int64 max)
    { return h_clamp(v, min, max, int64(0)); }
static constexpr inline uint64
SYSclamp(uint64 v, uint64 min, uint64 max)
    { return h_clamp(v, min, max, uint64(0)); }
static constexpr inline fpreal32
SYSclamp(fpreal32 v, fpreal32 min, fpreal32 max, fpreal32 tol=(fpreal32)0)
    { return h_clamp(v, min, max, tol); }
static constexpr inline fpreal64
SYSclamp(fpreal64 v, fpreal64 min, fpreal64 max, fpreal64 tol=(fpreal64)0)
    { return h_clamp(v, min, max, tol); }

#undef h_clamp
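
// Added illustrative note (not in the original source): the `tol` parameter
// snaps values within `tol` of either bound onto that bound, e.g.
//
//   SYSclamp(0.999f, 0.0f, 1.0f, 0.01f)  // == 1.0f, since 0.999 >= 1.0 - 0.01
//   SYSclamp(0.5f,   0.0f, 1.0f, 0.01f)  // == 0.5f, strictly inside both bounds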
static inline fpreal64 SYSsqrt(fpreal64 arg)
    { return ::sqrt(arg); }
static inline fpreal32 SYSsqrt(fpreal32 arg)
    { return ::sqrtf(arg); }
static inline fpreal64 SYSatan2(fpreal64 a, fpreal64 b)
    { return ::atan2(a, b); }
static inline fpreal32 SYSatan2(fpreal32 a, fpreal32 b)
    { return ::atan2(a, b); }
static inline fpreal32 SYSabs(fpreal32 a) { return ::fabsf(a); }
static inline fpreal64 SYSabs(fpreal64 a) { return ::fabs(a); }

}}
#endif

/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *    SIMD wrapper functions for SSE instructions
 */

#pragma once

#ifdef __SSE__
#ifndef __VM_SSEFunc__
#define __VM_SSEFunc__

#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable:4799)
#endif

#define CPU_HAS_SIMD_INSTR 1
#define VM_SSE_STYLE 1

#include <emmintrin.h>

#if defined(__SSE4_1__)
#define VM_SSE41_STYLE 1
#include <smmintrin.h>
#endif

#if defined(_MSC_VER)
#pragma warning(pop)
#endif

namespace igl {
/// @private
namespace FastWindingNumber {

typedef __m128 v4sf;
typedef __m128i v4si;

// Plain casting (no conversion)
// MSVC has problems casting between __m128 and __m128i, so we implement a
// custom casting routine specifically for Windows.
#if defined(_MSC_VER)
static SYS_FORCE_INLINE v4sf
vm_v4sf(const v4si &a)
{
    union {
        v4si ival;
        v4sf fval;
    };
    ival = a;
    return fval;
}
static SYS_FORCE_INLINE v4si
vm_v4si(const v4sf &a)
{
    union {
        v4si ival;
        v4sf fval;
    };
    fval = a;
    return ival;
}
#define V4SF(A) vm_v4sf(A)
#define V4SI(A) vm_v4si(A)
#else
#define V4SF(A) (v4sf)A
#define V4SI(A) (v4si)A
#endif

#define VM_SHUFFLE_MASK(a0,a1, b0,b1) ((b1)<<6|(b0)<<4 | (a1)<<2|(a0))

template <int mask>
static SYS_FORCE_INLINE v4sf
vm_shuffle(const v4sf &a, const v4sf &b)
{
    return _mm_shuffle_ps(a, b, mask);
}
template <int mask>
static SYS_FORCE_INLINE v4si
vm_shuffle(const v4si &a, const v4si &b)
{
    return V4SI(_mm_shuffle_ps(V4SF(a), V4SF(b), mask));
}
template <int A, int B, int C, int D, typename T>
static SYS_FORCE_INLINE T
vm_shuffle(const T &a, const T &b)
{
    return vm_shuffle<VM_SHUFFLE_MASK(A,B,C,D)>(a, b);
}
template <int mask, typename T>
static SYS_FORCE_INLINE T
vm_shuffle(const T &a)
{
    return vm_shuffle<mask>(a, a);
}
template <int A, int B, int C, int D, typename T>
static SYS_FORCE_INLINE T
vm_shuffle(const T &a)
{
    return vm_shuffle<A,B,C,D>(a, a);
}
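
// Added illustrative note (not in the original source): following
// _mm_shuffle_ps semantics, vm_shuffle<A,B,C,D>(a, b) yields
// { a[A], a[B], b[C], b[D] }. For example, the four-argument vm_splats below
// builds { a, b, c, d } as
//
//   vm_shuffle<0>(_mm_set_ss(a), _mm_set_ss(b))  // -> { a, a, b, b } (mask 0 picks lane 0 four times)
//   vm_shuffle<0>(_mm_set_ss(c), _mm_set_ss(d))  // -> { c, c, d, d }
//   vm_shuffle<0,2,0,2>(x, y)                    // -> { x[0], x[2], y[0], y[2] } == { a, b, c, d }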
#if defined(VM_SSE41_STYLE)
static SYS_FORCE_INLINE v4si
vm_insert(const v4si v, int32 a, int n)
{
    switch (n)
    {
        case 0: return _mm_insert_epi32(v, a, 0);
        case 1: return _mm_insert_epi32(v, a, 1);
        case 2: return _mm_insert_epi32(v, a, 2);
        case 3: return _mm_insert_epi32(v, a, 3);
    }
    return v;
}
static SYS_FORCE_INLINE v4sf
vm_insert(const v4sf v, float a, int n)
{
    switch (n)
    {
        case 0: return _mm_insert_ps(v, _mm_set_ss(a), _MM_MK_INSERTPS_NDX(0,0,0));
        case 1: return _mm_insert_ps(v, _mm_set_ss(a), _MM_MK_INSERTPS_NDX(0,1,0));
        case 2: return _mm_insert_ps(v, _mm_set_ss(a), _MM_MK_INSERTPS_NDX(0,2,0));
        case 3: return _mm_insert_ps(v, _mm_set_ss(a), _MM_MK_INSERTPS_NDX(0,3,0));
    }
    return v;
}
static SYS_FORCE_INLINE int
vm_extract(const v4si v, int n)
{
    switch (n)
    {
        case 0: return _mm_extract_epi32(v, 0);
        case 1: return _mm_extract_epi32(v, 1);
        case 2: return _mm_extract_epi32(v, 2);
        case 3: return _mm_extract_epi32(v, 3);
    }
    return 0;
}
static SYS_FORCE_INLINE float
vm_extract(const v4sf v, int n)
{
    SYS_FPRealUnionF tmp;
    switch (n)
    {
        case 0: tmp.ival = _mm_extract_ps(v, 0); break;
        case 1: tmp.ival = _mm_extract_ps(v, 1); break;
        case 2: tmp.ival = _mm_extract_ps(v, 2); break;
        case 3: tmp.ival = _mm_extract_ps(v, 3); break;
    }
    return tmp.fval;
}
#else
static SYS_FORCE_INLINE v4si
vm_insert(const v4si v, int32 a, int n)
{
    union { v4si vector; int32 comp[4]; };
    vector = v;
    comp[n] = a;
    return vector;
}
static SYS_FORCE_INLINE v4sf
vm_insert(const v4sf v, float a, int n)
{
    union { v4sf vector; float comp[4]; };
    vector = v;
    comp[n] = a;
    return vector;
}
static SYS_FORCE_INLINE int
vm_extract(const v4si v, int n)
{
    union { v4si vector; int32 comp[4]; };
    vector = v;
    return comp[n];
}
static SYS_FORCE_INLINE float
vm_extract(const v4sf v, int n)
{
    union { v4sf vector; float comp[4]; };
    vector = v;
    return comp[n];
}
#endif

static SYS_FORCE_INLINE v4sf
vm_splats(float a)
{
    return _mm_set1_ps(a);
}
static SYS_FORCE_INLINE v4si
vm_splats(uint32 a)
{
    SYS_FPRealUnionF tmp;
    tmp.uval = a;
    return V4SI(vm_splats(tmp.fval));
}
static SYS_FORCE_INLINE v4si
vm_splats(int32 a)
{
    SYS_FPRealUnionF tmp;
    tmp.ival = a;
    return V4SI(vm_splats(tmp.fval));
}
static SYS_FORCE_INLINE v4sf
vm_splats(float a, float b, float c, float d)
{
    return vm_shuffle<0,2,0,2>(
        vm_shuffle<0>(_mm_set_ss(a), _mm_set_ss(b)),
        vm_shuffle<0>(_mm_set_ss(c), _mm_set_ss(d)));
}
static SYS_FORCE_INLINE v4si
vm_splats(uint32 a, uint32 b, uint32 c, uint32 d)
{
    SYS_FPRealUnionF af, bf, cf, df;
    af.uval = a;
    bf.uval = b;
    cf.uval = c;
    df.uval = d;
    return V4SI(vm_splats(af.fval, bf.fval, cf.fval, df.fval));
}
static SYS_FORCE_INLINE v4si
vm_splats(int32 a, int32 b, int32 c, int32 d)
{
    SYS_FPRealUnionF af, bf, cf, df;
    af.ival = a;
    bf.ival = b;
    cf.ival = c;
    df.ival = d;
    return V4SI(vm_splats(af.fval, bf.fval, cf.fval, df.fval));
}
static SYS_FORCE_INLINE v4si
vm_load(const int32 v[4])
{
    return V4SI(_mm_loadu_ps((const float *)v));
}
static SYS_FORCE_INLINE v4sf
vm_load(const float v[4])
{
    return _mm_loadu_ps(v);
}
static SYS_FORCE_INLINE void
vm_store(float dst[4], v4sf value)
{
    _mm_storeu_ps(dst, value);
}
static SYS_FORCE_INLINE v4sf
vm_negate(v4sf a)
{
    return _mm_sub_ps(_mm_setzero_ps(), a);
}
static SYS_FORCE_INLINE v4sf
vm_abs(v4sf a)
{
    return _mm_max_ps(a, vm_negate(a));
}
static SYS_FORCE_INLINE v4sf
vm_fdiv(v4sf a, v4sf b)
{
    return _mm_mul_ps(a, _mm_rcp_ps(b));
}
static SYS_FORCE_INLINE v4sf
vm_fsqrt(v4sf a)
{
    return _mm_rcp_ps(_mm_rsqrt_ps(a));
}
static SYS_FORCE_INLINE v4sf
vm_madd(v4sf a, v4sf b, v4sf c)
{
    return _mm_add_ps(_mm_mul_ps(a, b), c);
}

static const v4si theSSETrue = vm_splats(0xFFFFFFFF);

static SYS_FORCE_INLINE bool
vm_allbits(const v4si &a)
{
    return _mm_movemask_ps(V4SF(_mm_cmpeq_epi32(a, theSSETrue))) == 0xF;
}

#define VM_EXTRACT vm_extract
#define VM_INSERT vm_insert
#define VM_SPLATS vm_splats
#define VM_LOAD vm_load
#define VM_STORE vm_store

#define VM_CMPLT(A,B) V4SI(_mm_cmplt_ps(A,B))
#define VM_CMPLE(A,B) V4SI(_mm_cmple_ps(A,B))
#define VM_CMPGT(A,B) V4SI(_mm_cmpgt_ps(A,B))
#define VM_CMPGE(A,B) V4SI(_mm_cmpge_ps(A,B))
#define VM_CMPEQ(A,B) V4SI(_mm_cmpeq_ps(A,B))
#define VM_CMPNE(A,B) V4SI(_mm_cmpneq_ps(A,B))

#define VM_ICMPLT _mm_cmplt_epi32
#define VM_ICMPGT _mm_cmpgt_epi32
#define VM_ICMPEQ _mm_cmpeq_epi32

#define VM_IADD _mm_add_epi32
#define VM_ISUB _mm_sub_epi32

#define VM_ADD _mm_add_ps
#define VM_SUB _mm_sub_ps
#define VM_MUL _mm_mul_ps
#define VM_DIV _mm_div_ps
#define VM_SQRT _mm_sqrt_ps
#define VM_ISQRT _mm_rsqrt_ps
#define VM_INVERT _mm_rcp_ps
#define VM_ABS vm_abs
#define VM_FDIV vm_fdiv
#define VM_NEG vm_negate
#define VM_FSQRT vm_fsqrt
#define VM_MADD vm_madd
#define VM_MIN _mm_min_ps
#define VM_MAX _mm_max_ps

#define VM_AND _mm_and_si128
#define VM_ANDNOT _mm_andnot_si128
#define VM_OR _mm_or_si128
#define VM_XOR _mm_xor_si128

#define VM_ALLBITS vm_allbits
#define VM_SHUFFLE vm_shuffle

// Float to integer conversions
#define VM_SSE_ROUND_MASK 0x6000
#define VM_SSE_ROUND_ZERO 0x6000
#define VM_SSE_ROUND_UP 0x4000
#define VM_SSE_ROUND_DOWN 0x2000
#define VM_SSE_ROUND_NEAR 0x0000

#define GETROUND() (_mm_getcsr()&VM_SSE_ROUND_MASK)
#define SETROUND(x) (_mm_setcsr(x|(_mm_getcsr()&~VM_SSE_ROUND_MASK)))

// The P functions must be invoked before FLOOR, the E functions invoked
// afterwards to reset the state.
#define VM_P_FLOOR() uint rounding = GETROUND(); \
                     SETROUND(VM_SSE_ROUND_DOWN);
#define VM_FLOOR _mm_cvtps_epi32
#define VM_INT _mm_cvttps_epi32
#define VM_E_FLOOR() SETROUND(rounding);
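
// Added illustrative note (not in the original source): VM_P_FLOOR declares a
// local `rounding` variable, so the three macros must share one scope, e.g.
//
//   v4si ifloor(v4sf a)
//   {
//       VM_P_FLOOR();          // save the rounding mode, switch to round-down
//       v4si r = VM_FLOOR(a);  // _mm_cvtps_epi32 under round-down == floor
//       VM_E_FLOOR();          // restore the saved rounding mode
//       return r;
//   }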
  610. // Float to integer conversion
  611. #define VM_IFLOAT _mm_cvtepi32_ps
  612. }}
  613. #endif
  614. #endif
#pragma once

#ifndef __SSE__
#ifndef __VM_SIMDFunc__
#define __VM_SIMDFunc__

#include <cmath>

namespace igl {
/// @private
namespace FastWindingNumber {

struct v4si {
    int32 v[4];
};
struct v4sf {
    float v[4];
};

static SYS_FORCE_INLINE v4sf V4SF(const v4si &v) {
    static_assert(sizeof(v4si) == sizeof(v4sf) && alignof(v4si) == alignof(v4sf), "v4si and v4sf must be compatible");
    return *(const v4sf*)&v;
}
static SYS_FORCE_INLINE v4si V4SI(const v4sf &v) {
    static_assert(sizeof(v4si) == sizeof(v4sf) && alignof(v4si) == alignof(v4sf), "v4si and v4sf must be compatible");
    return *(const v4si*)&v;
}

static SYS_FORCE_INLINE int32 conditionMask(bool c) {
    return c ? int32(0xFFFFFFFF) : 0;
}

static SYS_FORCE_INLINE v4sf
VM_SPLATS(float f) {
    return v4sf{{f, f, f, f}};
}
static SYS_FORCE_INLINE v4si
VM_SPLATS(uint32 i) {
    return v4si{{int32(i), int32(i), int32(i), int32(i)}};
}
static SYS_FORCE_INLINE v4si
VM_SPLATS(int32 i) {
    return v4si{{i, i, i, i}};
}
static SYS_FORCE_INLINE v4sf
VM_SPLATS(float a, float b, float c, float d) {
    return v4sf{{a, b, c, d}};
}
static SYS_FORCE_INLINE v4si
VM_SPLATS(uint32 a, uint32 b, uint32 c, uint32 d) {
    return v4si{{int32(a), int32(b), int32(c), int32(d)}};
}
static SYS_FORCE_INLINE v4si
VM_SPLATS(int32 a, int32 b, int32 c, int32 d) {
    return v4si{{a, b, c, d}};
}
static SYS_FORCE_INLINE v4si
VM_LOAD(const int32 v[4]) {
    return v4si{{v[0], v[1], v[2], v[3]}};
}
static SYS_FORCE_INLINE v4sf
VM_LOAD(const float v[4]) {
    return v4sf{{v[0], v[1], v[2], v[3]}};
}

static inline v4si VM_ICMPEQ(v4si a, v4si b) {
    return v4si{{
        conditionMask(a.v[0] == b.v[0]),
        conditionMask(a.v[1] == b.v[1]),
        conditionMask(a.v[2] == b.v[2]),
        conditionMask(a.v[3] == b.v[3])
    }};
}
static inline v4si VM_ICMPGT(v4si a, v4si b) {
    return v4si{{
        conditionMask(a.v[0] > b.v[0]),
        conditionMask(a.v[1] > b.v[1]),
        conditionMask(a.v[2] > b.v[2]),
        conditionMask(a.v[3] > b.v[3])
    }};
}
static inline v4si VM_ICMPLT(v4si a, v4si b) {
    return v4si{{
        conditionMask(a.v[0] < b.v[0]),
        conditionMask(a.v[1] < b.v[1]),
        conditionMask(a.v[2] < b.v[2]),
        conditionMask(a.v[3] < b.v[3])
    }};
}
static inline v4si VM_IADD(v4si a, v4si b) {
    return v4si{{
        (a.v[0] + b.v[0]),
        (a.v[1] + b.v[1]),
        (a.v[2] + b.v[2]),
        (a.v[3] + b.v[3])
    }};
}
static inline v4si VM_ISUB(v4si a, v4si b) {
    return v4si{{
        (a.v[0] - b.v[0]),
        (a.v[1] - b.v[1]),
        (a.v[2] - b.v[2]),
        (a.v[3] - b.v[3])
    }};
}
static inline v4si VM_OR(v4si a, v4si b) {
    return v4si{{
        (a.v[0] | b.v[0]),
        (a.v[1] | b.v[1]),
        (a.v[2] | b.v[2]),
        (a.v[3] | b.v[3])
    }};
}
static inline v4si VM_AND(v4si a, v4si b) {
    return v4si{{
        (a.v[0] & b.v[0]),
        (a.v[1] & b.v[1]),
        (a.v[2] & b.v[2]),
        (a.v[3] & b.v[3])
    }};
}
static inline v4si VM_ANDNOT(v4si a, v4si b) {
    return v4si{{
        ((~a.v[0]) & b.v[0]),
        ((~a.v[1]) & b.v[1]),
        ((~a.v[2]) & b.v[2]),
        ((~a.v[3]) & b.v[3])
    }};
}
static inline v4si VM_XOR(v4si a, v4si b) {
    return v4si{{
        (a.v[0] ^ b.v[0]),
        (a.v[1] ^ b.v[1]),
        (a.v[2] ^ b.v[2]),
        (a.v[3] ^ b.v[3])
    }};
}

static SYS_FORCE_INLINE int
VM_EXTRACT(const v4si v, int index) {
    return v.v[index];
}
static SYS_FORCE_INLINE float
VM_EXTRACT(const v4sf v, int index) {
    return v.v[index];
}
static SYS_FORCE_INLINE v4si
VM_INSERT(v4si v, int32 value, int index) {
    v.v[index] = value;
    return v;
}
static SYS_FORCE_INLINE v4sf
VM_INSERT(v4sf v, float value, int index) {
    v.v[index] = value;
    return v;
}

static inline v4si VM_CMPEQ(v4sf a, v4sf b) {
    return v4si{{
        conditionMask(a.v[0] == b.v[0]),
        conditionMask(a.v[1] == b.v[1]),
        conditionMask(a.v[2] == b.v[2]),
        conditionMask(a.v[3] == b.v[3])
    }};
}
static inline v4si VM_CMPNE(v4sf a, v4sf b) {
    return v4si{{
        conditionMask(a.v[0] != b.v[0]),
        conditionMask(a.v[1] != b.v[1]),
        conditionMask(a.v[2] != b.v[2]),
        conditionMask(a.v[3] != b.v[3])
    }};
}
static inline v4si VM_CMPGT(v4sf a, v4sf b) {
    return v4si{{
        conditionMask(a.v[0] > b.v[0]),
        conditionMask(a.v[1] > b.v[1]),
        conditionMask(a.v[2] > b.v[2]),
        conditionMask(a.v[3] > b.v[3])
    }};
}
static inline v4si VM_CMPLT(v4sf a, v4sf b) {
    return v4si{{
        conditionMask(a.v[0] < b.v[0]),
        conditionMask(a.v[1] < b.v[1]),
        conditionMask(a.v[2] < b.v[2]),
        conditionMask(a.v[3] < b.v[3])
    }};
}
static inline v4si VM_CMPGE(v4sf a, v4sf b) {
    return v4si{{
        conditionMask(a.v[0] >= b.v[0]),
        conditionMask(a.v[1] >= b.v[1]),
        conditionMask(a.v[2] >= b.v[2]),
        conditionMask(a.v[3] >= b.v[3])
    }};
}
static inline v4si VM_CMPLE(v4sf a, v4sf b) {
    return v4si{{
        conditionMask(a.v[0] <= b.v[0]),
        conditionMask(a.v[1] <= b.v[1]),
        conditionMask(a.v[2] <= b.v[2]),
        conditionMask(a.v[3] <= b.v[3])
    }};
}

static inline v4sf VM_ADD(v4sf a, v4sf b) {
    return v4sf{{
        (a.v[0] + b.v[0]),
        (a.v[1] + b.v[1]),
        (a.v[2] + b.v[2]),
        (a.v[3] + b.v[3])
    }};
}
static inline v4sf VM_SUB(v4sf a, v4sf b) {
    return v4sf{{
        (a.v[0] - b.v[0]),
        (a.v[1] - b.v[1]),
        (a.v[2] - b.v[2]),
        (a.v[3] - b.v[3])
    }};
}
static inline v4sf VM_NEG(v4sf a) {
    return v4sf{{
        (-a.v[0]),
        (-a.v[1]),
        (-a.v[2]),
        (-a.v[3])
    }};
}
static inline v4sf VM_MUL(v4sf a, v4sf b) {
    return v4sf{{
        (a.v[0] * b.v[0]),
        (a.v[1] * b.v[1]),
        (a.v[2] * b.v[2]),
        (a.v[3] * b.v[3])
    }};
}
static inline v4sf VM_DIV(v4sf a, v4sf b) {
    return v4sf{{
        (a.v[0] / b.v[0]),
        (a.v[1] / b.v[1]),
        (a.v[2] / b.v[2]),
        (a.v[3] / b.v[3])
    }};
}
static inline v4sf VM_MADD(v4sf a, v4sf b, v4sf c) {
    return v4sf{{
        (a.v[0] * b.v[0]) + c.v[0],
        (a.v[1] * b.v[1]) + c.v[1],
        (a.v[2] * b.v[2]) + c.v[2],
        (a.v[3] * b.v[3]) + c.v[3]
    }};
}
static inline v4sf VM_ABS(v4sf a) {
    return v4sf{{
        (a.v[0] < 0) ? -a.v[0] : a.v[0],
        (a.v[1] < 0) ? -a.v[1] : a.v[1],
        (a.v[2] < 0) ? -a.v[2] : a.v[2],
        (a.v[3] < 0) ? -a.v[3] : a.v[3]
    }};
}
static inline v4sf VM_MAX(v4sf a, v4sf b) {
    return v4sf{{
        (a.v[0] < b.v[0]) ? b.v[0] : a.v[0],
        (a.v[1] < b.v[1]) ? b.v[1] : a.v[1],
        (a.v[2] < b.v[2]) ? b.v[2] : a.v[2],
        (a.v[3] < b.v[3]) ? b.v[3] : a.v[3]
    }};
}
static inline v4sf VM_MIN(v4sf a, v4sf b) {
    return v4sf{{
        (a.v[0] > b.v[0]) ? b.v[0] : a.v[0],
        (a.v[1] > b.v[1]) ? b.v[1] : a.v[1],
        (a.v[2] > b.v[2]) ? b.v[2] : a.v[2],
        (a.v[3] > b.v[3]) ? b.v[3] : a.v[3]
    }};
}
static inline v4sf VM_INVERT(v4sf a) {
    return v4sf{{
        (1.0f/a.v[0]),
        (1.0f/a.v[1]),
        (1.0f/a.v[2]),
        (1.0f/a.v[3])
    }};
}
static inline v4sf VM_SQRT(v4sf a) {
    return v4sf{{
        std::sqrt(a.v[0]),
        std::sqrt(a.v[1]),
        std::sqrt(a.v[2]),
        std::sqrt(a.v[3])
    }};
}
static inline v4si VM_INT(v4sf a) {
    return v4si{{
        int32(a.v[0]),
        int32(a.v[1]),
        int32(a.v[2]),
        int32(a.v[3])
    }};
}
static inline v4sf VM_IFLOAT(v4si a) {
    return v4sf{{
        float(a.v[0]),
        float(a.v[1]),
        float(a.v[2]),
        float(a.v[3])
    }};
}

static SYS_FORCE_INLINE void VM_P_FLOOR() {}
static SYS_FORCE_INLINE int32 singleIntFloor(float f) {
    // Casting to int32 usually truncates toward zero, instead of rounding down,
    // so subtract one if the result is above f.
    int32 i = int32(f);
    i -= (float(i) > f);
    return i;
}
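// Worked example (editor's illustration): singleIntFloor(-1.25f) computes
// int32(-1.25f) == -1; since float(-1) > -1.25f, one is subtracted,
// yielding -2, which matches std::floor. For 2.75f, int32(2.75f) == 2 and
// float(2) <= 2.75f, so the result stays 2.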
static inline v4si VM_FLOOR(v4sf a) {
    return v4si{{
        singleIntFloor(a.v[0]),
        singleIntFloor(a.v[1]),
        singleIntFloor(a.v[2]),
        singleIntFloor(a.v[3])
    }};
}
static SYS_FORCE_INLINE void VM_E_FLOOR() {}

static SYS_FORCE_INLINE bool vm_allbits(v4si a) {
    return (
        (a.v[0] == -1) &&
        (a.v[1] == -1) &&
        (a.v[2] == -1) &&
        (a.v[3] == -1)
    );
}

int SYS_FORCE_INLINE _mm_movemask_ps(const v4si& v) {
    return (
        int(v.v[0] < 0) |
        (int(v.v[1] < 0)<<1) |
        (int(v.v[2] < 0)<<2) |
        (int(v.v[3] < 0)<<3)
    );
}
int SYS_FORCE_INLINE _mm_movemask_ps(const v4sf& v) {
    // Use std::signbit just in case it needs to distinguish between +0 and -0
    // or between positive and negative NaN values (e.g. these could really
    // be integers instead of floats).
    return (
        int(std::signbit(v.v[0])) |
        (int(std::signbit(v.v[1]))<<1) |
        (int(std::signbit(v.v[2]))<<2) |
        (int(std::signbit(v.v[3]))<<3)
    );
}

}}
#endif
#endif

/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      SIMD wrapper classes for 4 floats or 4 ints
 */
#pragma once

#ifndef __HDK_VM_SIMD__
#define __HDK_VM_SIMD__

#include <cstdint>

//#define FORCE_NON_SIMD

namespace igl {
/// @private
namespace FastWindingNumber {

class v4uf;

class v4uu {
public:
    SYS_FORCE_INLINE v4uu() {}
    SYS_FORCE_INLINE v4uu(const v4si &v) : vector(v) {}
    SYS_FORCE_INLINE v4uu(const v4uu &v) : vector(v.vector) {}
    explicit SYS_FORCE_INLINE v4uu(int32 v) { vector = VM_SPLATS(v); }
    explicit SYS_FORCE_INLINE v4uu(const int32 v[4])
    { vector = VM_LOAD(v); }
    SYS_FORCE_INLINE v4uu(int32 a, int32 b, int32 c, int32 d)
    { vector = VM_SPLATS(a, b, c, d); }

    // Assignment
    SYS_FORCE_INLINE v4uu operator=(int32 v)
    { vector = v4uu(v).vector; return *this; }
    SYS_FORCE_INLINE v4uu operator=(v4si v)
    { vector = v; return *this; }
    SYS_FORCE_INLINE v4uu operator=(const v4uu &v)
    { vector = v.vector; return *this; }

    SYS_FORCE_INLINE void condAssign(const v4uu &val, const v4uu &c)
    { *this = (c & val) | ((!c) & *this); }
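    // Lane-wise select (editor's note): each lane of the mask c is either
    // all ones or all zeros, so (c & val) keeps val where c is set, and
    // ((!c) & *this) keeps the old value where c is clear (!c turns a zero
    // lane into an all-ones mask).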
    // Comparison
    SYS_FORCE_INLINE v4uu operator == (const v4uu &v) const
    { return v4uu(VM_ICMPEQ(vector, v.vector)); }
    SYS_FORCE_INLINE v4uu operator != (const v4uu &v) const
    { return ~(*this == v); }
    SYS_FORCE_INLINE v4uu operator > (const v4uu &v) const
    { return v4uu(VM_ICMPGT(vector, v.vector)); }
    SYS_FORCE_INLINE v4uu operator < (const v4uu &v) const
    { return v4uu(VM_ICMPLT(vector, v.vector)); }
    SYS_FORCE_INLINE v4uu operator >= (const v4uu &v) const
    { return ~(*this < v); }
    SYS_FORCE_INLINE v4uu operator <= (const v4uu &v) const
    { return ~(*this > v); }
    SYS_FORCE_INLINE v4uu operator == (int32 v) const { return *this == v4uu(v); }
    SYS_FORCE_INLINE v4uu operator != (int32 v) const { return *this != v4uu(v); }
    SYS_FORCE_INLINE v4uu operator > (int32 v) const { return *this > v4uu(v); }
    SYS_FORCE_INLINE v4uu operator < (int32 v) const { return *this < v4uu(v); }
    SYS_FORCE_INLINE v4uu operator >= (int32 v) const { return *this >= v4uu(v); }
    SYS_FORCE_INLINE v4uu operator <= (int32 v) const { return *this <= v4uu(v); }

    // Basic math
    SYS_FORCE_INLINE v4uu operator+(const v4uu &r) const
    { return v4uu(VM_IADD(vector, r.vector)); }
    SYS_FORCE_INLINE v4uu operator-(const v4uu &r) const
    { return v4uu(VM_ISUB(vector, r.vector)); }
    SYS_FORCE_INLINE v4uu operator+=(const v4uu &r) { return (*this = *this + r); }
    SYS_FORCE_INLINE v4uu operator-=(const v4uu &r) { return (*this = *this - r); }
    SYS_FORCE_INLINE v4uu operator+(int32 r) const { return *this + v4uu(r); }
    SYS_FORCE_INLINE v4uu operator-(int32 r) const { return *this - v4uu(r); }
    SYS_FORCE_INLINE v4uu operator+=(int32 r) { return (*this = *this + r); }
    SYS_FORCE_INLINE v4uu operator-=(int32 r) { return (*this = *this - r); }

    // logical/bitwise
    SYS_FORCE_INLINE v4uu operator||(const v4uu &r) const
    { return v4uu(VM_OR(vector, r.vector)); }
    SYS_FORCE_INLINE v4uu operator&&(const v4uu &r) const
    { return v4uu(VM_AND(vector, r.vector)); }
    SYS_FORCE_INLINE v4uu operator^(const v4uu &r) const
    { return v4uu(VM_XOR(vector, r.vector)); }
    SYS_FORCE_INLINE v4uu operator!() const
    { return *this == v4uu(0); }
    SYS_FORCE_INLINE v4uu operator|(const v4uu &r) const { return *this || r; }
    SYS_FORCE_INLINE v4uu operator&(const v4uu &r) const { return *this && r; }
    SYS_FORCE_INLINE v4uu operator~() const
    { return *this ^ v4uu(0xFFFFFFFF); }

    // component
    SYS_FORCE_INLINE int32 operator[](int idx) const { return VM_EXTRACT(vector, idx); }
    SYS_FORCE_INLINE void setComp(int idx, int32 v) { vector = VM_INSERT(vector, v, idx); }

    v4uf toFloat() const;

public:
    v4si vector;
};

class v4uf {
public:
    SYS_FORCE_INLINE v4uf() {}
    SYS_FORCE_INLINE v4uf(const v4sf &v) : vector(v) {}
    SYS_FORCE_INLINE v4uf(const v4uf &v) : vector(v.vector) {}
    explicit SYS_FORCE_INLINE v4uf(float v) { vector = VM_SPLATS(v); }
    explicit SYS_FORCE_INLINE v4uf(const float v[4])
    { vector = VM_LOAD(v); }
    SYS_FORCE_INLINE v4uf(float a, float b, float c, float d)
    { vector = VM_SPLATS(a, b, c, d); }

    // Assignment
    SYS_FORCE_INLINE v4uf operator=(float v)
    { vector = v4uf(v).vector; return *this; }
    SYS_FORCE_INLINE v4uf operator=(v4sf v)
    { vector = v; return *this; }
    SYS_FORCE_INLINE v4uf operator=(const v4uf &v)
    { vector = v.vector; return *this; }

    SYS_FORCE_INLINE void condAssign(const v4uf &val, const v4uu &c)
    { *this = (val & c) | (*this & ~c); }

    // Comparison
    SYS_FORCE_INLINE v4uu operator == (const v4uf &v) const
    { return v4uu(VM_CMPEQ(vector, v.vector)); }
    SYS_FORCE_INLINE v4uu operator != (const v4uf &v) const
    { return v4uu(VM_CMPNE(vector, v.vector)); }
    SYS_FORCE_INLINE v4uu operator > (const v4uf &v) const
    { return v4uu(VM_CMPGT(vector, v.vector)); }
    SYS_FORCE_INLINE v4uu operator < (const v4uf &v) const
    { return v4uu(VM_CMPLT(vector, v.vector)); }
    SYS_FORCE_INLINE v4uu operator >= (const v4uf &v) const
    { return v4uu(VM_CMPGE(vector, v.vector)); }
    SYS_FORCE_INLINE v4uu operator <= (const v4uf &v) const
    { return v4uu(VM_CMPLE(vector, v.vector)); }
    SYS_FORCE_INLINE v4uu operator == (float v) const { return *this == v4uf(v); }
    SYS_FORCE_INLINE v4uu operator != (float v) const { return *this != v4uf(v); }
    SYS_FORCE_INLINE v4uu operator > (float v) const { return *this > v4uf(v); }
    SYS_FORCE_INLINE v4uu operator < (float v) const { return *this < v4uf(v); }
    SYS_FORCE_INLINE v4uu operator >= (float v) const { return *this >= v4uf(v); }
    SYS_FORCE_INLINE v4uu operator <= (float v) const { return *this <= v4uf(v); }

    // Basic math
    SYS_FORCE_INLINE v4uf operator+(const v4uf &r) const
    { return v4uf(VM_ADD(vector, r.vector)); }
    SYS_FORCE_INLINE v4uf operator-(const v4uf &r) const
    { return v4uf(VM_SUB(vector, r.vector)); }
    SYS_FORCE_INLINE v4uf operator-() const
    { return v4uf(VM_NEG(vector)); }
    SYS_FORCE_INLINE v4uf operator*(const v4uf &r) const
    { return v4uf(VM_MUL(vector, r.vector)); }
    SYS_FORCE_INLINE v4uf operator/(const v4uf &r) const
    { return v4uf(VM_DIV(vector, r.vector)); }

    SYS_FORCE_INLINE v4uf operator+=(const v4uf &r) { return (*this = *this + r); }
    SYS_FORCE_INLINE v4uf operator-=(const v4uf &r) { return (*this = *this - r); }
    SYS_FORCE_INLINE v4uf operator*=(const v4uf &r) { return (*this = *this * r); }
    SYS_FORCE_INLINE v4uf operator/=(const v4uf &r) { return (*this = *this / r); }

    SYS_FORCE_INLINE v4uf operator+(float r) const { return *this + v4uf(r); }
    SYS_FORCE_INLINE v4uf operator-(float r) const { return *this - v4uf(r); }
    SYS_FORCE_INLINE v4uf operator*(float r) const { return *this * v4uf(r); }
    SYS_FORCE_INLINE v4uf operator/(float r) const { return *this / v4uf(r); }
    SYS_FORCE_INLINE v4uf operator+=(float r) { return (*this = *this + r); }
    SYS_FORCE_INLINE v4uf operator-=(float r) { return (*this = *this - r); }
    SYS_FORCE_INLINE v4uf operator*=(float r) { return (*this = *this * r); }
    SYS_FORCE_INLINE v4uf operator/=(float r) { return (*this = *this / r); }

    // logical/bitwise
    SYS_FORCE_INLINE v4uf operator||(const v4uu &r) const
    { return v4uf(V4SF(VM_OR(V4SI(vector), r.vector))); }
    SYS_FORCE_INLINE v4uf operator&&(const v4uu &r) const
    { return v4uf(V4SF(VM_AND(V4SI(vector), r.vector))); }
    SYS_FORCE_INLINE v4uf operator^(const v4uu &r) const
    { return v4uf(V4SF(VM_XOR(V4SI(vector), r.vector))); }
    SYS_FORCE_INLINE v4uf operator!() const
    { return v4uf(V4SF((*this == v4uf(0.0F)).vector)); }

    SYS_FORCE_INLINE v4uf operator||(const v4uf &r) const
    { return v4uf(V4SF(VM_OR(V4SI(vector), V4SI(r.vector)))); }
    SYS_FORCE_INLINE v4uf operator&&(const v4uf &r) const
    { return v4uf(V4SF(VM_AND(V4SI(vector), V4SI(r.vector)))); }
    SYS_FORCE_INLINE v4uf operator^(const v4uf &r) const
    { return v4uf(V4SF(VM_XOR(V4SI(vector), V4SI(r.vector)))); }

    SYS_FORCE_INLINE v4uf operator|(const v4uu &r) const { return *this || r; }
    SYS_FORCE_INLINE v4uf operator&(const v4uu &r) const { return *this && r; }
    SYS_FORCE_INLINE v4uf operator~() const
    { return *this ^ v4uu(0xFFFFFFFF); }
    SYS_FORCE_INLINE v4uf operator|(const v4uf &r) const { return *this || r; }
    SYS_FORCE_INLINE v4uf operator&(const v4uf &r) const { return *this && r; }

    // component
    SYS_FORCE_INLINE float operator[](int idx) const { return VM_EXTRACT(vector, idx); }
    SYS_FORCE_INLINE void setComp(int idx, float v) { vector = VM_INSERT(vector, v, idx); }

    // more math
    SYS_FORCE_INLINE v4uf abs() const { return v4uf(VM_ABS(vector)); }
    SYS_FORCE_INLINE v4uf clamp(const v4uf &low, const v4uf &high) const
    { return v4uf(VM_MIN(VM_MAX(vector, low.vector), high.vector)); }
    SYS_FORCE_INLINE v4uf clamp(float low, float high) const
    { return v4uf(VM_MIN(VM_MAX(vector, v4uf(low).vector), v4uf(high).vector)); }
    SYS_FORCE_INLINE v4uf recip() const { return v4uf(VM_INVERT(vector)); }

    /// This is a lie; it is a signed int.
    SYS_FORCE_INLINE v4uu toUnsignedInt() const { return VM_INT(vector); }
    SYS_FORCE_INLINE v4uu toSignedInt() const { return VM_INT(vector); }

    v4uu floor() const
    {
        VM_P_FLOOR();
        v4uu result = VM_FLOOR(vector);
        VM_E_FLOOR();
        return result;
    }

    /// Returns the integer part of this float; *this becomes the
    /// 0..1 fractional component.
    v4uu splitFloat()
    {
        v4uu base = toSignedInt();
        *this -= base.toFloat();
        return base;
    }

#ifdef __SSE__
    template <int A, int B, int C, int D>
    SYS_FORCE_INLINE v4uf swizzle() const
    {
        return VM_SHUFFLE<A,B,C,D>(vector);
    }
#endif

    SYS_FORCE_INLINE v4uu isFinite() const
    {
        // If the exponent is the maximum value, it's either infinite or NaN.
        const v4si mask = VM_SPLATS(0x7F800000);
        return ~v4uu(VM_ICMPEQ(VM_AND(V4SI(vector), mask), mask));
    }
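    // Bit-pattern check for isFinite() above (editor's illustration):
    // 1.0f is 0x3F800000, and 0x3F800000 & 0x7F800000 == 0x3F800000, which
    // differs from the mask, so 1.0f is reported finite; +infinity is
    // exactly 0x7F800000, so it matches the mask and is not.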
public:
    v4sf vector;
};

SYS_FORCE_INLINE v4uf
v4uu::toFloat() const
{
    return v4uf(VM_IFLOAT(vector));
}

//
// Custom vector operations
//

static SYS_FORCE_INLINE v4uf
sqrt(const v4uf &a)
{
    return v4uf(VM_SQRT(a.vector));
}

static SYS_FORCE_INLINE v4uf
fabs(const v4uf &a)
{
    return a.abs();
}

// Use this operation to mask disabled values to 0
// rval = !a ? b : 0;
static SYS_FORCE_INLINE v4uf
andn(const v4uu &a, const v4uf &b)
{
    return v4uf(V4SF(VM_ANDNOT(a.vector, V4SI(b.vector))));
}

static SYS_FORCE_INLINE v4uu
andn(const v4uu &a, const v4uu &b)
{
    return v4uu(VM_ANDNOT(a.vector, b.vector));
}

// rval = a ? b : c;
static SYS_FORCE_INLINE v4uf
ternary(const v4uu &a, const v4uf &b, const v4uf &c)
{
    return (b & a) | andn(a, c);
}

static SYS_FORCE_INLINE v4uu
ternary(const v4uu &a, const v4uu &b, const v4uu &c)
{
    return (b & a) | andn(a, c);
}

// rval = !(a && b)
static SYS_FORCE_INLINE v4uu
nand(const v4uu &a, const v4uu &b)
{
    return !v4uu(VM_AND(a.vector, b.vector));
}

static SYS_FORCE_INLINE v4uf
vmin(const v4uf &a, const v4uf &b)
{
    return v4uf(VM_MIN(a.vector, b.vector));
}

static SYS_FORCE_INLINE v4uf
vmax(const v4uf &a, const v4uf &b)
{
    return v4uf(VM_MAX(a.vector, b.vector));
}

static SYS_FORCE_INLINE v4uf
clamp(const v4uf &a, const v4uf &b, const v4uf &c)
{
    return vmax(vmin(a, c), b);
}

static SYS_FORCE_INLINE v4uf
clamp(const v4uf &a, float b, float c)
{
    return vmax(vmin(a, v4uf(c)), v4uf(b));
}

static SYS_FORCE_INLINE bool
allbits(const v4uu &a)
{
    return vm_allbits(a.vector);
}

static SYS_FORCE_INLINE bool
anybits(const v4uu &a)
{
    return !allbits(~a);
}

static SYS_FORCE_INLINE v4uf
madd(const v4uf &v, const v4uf &f, const v4uf &a)
{
    return v4uf(VM_MADD(v.vector, f.vector, a.vector));
}

static SYS_FORCE_INLINE v4uf
madd(const v4uf &v, float f, float a)
{
    return v4uf(VM_MADD(v.vector, v4uf(f).vector, v4uf(a).vector));
}

static SYS_FORCE_INLINE v4uf
madd(const v4uf &v, float f, const v4uf &a)
{
    return v4uf(VM_MADD(v.vector, v4uf(f).vector, a.vector));
}

static SYS_FORCE_INLINE v4uf
msub(const v4uf &v, const v4uf &f, const v4uf &s)
{
    return madd(v, f, -s);
}

static SYS_FORCE_INLINE v4uf
msub(const v4uf &v, float f, float s)
{
    return madd(v, f, -s);
}

static SYS_FORCE_INLINE v4uf
lerp(const v4uf &a, const v4uf &b, const v4uf &w)
{
    v4uf w1 = v4uf(1.0F) - w;
    return madd(a, w1, b*w);
}

static SYS_FORCE_INLINE v4uf
luminance(const v4uf &r, const v4uf &g, const v4uf &b,
          float rw, float gw, float bw)
{
    return v4uf(madd(r, v4uf(rw), madd(g, v4uf(gw), b * bw)));
}

static SYS_FORCE_INLINE float
dot3(const v4uf &a, const v4uf &b)
{
    v4uf res = a*b;
    return res[0] + res[1] + res[2];
}

static SYS_FORCE_INLINE float
dot4(const v4uf &a, const v4uf &b)
{
    v4uf res = a*b;
    return res[0] + res[1] + res[2] + res[3];
}

static SYS_FORCE_INLINE float
length(const v4uf &a)
{
    return SYSsqrt(dot3(a, a));
}

static SYS_FORCE_INLINE v4uf
normalize(const v4uf &a)
{
    return a / length(a);
}

static SYS_FORCE_INLINE v4uf
cross(const v4uf &a, const v4uf &b)
{
    return v4uf(a[1]*b[2] - a[2]*b[1],
                a[2]*b[0] - a[0]*b[2],
                a[0]*b[1] - a[1]*b[0], 0);
}

// Currently there is no specific support for signed integers
typedef v4uu v4ui;

// Assuming that ptr is an array of elements of type STYPE, this operation
// will return the index of the first element that is aligned to (1<<ASIZE)
// bytes.
#define VM_ALIGN(ptr, ASIZE, STYPE) \
    ((((1<<ASIZE)-(intptr_t)ptr)&((1<<ASIZE)-1))/sizeof(STYPE))
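// Worked example (editor's illustration): with ASIZE == 4 (16-byte
// alignment) and STYPE == float, a pointer whose address ends in 0x8 gives
// ((16 - 8) & 15) / sizeof(float) == 2, i.e. the element two floats in is
// the first one that is 16-byte aligned.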
}}
#endif

/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      This is the array class implementation used by almost everything here.
 */
#pragma once

#ifndef __UT_ARRAY_H_INCLUDED__
#define __UT_ARRAY_H_INCLUDED__

#include <algorithm>
#include <functional>
#include <type_traits>
#include <string.h>

namespace igl {
/// @private
namespace FastWindingNumber {

/// This routine describes how to change the size of an array.
/// It must increase the current_size by at least one!
///
/// Current expected sequence of small sizes:
///    4,   8,  16,  32,  48,  64,  80,  96, 112,
///  128, 256, 384, 512, 640, 768, 896, 1024,
/// (increases by approx factor of 1.125 each time after this)
template <typename T>
static inline T
UTbumpAlloc(T current_size)
{
    // NOTE: These must be powers of two. See below.
    constexpr T SMALL_ALLOC(16);
    constexpr T BIG_ALLOC(128);

    // For small values, we increment by fixed amounts. For
    // large values, we increment by one eighth of the current size.
    // This prevents n^2 behaviour with allocation one element at a time.
    // A factor of 1/8 will waste 1/16 the memory on average, and will
    // double the size of the array in approximately 6 reallocations.
    if (current_size < T(8))
    {
        return (current_size < T(4)) ? T(4) : T(8);
    }
    if (current_size < T(BIG_ALLOC))
    {
        // Snap up to next multiple of SMALL_ALLOC (must be power of 2)
        return (current_size + T(SMALL_ALLOC)) & ~T(SMALL_ALLOC-1);
    }
    if (current_size < T(BIG_ALLOC * 8))
    {
        // Snap up to next multiple of BIG_ALLOC (must be power of 2)
        return (current_size + T(BIG_ALLOC)) & ~T(BIG_ALLOC-1);
    }

    T bump = current_size >> 3; // Divided by 8.
    current_size += bump;
    return current_size;
}
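// A quick sanity sketch (hypothetical driver, not part of the header):
// repeatedly feeding the result back in reproduces the documented sequence.
//
//     exint n = 0;
//     for (int i = 0; i < 12; ++i)
//         n = UTbumpAlloc(n); // 4, 8, 16, 32, 48, 64, 80, 96, 112, 128, 256, 384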
template <typename T>
class UT_Array
{
public:
    typedef T value_type;
    typedef int (*Comparator)(const T *, const T *);

    /// Copy constructor. It duplicates the data.
    /// It's marked explicit so that it's not accidentally passed by value.
    /// You can always pass by reference and then copy it, if needed.
    /// If you have a line like:
    ///     UT_Array<int> a = otherarray;
    /// and it really does need to copy instead of referencing,
    /// you can rewrite it as:
    ///     UT_Array<int> a(otherarray);
    inline explicit UT_Array(const UT_Array<T> &a);

    /// Move constructor. Steals the working data from the original.
    inline UT_Array(UT_Array<T> &&a) noexcept;

    /// Construct based on given capacity and size
    UT_Array(exint capacity, exint size)
    {
        myData = capacity ? allocateCapacity(capacity) : NULL;
        if (capacity < size)
            size = capacity;
        mySize = size;
        myCapacity = capacity;
        trivialConstructRange(myData, mySize);
    }

    /// Construct based on given capacity with a size of 0
    explicit UT_Array(exint capacity = 0) : myCapacity(capacity), mySize(0)
    {
        myData = capacity ? allocateCapacity(capacity) : NULL;
    }

    /// Construct with the contents of an initializer list
    inline explicit UT_Array(std::initializer_list<T> init);

    inline ~UT_Array();

    inline void swap(UT_Array<T> &other);

    /// Append an element to the current elements and return its index in the
    /// array, or insert the element at a specified position; if necessary,
    /// insert() grows the array to accommodate the element. The insert
    /// methods use the assignment operator '=' to place the element into the
    /// right spot; be aware that '=' works differently on objects and pointers.
    /// The test for duplicates uses the logical equal operator '=='; as with
    /// '=', the behaviour of the equality operator on pointers versus objects
    /// is not the same.
    /// Use the subscript operators instead of insert() if you are appending
    /// to the array, or if you don't mind overwriting the element already
    /// inserted at the given index.
    exint append(void) { return insert(mySize); }
    exint append(const T &t) { return appendImpl(t); }
    exint append(T &&t) { return appendImpl(std::move(t)); }
    inline void append(const T *pt, exint count);
    inline void appendMultiple(const T &t, exint count);
    inline exint insert(exint index);
    exint insert(const T &t, exint i)
    { return insertImpl(t, i); }
    exint insert(T &&t, exint i)
    { return insertImpl(std::move(t), i); }

    /// Adds a new element to the array (resizing if necessary) and forwards
    /// the given arguments to T's constructor.
    /// NOTE: Unlike append(), the arguments cannot reference any existing
    /// elements in the array. Checking for and handling such cases would
    /// remove most of the performance gain versus append(T(...)). Debug builds
    /// will assert that the arguments are valid.
    template <typename... S>
    inline exint emplace_back(S&&... s);

    /// Takes another T array and concatenates it onto my end
    inline exint concat(const UT_Array<T> &a);

    /// Insert an element "count" times at the given index. Return the index.
    inline exint multipleInsert(exint index, exint count);

    /// An alias for unique element insertion at a certain index. Also used by
    /// the other insertion methods.
    exint insertAt(const T &t, exint index)
    { return insertImpl(t, index); }

    /// Return true if given index is valid.
    bool isValidIndex(exint index) const
    { return (index >= 0 && index < mySize); }

    /// Remove one element from the array given its
    /// position in the list, and fill the gap by shifting the elements down
    /// by one position. Return the index of the element removed or -1 if
    /// the index was out of bounds.
    exint removeIndex(exint index)
    {
        return isValidIndex(index) ? removeAt(index) : -1;
    }
    void removeLast()
    {
        if (mySize) removeAt(mySize-1);
    }

    /// Remove the range [begin_i,end_i) of elements from the array.
    inline void removeRange(exint begin_i, exint end_i);

    /// Remove the range [begin_i, end_i) of elements from this array and place
    /// them in the dest array, shrinking/growing the dest array as necessary.
    inline void extractRange(exint begin_i, exint end_i,
                             UT_Array<T>& dest);

    /// Removes all matching elements from the list, shuffling down and
    /// changing the size appropriately.
    /// Returns the number of elements left.
    template <typename IsEqual>
    inline exint removeIf(IsEqual is_equal);

    /// Remove all matching elements. Also sets the capacity of the array.
    template <typename IsEqual>
    void collapseIf(IsEqual is_equal)
    {
        removeIf(is_equal);
        setCapacity(size());
    }

    /// Move howMany objects starting at index srcIdx to destIdx;
    /// this method will remove the elements at [srcIdx, srcIdx+howMany) and
    /// then insert them at destIdx. This method can be used in place of
    /// the old shift() operation.
    inline void move(exint srcIdx, exint destIdx, exint howMany);

    /// Cyclically shifts the entire array by howMany
    inline void cycle(exint howMany);

    /// Quickly set the array to a single value.
    inline void constant(const T &v);

    /// Zeros the array if a POD type, else trivially constructs if a
    /// class type.
    inline void zero();

    /// The fastest search possible, which does pointer arithmetic to find the
    /// index of the element. WARNING: index() does no out-of-bounds checking.
    exint index(const T &t) const { return &t - myData; }
    exint safeIndex(const T &t) const
    {
        return (&t >= myData && &t < (myData + mySize))
            ? &t - myData : -1;
    }

    /// Set the capacity of the array, i.e. grow it or shrink it. The
    /// function copies the data after reallocating space for the array.
    inline void setCapacity(exint newcapacity);
    void setCapacityIfNeeded(exint mincapacity)
    {
        if (capacity() < mincapacity)
            setCapacity(mincapacity);
    }
    /// If the capacity is smaller than mincapacity, expand the array
    /// to at least mincapacity and to at least a constant factor of the
    /// array's previous capacity, to avoid having a linear number of
    /// reallocations in a linear number of calls to bumpCapacity.
    void bumpCapacity(exint mincapacity)
    {
        if (capacity() >= mincapacity)
            return;
        // The following 4 lines are just
        // SYSmax(mincapacity, UTbumpAlloc(capacity())), avoiding SYSmax
        exint bumped = UTbumpAlloc(capacity());
        exint newcapacity = mincapacity;
        if (bumped > mincapacity)
            newcapacity = bumped;
        setCapacity(newcapacity);
    }

    /// First bumpCapacity to ensure that there's space for newsize,
    /// expanding either not at all or by at least a constant factor
    /// of the array's previous capacity,
    /// then set the size to newsize.
    void bumpSize(exint newsize)
    {
        bumpCapacity(newsize);
        setSize(newsize);
    }
    /// NOTE: bumpEntries() will be deprecated in favour of bumpSize() in a
    /// future version.
    void bumpEntries(exint newsize)
    {
        bumpSize(newsize);
    }

    /// Query the capacity, i.e. the allocated length of the array.
    /// NOTE: capacity() >= size().
    exint capacity() const { return myCapacity; }
    /// Query the size, i.e. the number of occupied elements in the array.
    /// NOTE: capacity() >= size().
    exint size() const { return mySize; }
    /// Alias of size(). size() is preferred.
    exint entries() const { return mySize; }
    /// Returns true iff there are no occupied elements in the array.
    bool isEmpty() const { return mySize==0; }

    /// Set the size, the number of occupied elements in the array.
    /// NOTE: This will not do bumpCapacity, so if you call this
    /// n times to increase the size, it may take n^2 time.
    void setSize(exint newsize)
    {
        if (newsize < 0)
            newsize = 0;
        if (newsize == mySize)
            return;
        setCapacityIfNeeded(newsize);
        if (mySize > newsize)
            trivialDestructRange(myData + newsize, mySize - newsize);
        else // newsize > mySize
            trivialConstructRange(myData + mySize, newsize - mySize);
        mySize = newsize;
    }
    /// Alias of setSize(). setSize() is preferred.
    void entries(exint newsize)
    {
        setSize(newsize);
    }
    /// Set the size, but unlike setSize(newsize), this function
    /// will not initialize new POD elements to zero. Non-POD data types
    /// will still have their constructors called.
    /// This function is faster than setSize(ne) if you intend to fill in
    /// data for all elements.
    void setSizeNoInit(exint newsize)
    {
        if (newsize < 0)
            newsize = 0;
        if (newsize == mySize)
            return;
        setCapacityIfNeeded(newsize);
        if (mySize > newsize)
            trivialDestructRange(myData + newsize, mySize - newsize);
        else if (!isPOD()) // newsize > mySize
            trivialConstructRange(myData + mySize, newsize - mySize);
        mySize = newsize;
    }

    /// Decreases, but never expands, to the given maxsize.
    void truncate(exint maxsize)
    {
        if (maxsize >= 0 && size() > maxsize)
            setSize(maxsize);
    }
    /// Resets list to an empty list.
    void clear() {
        // Don't call setSize(0) since that would require a valid default
        // constructor.
        trivialDestructRange(myData, mySize);
        mySize = 0;
    }

    /// Assign array a to this array by copying each of a's elements with
    /// memcpy for POD types, and with copy construction for class types.
    inline UT_Array<T> &operator=(const UT_Array<T> &a);

    /// Replace the contents with those from the initializer_list ilist
    inline UT_Array<T> &operator=(std::initializer_list<T> ilist);

    /// Move the contents of array a to this array.
    inline UT_Array<T> &operator=(UT_Array<T> &&a);

    /// Compare two arrays and return true if they are equal, false otherwise.
    /// Two elements are checked against each other using operator '==' or
    /// compare() respectively.
    /// NOTE: The capacities of the arrays are not checked when
    /// determining whether they are equal.
    inline bool operator==(const UT_Array<T> &a) const;
    inline bool operator!=(const UT_Array<T> &a) const;

    /// Subscript operator
    /// NOTE: This does NOT do any bounds checking unless paranoid
    /// asserts are enabled.
    T &operator()(exint i)
    {
        UT_IGL_ASSERT_P(i >= 0 && i < mySize);
        return myData[i];
    }
    /// Const subscript operator
    /// NOTE: This does NOT do any bounds checking unless paranoid
    /// asserts are enabled.
    const T &operator()(exint i) const
    {
        UT_IGL_ASSERT_P(i >= 0 && i < mySize);
        return myData[i];
    }

    /// Subscript operator
    /// NOTE: This does NOT do any bounds checking unless paranoid
    /// asserts are enabled.
    T &operator[](exint i)
    {
        UT_IGL_ASSERT_P(i >= 0 && i < mySize);
        return myData[i];
    }
    /// Const subscript operator
    /// NOTE: This does NOT do any bounds checking unless paranoid
    /// asserts are enabled.
    const T &operator[](exint i) const
    {
        UT_IGL_ASSERT_P(i >= 0 && i < mySize);
        return myData[i];
    }

    /// forcedRef(exint) will grow the array if necessary, initializing any
    /// new elements to zero for POD types and default constructing for
    /// class types.
    T &forcedRef(exint i)
    {
        UT_IGL_ASSERT_P(i >= 0);
        if (i >= mySize)
            bumpSize(i+1);
        return myData[i];
    }

    /// forcedGet(exint) does NOT grow the array, and will return default
    /// objects for out of bound array indices.
    T forcedGet(exint i) const
    {
        return (i >= 0 && i < mySize) ? myData[i] : T();
    }

    T &last()
    {
        UT_IGL_ASSERT_P(mySize);
        return myData[mySize-1];
    }
    const T &last() const
    {
        UT_IGL_ASSERT_P(mySize);
        return myData[mySize-1];
    }

    T *getArray() const { return myData; }
    const T *getRawArray() const { return myData; }

    T *array() { return myData; }
    const T *array() const { return myData; }

    T *data() { return myData; }
    const T *data() const { return myData; }

    /// This method allows you to swap in a new raw T array, which must be
    /// the same size as myCapacity. Use caution with this method.
    T *aliasArray(T *newdata)
    { T *data = myData; myData = newdata; return data; }
    template <typename IT, bool FORWARD>
    class base_iterator :
        public std::iterator<std::random_access_iterator_tag, T, exint>
    {
    public:
        typedef IT& reference;
        typedef IT* pointer;

        // Note: When we drop gcc 4.4 support and allow range-based for
        // loops, we should also drop atEnd(), which means we can drop
        // myEnd here.
        base_iterator() : myCurrent(NULL), myEnd(NULL) {}

        // Allow iterator to const_iterator conversion
        template<typename EIT>
        base_iterator(const base_iterator<EIT, FORWARD> &src)
            : myCurrent(src.myCurrent), myEnd(src.myEnd) {}

        pointer operator->() const
        { return FORWARD ? myCurrent : myCurrent - 1; }
        reference operator*() const
        { return FORWARD ? *myCurrent : myCurrent[-1]; }
        reference item() const
        { return FORWARD ? *myCurrent : myCurrent[-1]; }
        reference operator[](exint n) const
        { return FORWARD ? myCurrent[n] : myCurrent[-n - 1]; }

        /// Pre-increment operator
        base_iterator &operator++()
        {
            if (FORWARD) ++myCurrent; else --myCurrent;
            return *this;
        }
        /// Post-increment operator
        base_iterator operator++(int)
        {
            base_iterator tmp = *this;
            if (FORWARD) ++myCurrent; else --myCurrent;
            return tmp;
        }
        /// Pre-decrement operator
        base_iterator &operator--()
        {
            if (FORWARD) --myCurrent; else ++myCurrent;
            return *this;
        }
        /// Post-decrement operator
        base_iterator operator--(int)
        {
            base_iterator tmp = *this;
            if (FORWARD) --myCurrent; else ++myCurrent;
            return tmp;
        }

        base_iterator &operator+=(exint n)
        {
            if (FORWARD)
                myCurrent += n;
            else
                myCurrent -= n;
            return *this;
        }
        base_iterator operator+(exint n) const
        {
            if (FORWARD)
                return base_iterator(myCurrent + n, myEnd);
            else
                return base_iterator(myCurrent - n, myEnd);
        }
        base_iterator &operator-=(exint n)
        { return (*this) += (-n); }
        base_iterator operator-(exint n) const
        { return (*this) + (-n); }

        bool atEnd() const { return myCurrent == myEnd; }
        void advance() { this->operator++(); }

        // Comparators
        template<typename ITR, bool FR>
        bool operator==(const base_iterator<ITR, FR> &r) const
        { return myCurrent == r.myCurrent; }
        template<typename ITR, bool FR>
        bool operator!=(const base_iterator<ITR, FR> &r) const
        { return myCurrent != r.myCurrent; }

        template<typename ITR>
        bool operator<(const base_iterator<ITR, FORWARD> &r) const
        {
            if (FORWARD)
                return myCurrent < r.myCurrent;
            else
                return r.myCurrent < myCurrent;
        }
        template<typename ITR>
        bool operator>(const base_iterator<ITR, FORWARD> &r) const
        {
            if (FORWARD)
                return myCurrent > r.myCurrent;
            else
                return r.myCurrent > myCurrent;
        }
        template<typename ITR>
        bool operator<=(const base_iterator<ITR, FORWARD> &r) const
        {
            if (FORWARD)
                return myCurrent <= r.myCurrent;
            else
                return r.myCurrent <= myCurrent;
        }
        template<typename ITR>
        bool operator>=(const base_iterator<ITR, FORWARD> &r) const
        {
            if (FORWARD)
                return myCurrent >= r.myCurrent;
            else
                return r.myCurrent >= myCurrent;
        }

        // Difference operator for std::distance
        template<typename ITR>
        exint operator-(const base_iterator<ITR, FORWARD> &r) const
        {
            if (FORWARD)
                return exint(myCurrent - r.myCurrent);
            else
                return exint(r.myCurrent - myCurrent);
        }

    protected:
        friend class UT_Array<T>;
        base_iterator(IT *c, IT *e) : myCurrent(c), myEnd(e) {}

    private:
        IT *myCurrent;
        IT *myEnd;
    };

    typedef base_iterator<T, true>          iterator;
    typedef base_iterator<const T, true>    const_iterator;
    typedef base_iterator<T, false>         reverse_iterator;
    typedef base_iterator<const T, false>   const_reverse_iterator;
    typedef const_iterator                  traverser; // For backward compatibility

    /// Begin iterating over the array. The contents of the array may be
    /// modified during the traversal.
    iterator begin()
    {
        return iterator(myData, myData + mySize);
    }
    /// End iterator.
    iterator end()
    {
        return iterator(myData + mySize, myData + mySize);
    }

    /// Begin iterating over the array. The array may not be modified during
    /// the traversal.
    const_iterator begin() const
    {
        return const_iterator(myData, myData + mySize);
    }
    /// End const iterator. Consider using it.atEnd() instead.
    const_iterator end() const
    {
        return const_iterator(myData + mySize, myData + mySize);
    }

    /// Begin iterating over the array in reverse.
    reverse_iterator rbegin()
    {
        return reverse_iterator(myData + mySize, myData);
    }
    /// End reverse iterator.
    reverse_iterator rend()
    {
        return reverse_iterator(myData, myData);
    }
    /// Begin iterating over the array in reverse.
    const_reverse_iterator rbegin() const
    {
        return const_reverse_iterator(myData + mySize, myData);
    }
    /// End reverse iterator. Consider using it.atEnd() instead.
    const_reverse_iterator rend() const
    {
        return const_reverse_iterator(myData, myData);
    }

    /// Remove item specified by the reverse_iterator.
    void removeItem(const reverse_iterator &it)
    {
        removeAt(&it.item() - myData);
    }

    /// Very dangerous methods to share arrays.
    /// The array is not aware of the sharing, so ensure you clear
    /// out the array prior to its destructor or any setCapacity operation.
    void unsafeShareData(UT_Array<T> &src)
    {
        myData = src.myData;
        myCapacity = src.myCapacity;
        mySize = src.mySize;
    }
    void unsafeShareData(T *src, exint srcsize)
    {
        myData = src;
        myCapacity = srcsize;
        mySize = srcsize;
    }
    void unsafeShareData(T *src, exint size, exint capacity)
    {
        myData = src;
        mySize = size;
        myCapacity = capacity;
    }
    void unsafeClearData()
    {
        myData = NULL;
        myCapacity = 0;
        mySize = 0;
    }

    /// Returns true if the data used by the array was allocated on the heap.
    inline bool isHeapBuffer() const
    {
        return (myData != (T *)(((char*)this) + sizeof(*this)));
    }
    inline bool isHeapBuffer(T* data) const
    {
        return (data != (T *)(((char*)this) + sizeof(*this)));
    }
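    // (Editor's note: this test appears to assume a subclass may embed a
    // fixed-size buffer immediately after the UT_Array members; if myData
    // points right past this object, it is treated as that inline buffer
    // rather than a heap block.)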
protected:
    // Check whether T may have a constructor, destructor, or copy
    // constructor. This test is conservative in that some POD types will
    // not be recognized as POD by this function. To mark your type as POD,
    // use the SYS_DECLARE_IS_POD() macro in SYS_TypeDecorate.h.
    static constexpr SYS_FORCE_INLINE bool isPOD()
    {
        return std::is_pod<T>::value;
    }

    /// Implements both append(const T &) and append(T &&) via perfect
    /// forwarding. Unlike the variadic emplace_back(), its argument may be a
    /// reference to another element in the array.
    template <typename S>
    inline exint appendImpl(S &&s);

    /// Similar to appendImpl() but for insertion.
    template <typename S>
    inline exint insertImpl(S &&s, exint index);

    // Construct the given type
    template <typename... S>
    static void construct(T &dst, S&&... s)
    {
        new (&dst) T(std::forward<S>(s)...);
    }

    // Copy construct the given type
    static void copyConstruct(T &dst, const T &src)
    {
        if (isPOD())
            dst = src;
        else
            new (&dst) T(src);
    }
    static void copyConstructRange(T *dst, const T *src, exint n)
    {
        if (isPOD())
        {
            if (n > 0)
            {
                ::memcpy((void *)dst, (const void *)src,
                         n * sizeof(T));
            }
        }
        else
        {
            for (exint i = 0; i < n; i++)
                new (&dst[i]) T(src[i]);
        }
    }

    /// Element Constructor
    static void trivialConstruct(T &dst)
    {
        if (!isPOD())
            new (&dst) T();
        else
            memset((void *)&dst, 0, sizeof(T));
    }
    static void trivialConstructRange(T *dst, exint n)
    {
        if (!isPOD())
        {
            for (exint i = 0; i < n; i++)
                new (&dst[i]) T();
        }
        else if (n == 1)
        {
            // Special case for n == 1. If the size parameter
            // passed to memset is known at compile time, this
            // function call will be inlined. This results in
            // much faster performance than a real memset
            // function call which is required in the case
            // below, where n is not known until runtime.
            // This makes calls to append() much faster.
            memset((void *)dst, 0, sizeof(T));
        }
        else
            memset((void *)dst, 0, sizeof(T) * n);
    }

    /// Element Destructor
    static void trivialDestruct(T &dst)
    {
        if (!isPOD())
            dst.~T();
    }
    static void trivialDestructRange(T *dst, exint n)
    {
        if (!isPOD())
        {
            for (exint i = 0; i < n; i++)
                dst[i].~T();
        }
    }

private:
    /// Pointer to the array of elements of type T
    T *myData;

    /// The number of elements for which we have allocated memory
    exint myCapacity;

    /// The actual number of valid elements in the array
    exint mySize;

    // The guts of the remove() methods.
    inline exint removeAt(exint index);

    inline T *allocateCapacity(exint num_items);
};

}}

#endif // __UT_ARRAY_H_INCLUDED__
/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      This is meant to be included by UT_Array.h and includes
 *      the template implementations needed by external code.
 */
#pragma once

#ifndef __UT_ARRAYIMPL_H_INCLUDED__
#define __UT_ARRAYIMPL_H_INCLUDED__

#include <algorithm>
#include <utility>
#include <stdlib.h>
#include <string.h>

namespace igl {
/// @private
namespace FastWindingNumber {

// Implemented in UT_Array.C
extern void ut_ArrayImplFree(void *p);

template <typename T>
inline UT_Array<T>::UT_Array(const UT_Array<T> &a)
    : myCapacity(a.size()), mySize(a.size())
{
    if (myCapacity)
    {
        myData = allocateCapacity(myCapacity);
        copyConstructRange(myData, a.array(), mySize);
    }
    else
    {
        myData = nullptr;
    }
}

template <typename T>
inline UT_Array<T>::UT_Array(std::initializer_list<T> init)
    : myCapacity(init.size()), mySize(init.size())
{
    if (myCapacity)
    {
        myData = allocateCapacity(myCapacity);
        copyConstructRange(myData, init.begin(), mySize);
    }
    else
    {
        myData = nullptr;
    }
}

template <typename T>
inline UT_Array<T>::UT_Array(UT_Array<T> &&a) noexcept
{
    if (!a.isHeapBuffer())
    {
        myData = nullptr;
        myCapacity = 0;
        mySize = 0;
        operator=(std::move(a));
        return;
    }

    myCapacity = a.myCapacity;
    mySize = a.mySize;
    myData = a.myData;
    a.myCapacity = a.mySize = 0;
    a.myData = nullptr;
}

template <typename T>
inline UT_Array<T>::~UT_Array()
{
    // NOTE: We call setCapacity to ensure that we call trivialDestructRange,
    // then call free on myData.
    setCapacity(0);
}

template <typename T>
inline T *
UT_Array<T>::allocateCapacity(exint capacity)
{
    T *data = (T *)malloc(capacity * sizeof(T));
    // Avoid degenerate case if we happen to be aliased the wrong way
    if (!isHeapBuffer(data))
    {
        T *prev = data;
        data = (T *)malloc(capacity * sizeof(T));
        ut_ArrayImplFree(prev);
    }
    return data;
}

template <typename T>
inline void
UT_Array<T>::swap(UT_Array<T> &other)
{
    std::swap(myData, other.myData);
    std::swap(myCapacity, other.myCapacity);
    std::swap(mySize, other.mySize);
}

template <typename T>
inline exint
UT_Array<T>::insert(exint index)
{
    if (index >= mySize)
    {
        bumpCapacity(index + 1);
        trivialConstructRange(myData + mySize, index - mySize + 1);
        mySize = index+1;
        return index;
    }

    bumpCapacity(mySize + 1);
    UT_IGL_ASSERT_P(index >= 0);
    ::memmove((void *)&myData[index+1], (void *)&myData[index],
              ((mySize-index)*sizeof(T)));
    trivialConstruct(myData[index]);
    mySize++;
    return index;
}

template <typename T>
template <typename S>
inline exint
UT_Array<T>::appendImpl(S &&s)
{
    if (mySize == myCapacity)
    {
  2180. exint idx = safeIndex(s);
  2181. // NOTE: UTbumpAlloc always returns a strictly larger value.
  2182. setCapacity(UTbumpAlloc(myCapacity));
  2183. if (idx >= 0)
  2184. construct(myData[mySize], std::forward<S>(myData[idx]));
  2185. else
  2186. construct(myData[mySize], std::forward<S>(s));
  2187. }
  2188. else
  2189. {
  2190. construct(myData[mySize], std::forward<S>(s));
  2191. }
  2192. return mySize++;
  2193. }
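
// NOTE: appendImpl() above guards against self-referencing appends: if s is
//       a reference to an element of this array, the reallocation done by
//       setCapacity() would invalidate that reference, so safeIndex() records
//       its index beforehand and the value is re-read from the new buffer
//       (myData[idx]) after the capacity bump. The same pattern recurs in
//       appendMultiple() and insertImpl() below.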

template <typename T>
template <typename... S>
inline exint
UT_Array<T>::emplace_back(S&&... s)
{
    if (mySize == myCapacity)
        setCapacity(UTbumpAlloc(myCapacity));
    construct(myData[mySize], std::forward<S>(s)...);
    return mySize++;
}

template <typename T>
inline void
UT_Array<T>::append(const T *pt, exint count)
{
    bumpCapacity(mySize + count);
    copyConstructRange(myData + mySize, pt, count);
    mySize += count;
}

template <typename T>
inline void
UT_Array<T>::appendMultiple(const T &t, exint count)
{
    UT_IGL_ASSERT_P(count >= 0);
    if (count <= 0)
        return;
    if (mySize + count >= myCapacity)
    {
        exint tidx = safeIndex(t);
        bumpCapacity(mySize + count);
        for (exint i = 0; i < count; i++)
            copyConstruct(myData[mySize+i], tidx >= 0 ? myData[tidx] : t);
    }
    else
    {
        for (exint i = 0; i < count; i++)
            copyConstruct(myData[mySize+i], t);
    }
    mySize += count;
}

template <typename T>
inline exint
UT_Array<T>::concat(const UT_Array<T> &a)
{
    bumpCapacity(mySize + a.mySize);
    copyConstructRange(myData + mySize, a.myData, a.mySize);
    mySize += a.mySize;
    return mySize;
}

template <typename T>
inline exint
UT_Array<T>::multipleInsert(exint beg_index, exint count)
{
    exint end_index = beg_index + count;

    if (beg_index >= mySize)
    {
        bumpCapacity(end_index);
        trivialConstructRange(myData + mySize, end_index - mySize);
        mySize = end_index;
        return beg_index;
    }
    bumpCapacity(mySize+count);
    ::memmove((void *)&myData[end_index], (void *)&myData[beg_index],
              ((mySize-beg_index)*sizeof(T)));
    mySize += count;
    trivialConstructRange(myData + beg_index, count);
    return beg_index;
}

template <typename T>
template <typename S>
inline exint
UT_Array<T>::insertImpl(S &&s, exint index)
{
    if (index == mySize)
    {
        // This case avoids an extraneous call to trivialConstructRange()
        // which the compiler may not optimize out.
        (void) appendImpl(std::forward<S>(s));
    }
    else if (index > mySize)
    {
        exint src_i = safeIndex(s);
        bumpCapacity(index + 1);
        trivialConstructRange(myData + mySize, index - mySize);
        if (src_i >= 0)
            construct(myData[index], std::forward<S>(myData[src_i]));
        else
            construct(myData[index], std::forward<S>(s));
        mySize = index + 1;
    }
    else // (index < mySize)
    {
        exint src_i = safeIndex(s);
        bumpCapacity(mySize + 1);
        ::memmove((void *)&myData[index+1], (void *)&myData[index],
                  ((mySize-index)*sizeof(T)));
        if (src_i >= index)
            ++src_i;
        if (src_i >= 0)
            construct(myData[index], std::forward<S>(myData[src_i]));
        else
            construct(myData[index], std::forward<S>(s));
        ++mySize;
    }
    return index;
}

template <typename T>
inline exint
UT_Array<T>::removeAt(exint idx)
{
    trivialDestruct(myData[idx]);
    if (idx != --mySize)
    {
        ::memmove((void *)&myData[idx], (void *)&myData[idx+1],
                  ((mySize-idx)*sizeof(T)));
    }
    return idx;
}

template <typename T>
inline void
UT_Array<T>::removeRange(exint begin_i, exint end_i)
{
    UT_IGL_ASSERT(begin_i <= end_i);
    UT_IGL_ASSERT(end_i <= size());
    if (end_i < size())
    {
        trivialDestructRange(myData + begin_i, end_i - begin_i);
        ::memmove((void *)&myData[begin_i], (void *)&myData[end_i],
                  (mySize - end_i)*sizeof(T));
    }
    setSize(mySize - (end_i - begin_i));
}

template <typename T>
inline void
UT_Array<T>::extractRange(exint begin_i, exint end_i, UT_Array<T>& dest)
{
    UT_IGL_ASSERT_P(begin_i >= 0);
    UT_IGL_ASSERT_P(begin_i <= end_i);
    UT_IGL_ASSERT_P(end_i <= size());
    UT_IGL_ASSERT(this != &dest);

    exint nelements = end_i - begin_i;

    // grow the raw array if necessary.
    dest.setCapacityIfNeeded(nelements);
    ::memmove((void*)dest.myData, (void*)&myData[begin_i],
              nelements * sizeof(T));
    dest.mySize = nelements;

    // we just asserted this was true, but just in case
    if (this != &dest)
    {
        if (end_i < size())
        {
            ::memmove((void*)&myData[begin_i], (void*)&myData[end_i],
                      (mySize - end_i) * sizeof(T));
        }
        setSize(mySize - nelements);
    }
}

template <typename T>
inline void
UT_Array<T>::move(exint srcIdx, exint destIdx, exint howMany)
{
    // Make sure all the parameters are valid.
    if( srcIdx < 0 )
        srcIdx = 0;
    if( destIdx < 0 )
        destIdx = 0;
    // If we are told to move a set of elements that would extend beyond the
    // end of the current array, trim the group.
    if( srcIdx + howMany > size() )
        howMany = size() - srcIdx;
    // If the destIdx would have us move the source beyond the end of the
    // current array, move the destIdx back.
    if( destIdx + howMany > size() )
        destIdx = size() - howMany;

    if( srcIdx != destIdx && howMany > 0 )
    {
        void **tmp = 0;
        exint savelen;

        savelen = SYSabs(srcIdx - destIdx);
        tmp = (void **)::malloc(savelen*sizeof(T));
        if( srcIdx > destIdx && howMany > 0 )
        {
            // We're moving the group backwards. Save all the stuff that
            // we would overwrite, plus everything beyond that to the
            // start of the source group. Then move the source group, then
            // tack the saved data onto the end of the moved group.
            ::memcpy(tmp, (void *)&myData[destIdx], (savelen*sizeof(T)));
            ::memmove((void *)&myData[destIdx], (void *)&myData[srcIdx],
                      (howMany*sizeof(T)));
            ::memcpy((void *)&myData[destIdx+howMany], tmp, (savelen*sizeof(T)));
        }
        if( srcIdx < destIdx && howMany > 0 )
        {
            // We're moving the group forwards. Save from the end of the
            // group being moved to the end of where the destination
            // group will end up. Then copy the source to the destination.
            // Then move back up to the original source location and drop
            // in our saved data.
            ::memcpy(tmp, (void *)&myData[srcIdx+howMany], (savelen*sizeof(T)));
            ::memmove((void *)&myData[destIdx], (void *)&myData[srcIdx],
                      (howMany*sizeof(T)));
            ::memcpy((void *)&myData[srcIdx], tmp, (savelen*sizeof(T)));
        }
        ::free(tmp);
    }
}

template <typename T>
template <typename IsEqual>
inline exint
UT_Array<T>::removeIf(IsEqual is_equal)
{
    // Move dst to the first element to remove.
    exint dst;
    for (dst = 0; dst < mySize; dst++)
    {
        if (is_equal(myData[dst]))
            break;
    }
    // Now start looking at all the elements past the first one to remove.
    for (exint idx = dst+1; idx < mySize; idx++)
    {
        if (!is_equal(myData[idx]))
        {
            UT_IGL_ASSERT(idx != dst);
            myData[dst] = myData[idx];
            dst++;
        }
        // On match, ignore.
    }
    // New size
    mySize = dst;
    return mySize;
}

template <typename T>
inline void
UT_Array<T>::cycle(exint howMany)
{
    char *tempPtr;
    exint numShift;   // The number of items we shift
    exint remaining;  // mySize - numShift

    if (howMany == 0 || mySize < 1)
        return;

    numShift = howMany % (exint)mySize;
    if (numShift < 0)
        numShift += mySize;
    remaining = mySize - numShift;

    tempPtr = new char[numShift*sizeof(T)];
    ::memmove(tempPtr, (void *)&myData[remaining], (numShift * sizeof(T)));
    ::memmove((void *)&myData[numShift], (void *)&myData[0], (remaining * sizeof(T)));
    ::memmove((void *)&myData[0], tempPtr, (numShift * sizeof(T)));
    delete [] tempPtr;
}
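
// For example, cycle(1) on [a, b, c, d] rotates the last element to the
// front, yielding [d, a, b, c]; a negative count rotates the other way,
// so cycle(-1) yields [b, c, d, a].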

template <typename T>
inline void
UT_Array<T>::constant(const T &value)
{
    for (exint i = 0; i < mySize; i++)
    {
        myData[i] = value;
    }
}

template <typename T>
inline void
UT_Array<T>::zero()
{
    if (isPOD())
        ::memset((void *)myData, 0, mySize*sizeof(T));
    else
        trivialConstructRange(myData, mySize);
}

template <typename T>
inline void
UT_Array<T>::setCapacity(exint capacity)
{
    // Do nothing when new capacity is the same as the current
    if (capacity == myCapacity)
        return;

    // Special case for non-heap buffers
    if (!isHeapBuffer())
    {
        if (capacity < mySize)
        {
            // Destroy the extra elements without changing myCapacity
            trivialDestructRange(myData + capacity, mySize - capacity);
            mySize = capacity;
        }
        else if (capacity > myCapacity)
        {
            T *prev = myData;
            myData = (T *)malloc(sizeof(T) * capacity);
            // myData is safe because we're already a stack buffer
            UT_IGL_ASSERT_P(isHeapBuffer());
            if (mySize > 0)
                memcpy((void *)myData, (void *)prev, sizeof(T) * mySize);
            myCapacity = capacity;
        }
        else
        {
            // Keep myCapacity unchanged in this case
            UT_IGL_ASSERT_P(capacity >= mySize && capacity <= myCapacity);
        }
        return;
    }

    if (capacity == 0)
    {
        if (myData)
        {
            trivialDestructRange(myData, mySize);
            free(myData);
        }
        myData = 0;
        myCapacity = 0;
        mySize = 0;
        return;
    }

    if (capacity < mySize)
    {
        trivialDestructRange(myData + capacity, mySize - capacity);
        mySize = capacity;
    }

    if (myData)
        myData = (T *)realloc(myData, capacity*sizeof(T));
    else
        myData = (T *)malloc(sizeof(T) * capacity);

    // Avoid degenerate case if we happen to be aliased the wrong way
    if (!isHeapBuffer())
    {
        T *prev = myData;
        myData = (T *)malloc(sizeof(T) * capacity);
        if (mySize > 0)
            memcpy((void *)myData, (void *)prev, sizeof(T) * mySize);
        ut_ArrayImplFree(prev);
    }

    myCapacity = capacity;
    UT_IGL_ASSERT(myData);
}

template <typename T>
inline UT_Array<T> &
UT_Array<T>::operator=(const UT_Array<T> &a)
{
    if (this == &a)
        return *this;

    // Grow the raw array if necessary.
    setCapacityIfNeeded(a.size());

    // Make sure destructors and constructors are called on all elements
    // being removed/added.
    trivialDestructRange(myData, mySize);
    copyConstructRange(myData, a.myData, a.size());

    mySize = a.size();
    return *this;
}

template <typename T>
inline UT_Array<T> &
UT_Array<T>::operator=(std::initializer_list<T> a)
{
    const exint new_size = a.size();

    // Grow the raw array if necessary.
    setCapacityIfNeeded(new_size);

    // Make sure destructors and constructors are called on all elements
    // being removed/added.
    trivialDestructRange(myData, mySize);
    copyConstructRange(myData, a.begin(), new_size);

    mySize = new_size;
    return *this;
}

template <typename T>
inline UT_Array<T> &
UT_Array<T>::operator=(UT_Array<T> &&a)
{
    if (!a.isHeapBuffer())
    {
        // Cannot steal from non-heap buffers
        clear();
        const exint n = a.size();
        setCapacityIfNeeded(n);
        if (isPOD())
        {
            if (n > 0)
                memcpy(myData, a.myData, n * sizeof(T));
        }
        else
        {
            for (exint i = 0; i < n; ++i)
                new (&myData[i]) T(std::move(a.myData[i]));
        }
        mySize = a.mySize;
        a.mySize = 0;
        return *this;
    }

    // else, just steal even if we're a small buffer

    // Destroy all the elements we're currently holding.
    if (myData)
    {
        trivialDestructRange(myData, mySize);
        if (isHeapBuffer())
            ::free(myData);
    }

    // Move the contents of the other array to us and empty the other
    // container so that it destructs cleanly.
    myCapacity = a.myCapacity;
    mySize = a.mySize;
    myData = a.myData;
    a.myCapacity = a.mySize = 0;
    a.myData = nullptr;

    return *this;
}

template <typename T>
inline bool
UT_Array<T>::operator==(const UT_Array<T> &a) const
{
    if (this == &a) return true;
    if (mySize != a.size()) return false;
    for (exint i = 0; i < mySize; i++)
        if (!(myData[i] == a(i))) return false;
    return true;
}

template <typename T>
inline bool
UT_Array<T>::operator!=(const UT_Array<T> &a) const
{
    return (!operator==(a));
}

}}
#endif // __UT_ARRAYIMPL_H_INCLUDED__

/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      Special case for arrays that are usually small,
 *      to avoid a heap allocation when the array really is small.
 */
#pragma once
#ifndef __UT_SMALLARRAY_H_INCLUDED__
#define __UT_SMALLARRAY_H_INCLUDED__

#include <utility>
#include <stddef.h>

namespace igl {
/// @private
namespace FastWindingNumber {

/// An array class with the small buffer optimization, making it ideal for
/// cases when you know it will only contain a few elements at the expense of
/// increasing the object size by MAX_BYTES (subject to alignment).
template <typename T, size_t MAX_BYTES = 64>
class UT_SmallArray : public UT_Array<T>
{
    // As many elements as fit into MAX_BYTES, with a 1-item minimum
    enum { MAX_ELEMS = MAX_BYTES/sizeof(T) < 1 ? 1 : MAX_BYTES/sizeof(T) };

public:
    // gcc falsely warns about our use of offsetof() on non-POD types. We
    // can't easily suppress this because it would have to be done in the
    // caller at instantiation time, so we punt to a runtime check instead.
#if defined(__clang__) || defined(_MSC_VER)
#define UT_SMALL_ARRAY_SIZE_IGL_ASSERT() \
    using ThisT = UT_SmallArray<T,MAX_BYTES>; \
    static_assert(offsetof(ThisT, myBuffer) == sizeof(UT_Array<T>), \
        "In order for UT_Array's checks for whether it needs to free the buffer to work, " \
        "the buffer must be exactly following the base class memory.")
#else
#define UT_SMALL_ARRAY_SIZE_IGL_ASSERT() \
    UT_IGL_ASSERT_P(!UT_Array<T>::isHeapBuffer());
#endif

    /// Default construction
    UT_SmallArray()
        : UT_Array<T>(/*capacity*/0)
    {
        UT_Array<T>::unsafeShareData((T*)myBuffer, 0, MAX_ELEMS);
        UT_SMALL_ARRAY_SIZE_IGL_ASSERT();
    }

    /// Copy constructor
    /// @{
    explicit UT_SmallArray(const UT_Array<T> &copy)
        : UT_Array<T>(/*capacity*/0)
    {
        UT_Array<T>::unsafeShareData((T*)myBuffer, 0, MAX_ELEMS);
        UT_SMALL_ARRAY_SIZE_IGL_ASSERT();
        UT_Array<T>::operator=(copy);
    }
    explicit UT_SmallArray(const UT_SmallArray<T,MAX_BYTES> &copy)
        : UT_Array<T>(/*capacity*/0)
    {
        UT_Array<T>::unsafeShareData((T*)myBuffer, 0, MAX_ELEMS);
        UT_SMALL_ARRAY_SIZE_IGL_ASSERT();
        UT_Array<T>::operator=(copy);
    }
    /// @}

    /// Move constructor
    /// @{
    UT_SmallArray(UT_Array<T> &&movable) noexcept
    {
        UT_Array<T>::unsafeShareData((T*)myBuffer, 0, MAX_ELEMS);
        UT_SMALL_ARRAY_SIZE_IGL_ASSERT();
        UT_Array<T>::operator=(std::move(movable));
    }
    UT_SmallArray(UT_SmallArray<T,MAX_BYTES> &&movable) noexcept
    {
        UT_Array<T>::unsafeShareData((T*)myBuffer, 0, MAX_ELEMS);
        UT_SMALL_ARRAY_SIZE_IGL_ASSERT();
        UT_Array<T>::operator=(std::move(movable));
    }
    /// @}

    /// Initializer list constructor
    explicit UT_SmallArray(std::initializer_list<T> init)
    {
        UT_Array<T>::unsafeShareData((T*)myBuffer, 0, MAX_ELEMS);
        UT_SMALL_ARRAY_SIZE_IGL_ASSERT();
        UT_Array<T>::operator=(init);
    }
#undef UT_SMALL_ARRAY_SIZE_IGL_ASSERT

    /// Assignment operator
    /// @{
    UT_SmallArray<T,MAX_BYTES> &
    operator=(const UT_SmallArray<T,MAX_BYTES> &copy)
    {
        UT_Array<T>::operator=(copy);
        return *this;
    }
    UT_SmallArray<T,MAX_BYTES> &
    operator=(const UT_Array<T> &copy)
    {
        UT_Array<T>::operator=(copy);
        return *this;
    }
    /// @}

    /// Move operator
    /// @{
    UT_SmallArray<T,MAX_BYTES> &
    operator=(UT_SmallArray<T,MAX_BYTES> &&movable)
    {
        UT_Array<T>::operator=(std::move(movable));
        return *this;
    }
    UT_SmallArray<T,MAX_BYTES> &
    operator=(UT_Array<T> &&movable)
    {
        UT_Array<T>::operator=(std::move(movable));
        return *this;
    }
    /// @}

    UT_SmallArray<T,MAX_BYTES> &
    operator=(std::initializer_list<T> src)
    {
        UT_Array<T>::operator=(src);
        return *this;
    }
private:
    alignas(T) char myBuffer[MAX_ELEMS*sizeof(T)];
};
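
// Illustrative usage sketch: with the default MAX_BYTES of 64, a
// UT_SmallArray<float> holds up to 16 floats in myBuffer without touching
// the heap; appending a 17th element spills to a heap allocation through
// the regular UT_Array machinery.
//
//     UT_SmallArray<float> weights;     // storage lives inside the object
//     for (int i = 0; i < 16; ++i)
//         weights.append(1.0f);         // no heap allocation yet
//     weights.append(1.0f);             // now falls back to the heap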

}}
#endif // __UT_SMALLARRAY_H_INCLUDED__

/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      A vector class templated on its size and data type.
 */
#pragma once
#ifndef __UT_FixedVector__
#define __UT_FixedVector__

#include <type_traits> // for std::is_integral in the scalar division operators

namespace igl {
/// @private
namespace FastWindingNumber {

template<typename T,exint SIZE,bool INSTANTIATED=false>
class UT_FixedVector
{
public:
    typedef UT_FixedVector<T,SIZE,INSTANTIATED> ThisType;
    typedef T value_type;
    typedef T theType;
    static const exint theSize = SIZE;

    T vec[SIZE];

    SYS_FORCE_INLINE UT_FixedVector() = default;

    /// Initializes every component to the same value
    SYS_FORCE_INLINE explicit UT_FixedVector(T that) noexcept
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] = that;
    }

    SYS_FORCE_INLINE UT_FixedVector(const ThisType &that) = default;
    SYS_FORCE_INLINE UT_FixedVector(ThisType &&that) = default;

    /// Converts vector of S into vector of T,
    /// or just copies if same type.
    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE UT_FixedVector(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) noexcept
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] = that[i];
    }

    template<typename S>
    SYS_FORCE_INLINE UT_FixedVector(const S that[SIZE]) noexcept
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] = that[i];
    }

    SYS_FORCE_INLINE const T &operator[](exint i) const noexcept
    {
        UT_IGL_ASSERT_P(i >= 0 && i < SIZE);
        return vec[i];
    }
    SYS_FORCE_INLINE T &operator[](exint i) noexcept
    {
        UT_IGL_ASSERT_P(i >= 0 && i < SIZE);
        return vec[i];
    }

    SYS_FORCE_INLINE constexpr const T *data() const noexcept
    {
        return vec;
    }
    SYS_FORCE_INLINE T *data() noexcept
    {
        return vec;
    }

    SYS_FORCE_INLINE ThisType &operator=(const ThisType &that) = default;
    SYS_FORCE_INLINE ThisType &operator=(ThisType &&that) = default;

    template <typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE ThisType &operator=(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) noexcept
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] = that[i];
        return *this;
    }
    SYS_FORCE_INLINE const ThisType &operator=(T that) noexcept
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] = that;
        return *this;
    }

    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE void operator+=(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that)
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] += that[i];
    }
    SYS_FORCE_INLINE void operator+=(T that)
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] += that;
    }
    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE auto operator+(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) const -> UT_FixedVector<decltype(vec[0]+that[0]),SIZE>
    {
        using Type = decltype(vec[0]+that[0]);
        UT_FixedVector<Type,SIZE> result;
        for (exint i = 0; i < SIZE; ++i)
            result[i] = vec[i] + that[i];
        return result;
    }

    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE void operator-=(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that)
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] -= that[i];
    }
    SYS_FORCE_INLINE void operator-=(T that)
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] -= that;
    }
    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE auto operator-(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) const -> UT_FixedVector<decltype(vec[0]-that[0]),SIZE>
    {
        using Type = decltype(vec[0]-that[0]);
        UT_FixedVector<Type,SIZE> result;
        for (exint i = 0; i < SIZE; ++i)
            result[i] = vec[i] - that[i];
        return result;
    }

    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE void operator*=(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that)
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] *= that[i];
    }
    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE auto operator*(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) const -> UT_FixedVector<decltype(vec[0]*that[0]),SIZE>
    {
        using Type = decltype(vec[0]*that[0]);
        UT_FixedVector<Type,SIZE> result;
        for (exint i = 0; i < SIZE; ++i)
            result[i] = vec[i] * that[i];
        return result;
    }
    SYS_FORCE_INLINE void operator*=(T that)
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] *= that;
    }
    SYS_FORCE_INLINE UT_FixedVector<T,SIZE> operator*(T that) const
    {
        UT_FixedVector<T,SIZE> result;
        for (exint i = 0; i < SIZE; ++i)
            result[i] = vec[i] * that;
        return result;
    }

    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE void operator/=(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that)
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] /= that[i];
    }
    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE auto operator/(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) const -> UT_FixedVector<decltype(vec[0]/that[0]),SIZE>
    {
        using Type = decltype(vec[0]/that[0]);
        UT_FixedVector<Type,SIZE> result;
        for (exint i = 0; i < SIZE; ++i)
            result[i] = vec[i] / that[i];
        return result;
    }
    SYS_FORCE_INLINE void operator/=(T that)
    {
        if (std::is_integral<T>::value)
        {
            for (exint i = 0; i < SIZE; ++i)
                vec[i] /= that;
        }
        else
        {
            that = 1/that;
            for (exint i = 0; i < SIZE; ++i)
                vec[i] *= that;
        }
    }
    SYS_FORCE_INLINE UT_FixedVector<T,SIZE> operator/(T that) const
    {
        UT_FixedVector<T,SIZE> result;
        if (std::is_integral<T>::value)
        {
            for (exint i = 0; i < SIZE; ++i)
                result[i] = vec[i] / that;
        }
        else
        {
            that = 1/that;
            for (exint i = 0; i < SIZE; ++i)
                result[i] = vec[i] * that;
        }
        return result;
    }

    SYS_FORCE_INLINE void negate()
    {
        for (exint i = 0; i < SIZE; ++i)
            vec[i] = -vec[i];
    }
    SYS_FORCE_INLINE UT_FixedVector<T,SIZE> operator-() const
    {
        UT_FixedVector<T,SIZE> result;
        for (exint i = 0; i < SIZE; ++i)
            result[i] = -vec[i];
        return result;
    }

    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE bool operator==(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) const noexcept
    {
        for (exint i = 0; i < SIZE; ++i)
        {
            if (vec[i] != T(that[i]))
                return false;
        }
        return true;
    }
    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE bool operator!=(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) const noexcept
    {
        return !(*this==that);
    }

    SYS_FORCE_INLINE bool isZero() const noexcept
    {
        for (exint i = 0; i < SIZE; ++i)
        {
            if (vec[i] != T(0))
                return false;
        }
        return true;
    }
    SYS_FORCE_INLINE T maxComponent() const
    {
        T v = vec[0];
        for (exint i = 1; i < SIZE; ++i)
            v = (vec[i] > v) ? vec[i] : v;
        return v;
    }
    SYS_FORCE_INLINE T minComponent() const
    {
        T v = vec[0];
        for (exint i = 1; i < SIZE; ++i)
            v = (vec[i] < v) ? vec[i] : v;
        return v;
    }
    SYS_FORCE_INLINE T avgComponent() const
    {
        T v = vec[0];
        for (exint i = 1; i < SIZE; ++i)
            v += vec[i];
        return v / SIZE;
    }

    SYS_FORCE_INLINE T length2() const noexcept
    {
        T a0(vec[0]);
        T result(a0*a0);
        for (exint i = 1; i < SIZE; ++i)
        {
            T ai(vec[i]);
            result += ai*ai;
        }
        return result;
    }
    SYS_FORCE_INLINE T length() const
    {
        T len2 = length2();
        return SYSsqrt(len2);
    }

    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE auto dot(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) const -> decltype(vec[0]*that[0])
    {
        using TheType = decltype(vec[0]*that.vec[0]);
        TheType result(vec[0]*that[0]);
        for (exint i = 1; i < SIZE; ++i)
            result += vec[i]*that[i];
        return result;
    }

    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE auto distance2(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) const -> decltype(vec[0]-that[0])
    {
        using TheType = decltype(vec[0]-that[0]);
        TheType v(vec[0] - that[0]);
        TheType result(v*v);
        for (exint i = 1; i < SIZE; ++i)
        {
            v = vec[i] - that[i];
            result += v*v;
        }
        return result;
    }
    template<typename S,bool S_INSTANTIATED>
    SYS_FORCE_INLINE auto distance(const UT_FixedVector<S,SIZE,S_INSTANTIATED> &that) const -> decltype(vec[0]-that[0])
    {
        auto dist2 = distance2(that);
        return SYSsqrt(dist2);
    }
    SYS_FORCE_INLINE T normalize()
    {
        T len2 = length2();
        if (len2 == T(0))
            return T(0);
        if (len2 == T(1))
            return T(1);
        T len = SYSsqrt(len2);
        // Check whether the square root is exactly 1: sqrt(1+dx) ~ 1+dx/2,
        // so it may get rounded to 1 when len2 wasn't 1 before.
        if (len != T(1))
            (*this) /= len;
        return len;
    }
};
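
// Illustrative sketch of the componentwise arithmetic above:
//
//     UT_FixedVector<float,3> a(1.0f);   // (1, 1, 1)
//     UT_FixedVector<float,3> b(0.5f);   // (0.5, 0.5, 0.5)
//     float d  = a.dot(b);               // 1.5
//     float l2 = (a - b).length2();      // 0.75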

/// NOTE: Strictly speaking, this should use decltype(that*a[0]),
///       but in the interests of avoiding accidental precision escalation,
///       it uses T.
template<typename T,exint SIZE,bool INSTANTIATED,typename S>
SYS_FORCE_INLINE UT_FixedVector<T,SIZE> operator*(const S &that,const UT_FixedVector<T,SIZE,INSTANTIATED> &a)
{
    T t(that);
    UT_FixedVector<T,SIZE> result;
    for (exint i = 0; i < SIZE; ++i)
        result[i] = t * a[i];
    return result;
}

template<typename T, exint SIZE, bool INSTANTIATED, typename S, bool S_INSTANTIATED>
SYS_FORCE_INLINE auto
dot(const UT_FixedVector<T,SIZE,INSTANTIATED> &a, const UT_FixedVector<S,SIZE,S_INSTANTIATED> &b) -> decltype(a[0]*b[0])
{
    return a.dot(b);
}

template<typename T, exint SIZE, bool INSTANTIATED, typename S, bool S_INSTANTIATED>
SYS_FORCE_INLINE auto
SYSmin(const UT_FixedVector<T,SIZE,INSTANTIATED> &a, const UT_FixedVector<S,SIZE,S_INSTANTIATED> &b) -> UT_FixedVector<decltype(a[0]+b[1]), SIZE>
{
    using Type = decltype(a[0]+b[1]);
    UT_FixedVector<Type, SIZE> result;
    for (exint i = 0; i < SIZE; ++i)
        result[i] = SYSmin(Type(a[i]), Type(b[i]));
    return result;
}

template<typename T, exint SIZE, bool INSTANTIATED, typename S, bool S_INSTANTIATED>
SYS_FORCE_INLINE auto
SYSmax(const UT_FixedVector<T,SIZE,INSTANTIATED> &a, const UT_FixedVector<S,SIZE,S_INSTANTIATED> &b) -> UT_FixedVector<decltype(a[0]+b[1]), SIZE>
{
    using Type = decltype(a[0]+b[1]);
    UT_FixedVector<Type, SIZE> result;
    for (exint i = 0; i < SIZE; ++i)
        result[i] = SYSmax(Type(a[i]), Type(b[i]));
    return result;
}

template<typename T>
struct UT_FixedVectorTraits
{
    typedef UT_FixedVector<T,1> FixedVectorType;
    typedef T DataType;
    static const exint TupleSize = 1;
    static const bool isVectorType = false;
};

template<typename T,exint SIZE,bool INSTANTIATED>
struct UT_FixedVectorTraits<UT_FixedVector<T,SIZE,INSTANTIATED> >
{
    typedef UT_FixedVector<T,SIZE,INSTANTIATED> FixedVectorType;
    typedef T DataType;
    static const exint TupleSize = SIZE;
    static const bool isVectorType = true;
};

}}
#endif

/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      Simple wrappers on tbb interface
 */
#ifndef __UT_ParallelUtil__
#define __UT_ParallelUtil__

#include <thread> // This is just included for std::thread::hardware_concurrency()

namespace igl {
/// @private
namespace FastWindingNumber {

namespace UT_Thread {
inline int getNumProcessors()
{
    return std::thread::hardware_concurrency();
}
}

//#include "tbb/blocked_range.h"
//#include "tbb/parallel_for.h"
////namespace tbb { class split; }
//
///// Declare prior to use.
//template <typename T>
//using UT_BlockedRange = tbb::blocked_range<T>;
//
//// Default implementation that calls range.size()
//template< typename RANGE >
//struct UT_EstimatorNumItems
//{
//    UT_EstimatorNumItems() {}
//
//    size_t operator()(const RANGE& range) const
//    {
//        return range.size();
//    }
//};
//
///// This is needed by UT_CoarsenedRange
//template <typename RANGE>
//inline size_t UTestimatedNumItems(const RANGE& range)
//{
//    return UT_EstimatorNumItems<RANGE>()(range);
//}
//
///// UT_CoarsenedRange: This should be used only inside
///// UT_ParallelFor and UT_ParallelReduce
///// This class wraps an existing range with a new range.
///// This allows us to use simple_partitioner, rather than
///// auto_partitioner, which has disastrous performance with
///// the default grain size in tbb 4.
//template< typename RANGE >
//class UT_CoarsenedRange : public RANGE
//{
//public:
//    // Compiler-generated versions are fine:
//    //   ~UT_CoarsenedRange();
//    //   UT_CoarsenedRange(const UT_CoarsenedRange&);
//
//    // Split into two sub-ranges:
//    UT_CoarsenedRange(UT_CoarsenedRange& range, tbb::split spl) :
//        RANGE(range, spl),
//        myGrainSize(range.myGrainSize)
//    {
//    }
//
//    // Inherited: bool empty() const
//
//    bool is_divisible() const
//    {
//        return
//            RANGE::is_divisible() &&
//            (UTestimatedNumItems(static_cast<const RANGE&>(*this)) > myGrainSize);
//    }
//
//private:
//    size_t myGrainSize;
//
//    UT_CoarsenedRange(const RANGE& base_range, const size_t grain_size) :
//        RANGE(base_range),
//        myGrainSize(grain_size)
//    {
//    }
//
//    template <typename Range, typename Body>
//    friend void UTparallelFor(
//        const Range &range, const Body &body,
//        const int subscribe_ratio, const int min_grain_size
//    );
//};
//
///// Run the @c body function over a range in parallel.
///// UTparallelFor attempts to spread the range out over at most
///// subscribe_ratio * num_processors tasks.
///// The factor subscribe_ratio can be used to help balance the load.
///// UTparallelFor() uses tbb for its implementation.
///// The grain size used is the maximum of min_grain_size and
///// UTestimatedNumItems(range) / (subscribe_ratio * num_processors).
///// If subscribe_ratio == 0, then a grain size of min_grain_size will be used.
///// A range can be split only when UTestimatedNumItems(range) exceeds the
///// grain size and the range is divisible.
/////
///// Requirements for the Range functor are:
/////   - the requirements of the tbb Range Concept
/////   - UT_EstimatorNumItems<Range> must return the estimated number of work
/////     items for the range. When Range::size() is not the correct estimate,
/////     then a (partial) specialization of UT_EstimatorNumItems must be
/////     provided for the type Range.
/////
///// Requirements for the Body function are:
/////   - @code Body(const Body &); @endcode @n
/////     Copy Constructor
/////   - @code Body::~Body(); @endcode @n
/////     Destructor
/////   - @code void Body::operator()(const Range &range) const; @endcode
/////     Function call to perform operation on the range. Note the operator is
/////     @b const.
/////
///// The requirements for a Range object are:
/////   - @code Range::Range(const Range&); @endcode @n
/////     Copy constructor
/////   - @code Range::~Range(); @endcode @n
/////     Destructor
/////   - @code bool Range::is_divisible() const; @endcode @n
/////     True if the range can be partitioned into two sub-ranges
/////   - @code bool Range::empty() const; @endcode @n
/////     True if the range is empty
/////   - @code Range::Range(Range &r, UT_Split) const; @endcode @n
/////     Split the range @c r into two sub-ranges (i.e. modify @c r and *this)
/////
///// Example: @code
/////     class Square {
/////     public:
/////         Square(double *data) : myData(data) {}
/////         ~Square();
/////         void operator()(const UT_BlockedRange<int64> &range) const
/////         {
/////             for (int64 i = range.begin(); i != range.end(); ++i)
/////                 myData[i] *= myData[i];
/////         }
/////         double *myData;
/////     };
/////     ...
/////
/////     void
/////     parallel_square(double *array, int64 length)
/////     {
/////         UTparallelFor(UT_BlockedRange<int64>(0, length), Square(array));
/////     }
///// @endcode
/////
///// @see UTparallelReduce(), UT_BlockedRange()
//
//template <typename Range, typename Body>
//void UTparallelFor(
//    const Range &range, const Body &body,
//    const int subscribe_ratio = 2,
//    const int min_grain_size = 1
//)
//{
//    const size_t num_processors( UT_Thread::getNumProcessors() );
//
//    UT_IGL_ASSERT( num_processors >= 1 );
//    UT_IGL_ASSERT( min_grain_size >= 1 );
//    UT_IGL_ASSERT( subscribe_ratio >= 0 );
//
//    const size_t est_range_size( UTestimatedNumItems(range) );
//
//    // Don't run on an empty range!
//    if (est_range_size == 0)
//        return;
//
//    // Avoid tbb overhead if entire range needs to be single threaded
//    if (num_processors == 1 || est_range_size <= min_grain_size)
//    {
//        body(range);
//        return;
//    }
//
//    size_t grain_size(min_grain_size);
//    if( subscribe_ratio > 0 )
//        grain_size = std::max(
//            grain_size,
//            est_range_size / (subscribe_ratio * num_processors)
//        );
//
//    UT_CoarsenedRange< Range > coarsened_range(range, grain_size);
//
//    tbb::parallel_for(coarsened_range, body, tbb::simple_partitioner());
//}
//
///// Version of UTparallelFor that is tuned for the case where the range
///// consists of lightweight items, for example,
///// float additions or matrix-vector multiplications.
//template <typename Range, typename Body>
//void
//UTparallelForLightItems(const Range &range, const Body &body)
//{
//    UTparallelFor(range, body, 2, 1024);
//}
//
///// UTserialFor can be used as a debugging tool to quickly replace a parallel
///// for with a serial for.
//template <typename Range, typename Body>
//void UTserialFor(const Range &range, const Body &body)
//    { body(range); }
//

}}
#endif

/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      Bounding Volume Hierarchy (BVH) implementation.
 *      To call functions not implemented here, also include UT_BVHImpl.h
 */
#pragma once
#ifndef __HDK_UT_BVH_h__
#define __HDK_UT_BVH_h__

#include <limits>
#include <memory>

namespace igl {
/// @private
namespace FastWindingNumber {

template<typename T> class UT_Array;
class v4uf;
class v4uu;

namespace HDK_Sample {
namespace UT {

template<typename T,uint NAXES>
struct Box {
    T vals[NAXES][2];

    SYS_FORCE_INLINE Box() noexcept = default;
    SYS_FORCE_INLINE constexpr Box(const Box &other) noexcept = default;
    SYS_FORCE_INLINE constexpr Box(Box &&other) noexcept = default;
    SYS_FORCE_INLINE Box& operator=(const Box &other) noexcept = default;
    SYS_FORCE_INLINE Box& operator=(Box &&other) noexcept = default;

    template<typename S>
    SYS_FORCE_INLINE Box(const Box<S,NAXES>& other) noexcept {
        static_assert((std::is_pod<Box<T,NAXES>>::value) || !std::is_pod<T>::value,
            "UT::Box should be POD, for better performance in UT_Array, etc.");
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = T(other.vals[axis][0]);
            vals[axis][1] = T(other.vals[axis][1]);
        }
    }
    template<typename S,bool INSTANTIATED>
    SYS_FORCE_INLINE Box(const UT_FixedVector<S,NAXES,INSTANTIATED>& pt) noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = pt[axis];
            vals[axis][1] = pt[axis];
        }
    }
    template<typename S>
    SYS_FORCE_INLINE Box& operator=(const Box<S,NAXES>& other) noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = T(other.vals[axis][0]);
            vals[axis][1] = T(other.vals[axis][1]);
        }
        return *this;
    }

    SYS_FORCE_INLINE const T* operator[](const size_t axis) const noexcept {
        UT_IGL_ASSERT_P(axis < NAXES);
        return vals[axis];
    }
    SYS_FORCE_INLINE T* operator[](const size_t axis) noexcept {
        UT_IGL_ASSERT_P(axis < NAXES);
        return vals[axis];
    }

    SYS_FORCE_INLINE void initBounds() noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = std::numeric_limits<T>::max();
            vals[axis][1] = -std::numeric_limits<T>::max();
        }
    }
    /// Copy the source box.
    /// NOTE: This is so that in templated code that may have a Box or a
    ///       UT_FixedVector, it can call initBounds and still work.
    SYS_FORCE_INLINE void initBounds(const Box<T,NAXES>& src) noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = src.vals[axis][0];
            vals[axis][1] = src.vals[axis][1];
        }
    }
    /// Initialize with the union of the source boxes.
    /// NOTE: This is so that in templated code that may have Boxes or
    ///       UT_FixedVectors, it can call initBoundsUnordered and still work.
    SYS_FORCE_INLINE void initBoundsUnordered(const Box<T,NAXES>& src0, const Box<T,NAXES>& src1) noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = SYSmin(src0.vals[axis][0], src1.vals[axis][0]);
            vals[axis][1] = SYSmax(src0.vals[axis][1], src1.vals[axis][1]);
        }
    }
    SYS_FORCE_INLINE void combine(const Box<T,NAXES>& src) noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            T& minv = vals[axis][0];
            T& maxv = vals[axis][1];
            const T curminv = src.vals[axis][0];
            const T curmaxv = src.vals[axis][1];
            minv = (minv < curminv) ? minv : curminv;
            maxv = (maxv > curmaxv) ? maxv : curmaxv;
        }
    }
    SYS_FORCE_INLINE void enlargeBounds(const Box<T,NAXES>& src) noexcept {
        combine(src);
    }
    template<typename S,bool INSTANTIATED>
    SYS_FORCE_INLINE
    void initBounds(const UT_FixedVector<S,NAXES,INSTANTIATED>& pt) noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = pt[axis];
            vals[axis][1] = pt[axis];
        }
    }
    template<bool INSTANTIATED>
    SYS_FORCE_INLINE
    void initBounds(const UT_FixedVector<T,NAXES,INSTANTIATED>& min, const UT_FixedVector<T,NAXES,INSTANTIATED>& max) noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = min[axis];
            vals[axis][1] = max[axis];
        }
    }
    template<bool INSTANTIATED>
    SYS_FORCE_INLINE
    void initBoundsUnordered(const UT_FixedVector<T,NAXES,INSTANTIATED>& p0, const UT_FixedVector<T,NAXES,INSTANTIATED>& p1) noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = SYSmin(p0[axis], p1[axis]);
            vals[axis][1] = SYSmax(p0[axis], p1[axis]);
        }
    }
    template<bool INSTANTIATED>
    SYS_FORCE_INLINE
    void enlargeBounds(const UT_FixedVector<T,NAXES,INSTANTIATED>& pt) noexcept {
        for (uint axis = 0; axis < NAXES; ++axis) {
            vals[axis][0] = SYSmin(vals[axis][0], pt[axis]);
            vals[axis][1] = SYSmax(vals[axis][1], pt[axis]);
        }
    }

    SYS_FORCE_INLINE
    UT_FixedVector<T,NAXES> getMin() const noexcept {
        UT_FixedVector<T,NAXES> v;
        for (uint axis = 0; axis < NAXES; ++axis) {
            v[axis] = vals[axis][0];
        }
        return v;
    }
    SYS_FORCE_INLINE
    UT_FixedVector<T,NAXES> getMax() const noexcept {
        UT_FixedVector<T,NAXES> v;
        for (uint axis = 0; axis < NAXES; ++axis) {
            v[axis] = vals[axis][1];
        }
        return v;
    }

    T diameter2() const noexcept {
        T diff = (vals[0][1]-vals[0][0]);
        T sum = diff*diff;
        for (uint axis = 1; axis < NAXES; ++axis) {
            diff = (vals[axis][1]-vals[axis][0]);
            sum += diff*diff;
        }
        return sum;
    }
    T volume() const noexcept {
        T product = (vals[0][1]-vals[0][0]);
        for (uint axis = 1; axis < NAXES; ++axis) {
            product *= (vals[axis][1]-vals[axis][0]);
        }
        return product;
    }
    T half_surface_area() const noexcept {
        if (NAXES==1) {
            // NOTE: Although this should technically be 1,
            //       that doesn't make any sense as a heuristic,
            //       so we fall back to the "volume" of this box.
            return (vals[0][1]-vals[0][0]);
        }
        if (NAXES==2) {
            const T d0 = (vals[0][1]-vals[0][0]);
            const T d1 = (vals[1][1]-vals[1][0]);
            return d0 + d1;
        }
        if (NAXES==3) {
            const T d0 = (vals[0][1]-vals[0][0]);
            const T d1 = (vals[1][1]-vals[1][0]);
            const T d2 = (vals[2][1]-vals[2][0]);
            return d0*d1 + d1*d2 + d2*d0;
        }
        if (NAXES==4) {
            const T d0 = (vals[0][1]-vals[0][0]);
            const T d1 = (vals[1][1]-vals[1][0]);
            const T d2 = (vals[2][1]-vals[2][0]);
            const T d3 = (vals[3][1]-vals[3][0]);
            // This is just d0d1d2 + d1d2d3 + d2d3d0 + d3d0d1 refactored.
            const T d0d1 = d0*d1;
            const T d2d3 = d2*d3;
            return d0d1*(d2+d3) + d2d3*(d0+d1);
        }
        T sum = 0;
        for (uint skipped_axis = 0; skipped_axis < NAXES; ++skipped_axis) {
            T product = 1;
            for (uint axis = 0; axis < NAXES; ++axis) {
                if (axis != skipped_axis) {
                    product *= (vals[axis][1]-vals[axis][0]);
                }
            }
            sum += product;
        }
        return sum;
    }
    T axis_sum() const noexcept {
        T sum = (vals[0][1]-vals[0][0]);
        for (uint axis = 1; axis < NAXES; ++axis) {
            sum += (vals[axis][1]-vals[axis][0]);
        }
        return sum;
    }

    template<bool INSTANTIATED0,bool INSTANTIATED1>
    SYS_FORCE_INLINE void intersect(
        T &box_tmin,
        T &box_tmax,
        const UT_FixedVector<uint,NAXES,INSTANTIATED0> &signs,
        const UT_FixedVector<T,NAXES,INSTANTIATED1> &origin,
        const UT_FixedVector<T,NAXES,INSTANTIATED1> &inverse_direction
    ) const noexcept {
        for (int axis = 0; axis < NAXES; ++axis)
        {
            uint sign = signs[axis];
            T t1 = (vals[axis][sign]   - origin[axis]) * inverse_direction[axis];
            T t2 = (vals[axis][sign^1] - origin[axis]) * inverse_direction[axis];
            box_tmin = SYSmax(t1, box_tmin);
            box_tmax = SYSmin(t2, box_tmax);
        }
    }
    SYS_FORCE_INLINE void intersect(const Box& other, Box& dest) const noexcept {
        for (int axis = 0; axis < NAXES; ++axis)
        {
            dest.vals[axis][0] = SYSmax(vals[axis][0], other.vals[axis][0]);
            dest.vals[axis][1] = SYSmin(vals[axis][1], other.vals[axis][1]);
        }
    }

    template<bool INSTANTIATED>
    SYS_FORCE_INLINE T minDistance2(
        const UT_FixedVector<T,NAXES,INSTANTIATED> &p
    ) const noexcept {
        T diff = SYSmax(SYSmax(vals[0][0]-p[0], p[0]-vals[0][1]), T(0.0f));
        T d2 = diff*diff;
        for (int axis = 1; axis < NAXES; ++axis)
        {
            diff = SYSmax(SYSmax(vals[axis][0]-p[axis], p[axis]-vals[axis][1]), T(0.0f));
            d2 += diff*diff;
        }
        return d2;
    }
    template<bool INSTANTIATED>
    SYS_FORCE_INLINE T maxDistance2(
        const UT_FixedVector<T,NAXES,INSTANTIATED> &p
    ) const noexcept {
        T diff = SYSmax(p[0]-vals[0][0], vals[0][1]-p[0]);
        T d2 = diff*diff;
        for (int axis = 1; axis < NAXES; ++axis)
        {
            diff = SYSmax(p[axis]-vals[axis][0], vals[axis][1]-p[axis]);
            d2 += diff*diff;
        }
        return d2;
    }
};
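
// Illustrative sketch of the ray/box slab test above: for a ray
// origin + t*direction, precompute inverse_direction[axis] = 1/direction[axis]
// and signs[axis] = (direction[axis] < 0), seed box_tmin/box_tmax with the
// valid t interval (e.g. 0 and +infinity), and after intersect() returns,
// the ray hits the box iff box_tmin <= box_tmax. Using signs to pick the
// near slab first keeps the max/min updates branch-free per axis.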
/// Used by BVH::init to specify the heuristic to use for choosing between different box splits.
/// I tried putting this inside the BVH class, but I had difficulty getting it to compile.
enum class BVH_Heuristic {
    /// Tries to minimize the sum of axis lengths of the boxes.
    /// This is useful for applications where the probability of a box being applicable to a
    /// query is proportional to the "length", e.g. the probability of a random infinite plane
    /// intersecting the box.
    BOX_PERIMETER,

    /// Tries to minimize the "surface area" of the boxes.
    /// In 3D, uses the surface area; in 2D, uses the perimeter; in 1D, uses the axis length.
    /// This is what most applications, e.g. ray tracing, should use, particularly when the
    /// probability of a box being applicable to a query is proportional to the surface "area",
    /// e.g. the probability of a random ray hitting the box.
    ///
    /// NOTE: USE THIS ONE IF YOU ARE UNSURE!
    BOX_AREA,

    /// Tries to minimize the "volume" of the boxes.
    /// Uses the product of all axis lengths as a heuristic (volume in 3D, area in 2D, length in 1D).
    /// This is useful for applications where the probability of a box being applicable to a
    /// query is proportional to the "volume", e.g. the probability of a random point being inside the box.
    BOX_VOLUME,

    /// Tries to minimize the "radii" of the boxes (i.e. the distance from the centre to a corner).
    /// This is useful for applications where the probability of a box being applicable to a
    /// query is proportional to the distance to the box centre, e.g. the probability of a random
    /// infinite plane being within the "radius" of the centre.
    BOX_RADIUS,

    /// Tries to minimize the squared "radii" of the boxes (i.e. the squared distance from the centre to a corner).
    /// This is useful for applications where the probability of a box being applicable to a
    /// query is proportional to the squared distance to the box centre, e.g. the probability of a random
    /// ray passing within the "radius" of the centre.
    BOX_RADIUS2,

    /// Tries to minimize the cubed "radii" of the boxes (i.e. the cubed distance from the centre to a corner).
    /// This is useful for applications where the probability of a box being applicable to a
    /// query is proportional to the cubed distance to the box centre, e.g. the probability of a random
    /// point being within the "radius" of the centre.
    BOX_RADIUS3,

    /// Tries to minimize the depth of the tree by primarily splitting at the median of the max axis.
    /// It may fall back to minimizing the area, but the tree depth should be unaffected.
    ///
    /// FIXME: This is not fully implemented yet.
    MEDIAN_MAX_AXIS
};
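
// Illustrative usage sketch (not part of the original source): a typical
// caller builds a 4-way BVH over an array of Box bounds with the
// recommended BOX_AREA heuristic.  `boxes` and `nboxes` stand in for the
// caller's own data here.
//
//     Box<float,3> boxes[128] = /* filled in by the caller */;
//     uint nboxes = 128;
//     BVH<4> bvh;
//     bvh.init<BVH_Heuristic::BOX_AREA, float, 3>(boxes, nboxes);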
template<uint N>
class BVH {
public:
    using INT_TYPE = uint;
    struct Node {
        INT_TYPE child[N];
        static constexpr INT_TYPE theN = N;
        static constexpr INT_TYPE EMPTY = INT_TYPE(-1);
        static constexpr INT_TYPE INTERNAL_BIT = (INT_TYPE(1)<<(sizeof(INT_TYPE)*8 - 1));
        SYS_FORCE_INLINE static INT_TYPE markInternal(INT_TYPE internal_node_num) noexcept {
            return internal_node_num | INTERNAL_BIT;
        }
        SYS_FORCE_INLINE static bool isInternal(INT_TYPE node_int) noexcept {
            return (node_int & INTERNAL_BIT) != 0;
        }
        SYS_FORCE_INLINE static INT_TYPE getInternalNum(INT_TYPE node_int) noexcept {
            return node_int & ~INTERNAL_BIT;
        }
    };
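
    // Each child[] entry is a tagged index: the high bit (INTERNAL_BIT)
    // marks internal nodes, anything else is an item index.  For example,
    // markInternal(5) == 0x80000005, isInternal() is true for it, and
    // getInternalNum() recovers 5.  EMPTY is all ones, so it also tests
    // as internal; the traversal code below checks for EMPTY explicitly.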
private:
    struct FreeDeleter {
        SYS_FORCE_INLINE void operator()(Node* p) const {
            if (p) {
                // The pointer was allocated with malloc by UT_Array,
                // so it must be freed with free.
                free(p);
            }
        }
    };
    std::unique_ptr<Node[],FreeDeleter> myRoot;
    INT_TYPE myNumNodes;
public:
    SYS_FORCE_INLINE BVH() noexcept : myRoot(nullptr), myNumNodes(0) {}
    template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE=INT_TYPE>
    inline void init(const BOX_TYPE* boxes, const INT_TYPE nboxes, SRC_INT_TYPE* indices=nullptr, bool reorder_indices=false, INT_TYPE max_items_per_leaf=1) noexcept;
    template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE=INT_TYPE>
    inline void init(Box<T,NAXES> axes_minmax, const BOX_TYPE* boxes, INT_TYPE nboxes, SRC_INT_TYPE* indices=nullptr, bool reorder_indices=false, INT_TYPE max_items_per_leaf=1) noexcept;
    SYS_FORCE_INLINE
    INT_TYPE getNumNodes() const noexcept
    {
        return myNumNodes;
    }
    SYS_FORCE_INLINE
    const Node *getNodes() const noexcept
    {
        return myRoot.get();
    }
    SYS_FORCE_INLINE
    void clear() noexcept {
        myRoot.reset();
        myNumNodes = 0;
    }
    /// For each node, this effectively does:
    ///     LOCAL_DATA local_data[MAX_ORDER];
    ///     bool descend = functors.pre(nodei, parent_data);
    ///     if (!descend)
    ///         return;
    ///     for each child {
    ///         if (isitem(child))
    ///             functors.item(getitemi(child), nodei, local_data[child]);
    ///         else if (isnode(child))
    ///             recurse(getnodei(child), local_data);
    ///     }
    ///     functors.post(nodei, parent_nodei, data_for_parent, num_children, local_data);
    template<typename LOCAL_DATA,typename FUNCTORS>
    inline void traverse(
        FUNCTORS &functors,
        LOCAL_DATA *data_for_parent=nullptr) const noexcept;
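
    // Minimal sketch (not from the original source) of a FUNCTORS type for
    // traverse(), with LOCAL_DATA = int; the member signatures mirror how
    // traverseHelper() below invokes them:
    //
    //     struct CountItems {
    //         int total = 0;
    //         bool pre(uint nodei, int *parent_data) { return true; }
    //         void item(uint itemi, uint nodei, int &local_data) { ++total; }
    //         void post(uint nodei, uint parent_nodei, int *data_for_parent,
    //                   uint nchildren, int *local_data) {}
    //     };
    //     // CountItems counter;  bvh.traverse<int>(counter);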
    /// This acts like the traverse function, except that if two subtrees of a node
    /// each contain at least parallel_threshold nodes, they may be executed in parallel.
    /// If parallel_threshold is 0, even item_functor may be executed on items in parallel.
    /// NOTE: Make sure that your functors don't depend on the order that they're executed in,
    ///       e.g. don't add values from sibling nodes together except in the post functor,
    ///       else they might have nondeterministic roundoff or miss some values entirely.
    template<typename LOCAL_DATA,typename FUNCTORS>
    inline void traverseParallel(
        INT_TYPE parallel_threshold,
        FUNCTORS &functors,
        LOCAL_DATA *data_for_parent=nullptr) const noexcept;
    /// For each node, this effectively does:
    ///     LOCAL_DATA local_data[MAX_ORDER];
    ///     uint descend = functors.pre(nodei, parent_data);
    ///     if (!descend)
    ///         return;
    ///     for each child {
    ///         if (!(descend & (1<<child)))
    ///             continue;
    ///         if (isitem(child))
    ///             functors.item(getitemi(child), nodei, local_data[child]);
    ///         else if (isnode(child))
    ///             recurse(getnodei(child), local_data);
    ///     }
    ///     functors.post(nodei, parent_nodei, data_for_parent, num_children, local_data);
    template<typename LOCAL_DATA,typename FUNCTORS>
    inline void traverseVector(
        FUNCTORS &functors,
        LOCAL_DATA *data_for_parent=nullptr) const noexcept;
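
    // Note (not from the original source): unlike traverse(), pre() here
    // returns a bitmask rather than a bool: bit s enables descent into
    // child s, and post() receives the final mask as an extra trailing
    // argument.  See traverseVectorHelper() below for the exact calls.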
    /// Prints a text representation of the tree to stdout.
    inline void debugDump() const;
    template<typename SRC_INT_TYPE>
    static inline void createTrivialIndices(SRC_INT_TYPE* indices, const INT_TYPE n) noexcept;
private:
    template<typename LOCAL_DATA,typename FUNCTORS>
    inline void traverseHelper(
        INT_TYPE nodei,
        INT_TYPE parent_nodei,
        FUNCTORS &functors,
        LOCAL_DATA *data_for_parent=nullptr) const noexcept;
    template<typename LOCAL_DATA,typename FUNCTORS>
    inline void traverseParallelHelper(
        INT_TYPE nodei,
        INT_TYPE parent_nodei,
        INT_TYPE parallel_threshold,
        INT_TYPE next_node_id,
        FUNCTORS &functors,
        LOCAL_DATA *data_for_parent=nullptr) const noexcept;
    template<typename LOCAL_DATA,typename FUNCTORS>
    inline void traverseVectorHelper(
        INT_TYPE nodei,
        INT_TYPE parent_nodei,
        FUNCTORS &functors,
        LOCAL_DATA *data_for_parent=nullptr) const noexcept;
    template<typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
    static inline void computeFullBoundingBox(Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, const INT_TYPE nboxes, SRC_INT_TYPE* indices) noexcept;
    template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
    static inline void initNode(UT_Array<Node>& nodes, Node &node, const Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, SRC_INT_TYPE* indices, const INT_TYPE nboxes) noexcept;
    template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
    static inline void initNodeReorder(UT_Array<Node>& nodes, Node &node, const Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, SRC_INT_TYPE* indices, const INT_TYPE nboxes, const INT_TYPE indices_offset, const INT_TYPE max_items_per_leaf) noexcept;
    template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
    static inline void multiSplit(const Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE* sub_indices[N+1], Box<T,NAXES> sub_boxes[N]) noexcept;
    template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
    static inline void split(const Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE*& split_indices, Box<T,NAXES>* split_boxes) noexcept;
    template<INT_TYPE PARALLEL_THRESHOLD, typename SRC_INT_TYPE>
    static inline void adjustParallelChildNodes(INT_TYPE nparallel, UT_Array<Node>& nodes, Node& node, UT_Array<Node>* parallel_nodes, SRC_INT_TYPE* sub_indices) noexcept;
    template<typename T,typename BOX_TYPE,typename SRC_INT_TYPE>
    static inline void nthElement(const BOX_TYPE* boxes, SRC_INT_TYPE* indices, const SRC_INT_TYPE* indices_end, const uint axis, SRC_INT_TYPE*const nth) noexcept;
    template<typename T,typename BOX_TYPE,typename SRC_INT_TYPE>
    static inline void partitionByCentre(const BOX_TYPE* boxes, SRC_INT_TYPE*const indices, const SRC_INT_TYPE*const indices_end, const uint axis, const T pivotx2, SRC_INT_TYPE*& ppivot_start, SRC_INT_TYPE*& ppivot_end) noexcept;

    /// An overestimate of the number of nodes needed.
    /// At worst, we could have only 2 children in every leaf, and
    /// then above that, we have a geometric series with r=1/N and a=(nboxes/2)/N.
    /// The true worst case might be a little worse than this, but
    /// it's probably fairly unlikely.
    SYS_FORCE_INLINE static INT_TYPE nodeEstimate(const INT_TYPE nboxes) noexcept {
        return nboxes/2 + nboxes/(2*(N-1));
    }
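
    // Worked example (not from the original source): two-item leaves give
    // at most nboxes/2 leaf nodes, and the internal levels above them form
    // a geometric series summing to roughly (nboxes/2)/(N-1).  For N = 4
    // and nboxes = 1024 this yields 1024/2 + 1024/6 = 512 + 170 = 682
    // nodes (integer division).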
    template<BVH_Heuristic H,typename T, uint NAXES>
    SYS_FORCE_INLINE static T unweightedHeuristic(const Box<T, NAXES>& box) noexcept {
        if (H == BVH_Heuristic::BOX_PERIMETER) {
            return box.axis_sum();
        }
        if (H == BVH_Heuristic::BOX_AREA) {
            return box.half_surface_area();
        }
        if (H == BVH_Heuristic::BOX_VOLUME) {
            return box.volume();
        }
        if (H == BVH_Heuristic::BOX_RADIUS) {
            T diameter2 = box.diameter2();
            return SYSsqrt(diameter2);
        }
        if (H == BVH_Heuristic::BOX_RADIUS2) {
            return box.diameter2();
        }
        if (H == BVH_Heuristic::BOX_RADIUS3) {
            T diameter2 = box.diameter2();
            return diameter2*SYSsqrt(diameter2);
        }
        UT_IGL_ASSERT_MSG(0, "BVH_Heuristic::MEDIAN_MAX_AXIS should be handled separately by caller!");
        return T(1);
    }
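
    // Note (not from the original source): split() and multiSplit() below
    // weight this value by the number of boxes on each side, minimizing
    // unweightedHeuristic(box)*count over candidate splits; with BOX_AREA
    // that product is the classic surface area heuristic (SAH) cost.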
    /// 16 equal-length spans (15 evenly-spaced splits) should be enough for a decent heuristic
    static constexpr INT_TYPE NSPANS = 16;
    static constexpr INT_TYPE NSPLITS = NSPANS-1;

    /// At least 1/16 of all boxes must be on each side, else we could end up with a very deep tree
    static constexpr INT_TYPE MIN_FRACTION = 16;
};

} // UT namespace

template<uint N>
using UT_BVH = UT::BVH<N>;

} // End HDK_Sample namespace
} // End FastWindingNumber namespace
} // End igl namespace

#endif

/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      Bounding Volume Hierarchy (BVH) implementation.
 *      The main file is UT_BVH.h; this file is separate so that
 *      files that don't actually need to call functions on the BVH
 *      won't have unnecessary headers and functions included.
 */

#pragma once

#ifndef __HDK_UT_BVHImpl_h__
#define __HDK_UT_BVHImpl_h__

#include "parallel_for.h"
#include <iostream>
#include <algorithm>

namespace igl {
/// @private
namespace FastWindingNumber {
namespace HDK_Sample {
namespace UT {

template<typename T,uint NAXES>
SYS_FORCE_INLINE bool utBoxExclude(const UT::Box<T,NAXES>& box) noexcept {
    bool has_nan_or_inf = !SYSisFinite(box[0][0]);
    has_nan_or_inf |= !SYSisFinite(box[0][1]);
    for (uint axis = 1; axis < NAXES; ++axis)
    {
        has_nan_or_inf |= !SYSisFinite(box[axis][0]);
        has_nan_or_inf |= !SYSisFinite(box[axis][1]);
    }
    return has_nan_or_inf;
}
template<uint NAXES>
SYS_FORCE_INLINE bool utBoxExclude(const UT::Box<fpreal32,NAXES>& box) noexcept {
    const int32 *pboxints = reinterpret_cast<const int32*>(&box);
    // Fast check for NaN or infinity: check if exponent bits are 0xFF.
    bool has_nan_or_inf = ((pboxints[0] & 0x7F800000) == 0x7F800000);
    has_nan_or_inf |= ((pboxints[1] & 0x7F800000) == 0x7F800000);
    for (uint axis = 1; axis < NAXES; ++axis)
    {
        has_nan_or_inf |= ((pboxints[2*axis] & 0x7F800000) == 0x7F800000);
        has_nan_or_inf |= ((pboxints[2*axis + 1] & 0x7F800000) == 0x7F800000);
    }
    return has_nan_or_inf;
}
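
// Note (not from the original source): in IEEE-754 single precision, a
// value is infinite or NaN exactly when all 8 exponent bits are set,
// i.e. (bits & 0x7F800000) == 0x7F800000.  For example, +inf is bit
// pattern 0x7F800000 and one quiet NaN is 0x7FC00000, while an ordinary
// value like 1.0f (0x3F800000) fails the test.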
template<typename T,uint NAXES>
SYS_FORCE_INLINE T utBoxCenter(const UT::Box<T,NAXES>& box, uint axis) noexcept {
    const T* v = box.vals[axis];
    return v[0] + v[1];
}
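
// Note (not from the original source): despite the name, utBoxCenter for
// a Box returns min+max, i.e. twice the centre along the axis; the
// ut_BoxCentre<T>::scale constant below records that factor of 2 so that
// callers can fold the 0.5 into other constants instead of dividing here.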
template<typename T>
struct ut_BoxCentre {
    constexpr static uint scale = 2;
};
template<typename T,uint NAXES,bool INSTANTIATED>
SYS_FORCE_INLINE bool utBoxExclude(const UT_FixedVector<T,NAXES,INSTANTIATED>& position) noexcept {
    bool has_nan_or_inf = !SYSisFinite(position[0]);
    for (uint axis = 1; axis < NAXES; ++axis)
        has_nan_or_inf |= !SYSisFinite(position[axis]);
    return has_nan_or_inf;
}
template<uint NAXES,bool INSTANTIATED>
SYS_FORCE_INLINE bool utBoxExclude(const UT_FixedVector<fpreal32,NAXES,INSTANTIATED>& position) noexcept {
    const int32 *ppositionints = reinterpret_cast<const int32*>(&position);
    // Fast check for NaN or infinity: check if exponent bits are 0xFF.
    bool has_nan_or_inf = ((ppositionints[0] & 0x7F800000) == 0x7F800000);
    for (uint axis = 1; axis < NAXES; ++axis)
        has_nan_or_inf |= ((ppositionints[axis] & 0x7F800000) == 0x7F800000);
    return has_nan_or_inf;
}
template<typename T,uint NAXES,bool INSTANTIATED>
SYS_FORCE_INLINE T utBoxCenter(const UT_FixedVector<T,NAXES,INSTANTIATED>& position, uint axis) noexcept {
    return position[axis];
}
template<typename T,uint NAXES,bool INSTANTIATED>
struct ut_BoxCentre<UT_FixedVector<T,NAXES,INSTANTIATED>> {
    constexpr static uint scale = 1;
};
template<typename BOX_TYPE,typename SRC_INT_TYPE,typename INT_TYPE>
inline INT_TYPE utExcludeNaNInfBoxIndices(const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE& nboxes) noexcept
{
    //constexpr INT_TYPE PARALLEL_THRESHOLD = 65536;
    //INT_TYPE ntasks = 1;
    //if (nboxes >= PARALLEL_THRESHOLD)
    //{
    //    INT_TYPE nprocessors = UT_Thread::getNumProcessors();
    //    ntasks = (nprocessors > 1) ? SYSmin(4*nprocessors, nboxes/(PARALLEL_THRESHOLD/2)) : 1;
    //}
    //if (ntasks == 1)
    {
        // Serial: easy case; just loop through.
        const SRC_INT_TYPE* indices_end = indices + nboxes;

        // Loop through forward once
        SRC_INT_TYPE* psrc_index = indices;
        for (; psrc_index != indices_end; ++psrc_index)
        {
            const bool exclude = utBoxExclude(boxes[*psrc_index]);
            if (exclude)
                break;
        }
        if (psrc_index == indices_end)
            return 0;

        // First NaN or infinite box
        SRC_INT_TYPE* nan_start = psrc_index;
        for (++psrc_index; psrc_index != indices_end; ++psrc_index)
        {
            const bool exclude = utBoxExclude(boxes[*psrc_index]);
            if (!exclude)
            {
                *nan_start = *psrc_index;
                ++nan_start;
            }
        }
        nboxes = nan_start-indices;
        return indices_end - nan_start;
    }
}
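
// Worked example (not from the original source): with indices = {0,1,2,3}
// and box 1 containing a NaN, the forward scan stops at position 1, the
// compaction pass shifts indices 2 and 3 down to give indices = {0,2,3},
// nboxes becomes 3, and the function returns 1 (the number excluded).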
template<uint N>
template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
inline void BVH<N>::init(const BOX_TYPE* boxes, const INT_TYPE nboxes, SRC_INT_TYPE* indices, bool reorder_indices, INT_TYPE max_items_per_leaf) noexcept {
    Box<T,NAXES> axes_minmax;
    computeFullBoundingBox(axes_minmax, boxes, nboxes, indices);
    init<H>(axes_minmax, boxes, nboxes, indices, reorder_indices, max_items_per_leaf);
}
template<uint N>
template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
inline void BVH<N>::init(Box<T,NAXES> axes_minmax, const BOX_TYPE* boxes, INT_TYPE nboxes, SRC_INT_TYPE* indices, bool reorder_indices, INT_TYPE max_items_per_leaf) noexcept {
    // Clear the tree in advance to save memory.
    myRoot.reset();
    if (nboxes == 0) {
        myNumNodes = 0;
        return;
    }
    UT_Array<INT_TYPE> local_indices;
    if (!indices) {
        local_indices.setSizeNoInit(nboxes);
        indices = local_indices.array();
        createTrivialIndices(indices, nboxes);
    }
    // Exclude any boxes with NaNs or infinities by shifting down indices
    // over the bad box indices and updating nboxes.
    INT_TYPE nexcluded = utExcludeNaNInfBoxIndices(boxes, indices, nboxes);
    if (nexcluded != 0) {
        if (nboxes == 0) {
            myNumNodes = 0;
            return;
        }
        computeFullBoundingBox(axes_minmax, boxes, nboxes, indices);
    }
    UT_Array<Node> nodes;
    // Preallocate an overestimate of the number of nodes needed.
    nodes.setCapacity(nodeEstimate(nboxes));
    nodes.setSize(1);
    if (reorder_indices)
        initNodeReorder<H>(nodes, nodes[0], axes_minmax, boxes, indices, nboxes, 0, max_items_per_leaf);
    else
        initNode<H>(nodes, nodes[0], axes_minmax, boxes, indices, nboxes);
    // If capacity is more than 12.5% over the size, reallocate.
    if (8*nodes.capacity() > 9*nodes.size()) {
        nodes.setCapacity(nodes.size());
    }
    // Steal ownership of the array from the UT_Array.
    myRoot.reset(nodes.array());
    myNumNodes = nodes.size();
    nodes.unsafeClearData();
}
template<uint N>
template<typename LOCAL_DATA,typename FUNCTORS>
inline void BVH<N>::traverse(
    FUNCTORS &functors,
    LOCAL_DATA* data_for_parent) const noexcept
{
    if (!myRoot)
        return;
    // NOTE: The root is always index 0.
    traverseHelper(0, INT_TYPE(-1), functors, data_for_parent);
}
template<uint N>
template<typename LOCAL_DATA,typename FUNCTORS>
inline void BVH<N>::traverseHelper(
    INT_TYPE nodei,
    INT_TYPE parent_nodei,
    FUNCTORS &functors,
    LOCAL_DATA* data_for_parent) const noexcept
{
    const Node &node = myRoot[nodei];
    bool descend = functors.pre(nodei, data_for_parent);
    if (!descend)
        return;
    LOCAL_DATA local_data[N];
    INT_TYPE s;
    for (s = 0; s < N; ++s) {
        const INT_TYPE node_int = node.child[s];
        if (Node::isInternal(node_int)) {
            if (node_int == Node::EMPTY) {
                // NOTE: Anything after this will be empty too, so we can break.
                break;
            }
            traverseHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
        }
        else {
            functors.item(node_int, nodei, local_data[s]);
        }
    }
    // NOTE: s is now the number of non-empty entries in this node.
    functors.post(nodei, parent_nodei, data_for_parent, s, local_data);
}
template<uint N>
template<typename LOCAL_DATA,typename FUNCTORS>
inline void BVH<N>::traverseParallel(
    INT_TYPE parallel_threshold,
    FUNCTORS& functors,
    LOCAL_DATA* data_for_parent) const noexcept
{
    if (!myRoot)
        return;
    // NOTE: The root is always index 0.
    traverseParallelHelper(0, INT_TYPE(-1), parallel_threshold, myNumNodes, functors, data_for_parent);
}
template<uint N>
template<typename LOCAL_DATA,typename FUNCTORS>
inline void BVH<N>::traverseParallelHelper(
    INT_TYPE nodei,
    INT_TYPE parent_nodei,
    INT_TYPE parallel_threshold,
    INT_TYPE next_node_id,
    FUNCTORS& functors,
    LOCAL_DATA* data_for_parent) const noexcept
{
    const Node &node = myRoot[nodei];
    bool descend = functors.pre(nodei, data_for_parent);
    if (!descend)
        return;
    // To determine the number of nodes in a child's subtree, we take the next
    // node ID minus the current child's node ID.
    INT_TYPE next_nodes[N];
    INT_TYPE nnodes[N];
    INT_TYPE nchildren = N;
    INT_TYPE nparallel = 0;
    // s is unsigned, so the s < N condition is the real bounds check:
    // when s wraps around past zero it becomes >= N.  The s >= 0 branch
    // only applies if INT_TYPE is ever made signed, and the compiler
    // should remove it entirely for unsigned s.
    for (INT_TYPE s = N-1; (std::is_signed<INT_TYPE>::value ? (s >= 0) : (s < N)); --s) {
        const INT_TYPE node_int = node.child[s];
        if (node_int == Node::EMPTY) {
            --nchildren;
            continue;
        }
        next_nodes[s] = next_node_id;
        if (Node::isInternal(node_int)) {
            // NOTE: This depends on BVH<N>::initNode appending the child nodes
            //       in between their content, instead of all at once.
            INT_TYPE child_node_id = Node::getInternalNum(node_int);
            nnodes[s] = next_node_id - child_node_id;
            next_node_id = child_node_id;
        }
        else {
            nnodes[s] = 0;
        }
        nparallel += (nnodes[s] >= parallel_threshold);
    }
    LOCAL_DATA local_data[N];
    if (nparallel >= 2) {
        // Do any non-parallel ones first
        if (nparallel < nchildren) {
            for (INT_TYPE s = 0; s < N; ++s) {
                if (nnodes[s] >= parallel_threshold) {
                    continue;
                }
                const INT_TYPE node_int = node.child[s];
                if (Node::isInternal(node_int)) {
                    if (node_int == Node::EMPTY) {
                        // NOTE: Anything after this will be empty too, so we can break.
                        break;
                    }
                    traverseHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
                }
                else {
                    functors.item(node_int, nodei, local_data[s]);
                }
            }
        }
        // Now do the parallel ones
        igl::parallel_for(
            nparallel,
            [this,nodei,&node,&nnodes,&next_nodes,&parallel_threshold,&functors,&local_data](int taski)
            {
                INT_TYPE parallel_count = 0;
                // NOTE: The check for s < N is just so that the compiler can
                //       (hopefully) figure out that it can fully unroll the loop.
                INT_TYPE s;
                for (s = 0; s < N; ++s) {
                    if (nnodes[s] < parallel_threshold) {
                        continue;
                    }
                    if (parallel_count == taski) {
                        break;
                    }
                    ++parallel_count;
                }
                const INT_TYPE node_int = node.child[s];
                if (Node::isInternal(node_int)) {
                    UT_IGL_ASSERT_MSG_P(node_int != Node::EMPTY, "Empty entries should have been excluded above.");
                    traverseParallelHelper(Node::getInternalNum(node_int), nodei, parallel_threshold, next_nodes[s], functors, &local_data[s]);
                }
                else {
                    functors.item(node_int, nodei, local_data[s]);
                }
            });
    }
    else {
        // All in serial
        for (INT_TYPE s = 0; s < N; ++s) {
            const INT_TYPE node_int = node.child[s];
            if (Node::isInternal(node_int)) {
                if (node_int == Node::EMPTY) {
                    // NOTE: Anything after this will be empty too, so we can break.
                    break;
                }
                traverseHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
            }
            else {
                functors.item(node_int, nodei, local_data[s]);
            }
        }
    }
    functors.post(nodei, parent_nodei, data_for_parent, nchildren, local_data);
}
template<uint N>
template<typename LOCAL_DATA,typename FUNCTORS>
inline void BVH<N>::traverseVector(
    FUNCTORS &functors,
    LOCAL_DATA* data_for_parent) const noexcept
{
    if (!myRoot)
        return;
    // NOTE: The root is always index 0.
    traverseVectorHelper(0, INT_TYPE(-1), functors, data_for_parent);
}
template<uint N>
template<typename LOCAL_DATA,typename FUNCTORS>
inline void BVH<N>::traverseVectorHelper(
    INT_TYPE nodei,
    INT_TYPE parent_nodei,
    FUNCTORS &functors,
    LOCAL_DATA* data_for_parent) const noexcept
{
    const Node &node = myRoot[nodei];
    INT_TYPE descend = functors.pre(nodei, data_for_parent);
    if (!descend)
        return;
    LOCAL_DATA local_data[N];
    INT_TYPE s;
    for (s = 0; s < N; ++s) {
        if ((descend>>s) & 1) {
            const INT_TYPE node_int = node.child[s];
            if (Node::isInternal(node_int)) {
                if (node_int == Node::EMPTY) {
                    // NOTE: Anything after this will be empty too, so we can break.
                    descend &= (INT_TYPE(1)<<s)-1;
                    break;
                }
                traverseVectorHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
            }
            else {
                functors.item(node_int, nodei, local_data[s]);
            }
        }
    }
    // NOTE: s is now the number of non-empty entries in this node.
    functors.post(nodei, parent_nodei, data_for_parent, s, local_data, descend);
}
template<uint N>
template<typename SRC_INT_TYPE>
inline void BVH<N>::createTrivialIndices(SRC_INT_TYPE* indices, const INT_TYPE n) noexcept {
    igl::parallel_for(n, [indices](INT_TYPE i) { indices[i] = i; }, 65536);
}
template<uint N>
template<typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
inline void BVH<N>::computeFullBoundingBox(Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, const INT_TYPE nboxes, SRC_INT_TYPE* indices) noexcept {
    if (!nboxes) {
        axes_minmax.initBounds();
        return;
    }
    INT_TYPE ntasks = 1;
    if (nboxes >= 2*4096) {
        INT_TYPE nprocessors = UT_Thread::getNumProcessors();
        ntasks = (nprocessors > 1) ? SYSmin(4*nprocessors, nboxes/4096) : 1;
    }
    if (ntasks == 1) {
        Box<T,NAXES> box;
        if (indices) {
            box.initBounds(boxes[indices[0]]);
            for (INT_TYPE i = 1; i < nboxes; ++i) {
                box.combine(boxes[indices[i]]);
            }
        }
        else {
            box.initBounds(boxes[0]);
            for (INT_TYPE i = 1; i < nboxes; ++i) {
                box.combine(boxes[i]);
            }
        }
        axes_minmax = box;
    }
    else {
        UT_SmallArray<Box<T,NAXES>> parallel_boxes;
        Box<T,NAXES> box;
        igl::parallel_for(
            nboxes,
            [&parallel_boxes](int n){ parallel_boxes.setSize(n); },
            [&parallel_boxes,indices,&boxes](int i, int t)
            {
                if (indices)
                {
                    parallel_boxes[t].combine(boxes[indices[i]]);
                }
                else
                {
                    parallel_boxes[t].combine(boxes[i]);
                }
            },
            [&parallel_boxes,&box](int t)
            {
                if (t == 0)
                {
                    box = parallel_boxes[0];
                }
                else
                {
                    box.combine(parallel_boxes[t]);
                }
            });
        axes_minmax = box;
    }
}
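
// Note (not from the original source): the three lambdas passed to
// igl::parallel_for above act as prep (size the per-thread partial
// bounds), func (fold box i into thread t's partial bound), and accum
// (merge the partials serially at the end); the t == 0 partial is copied
// rather than combined because `box` starts with no initialized bounds.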
template<uint N>
template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
inline void BVH<N>::initNode(UT_Array<Node>& nodes, Node &node, const Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, SRC_INT_TYPE* indices, const INT_TYPE nboxes) noexcept {
    if (nboxes <= N) {
        // Fits in one node
        for (INT_TYPE i = 0; i < nboxes; ++i) {
            node.child[i] = indices[i];
        }
        for (INT_TYPE i = nboxes; i < N; ++i) {
            node.child[i] = Node::EMPTY;
        }
        return;
    }
    SRC_INT_TYPE* sub_indices[N+1];
    Box<T,NAXES> sub_boxes[N];
    if (N == 2) {
        sub_indices[0] = indices;
        sub_indices[2] = indices+nboxes;
        split<H>(axes_minmax, boxes, indices, nboxes, sub_indices[1], &sub_boxes[0]);
    }
    else {
        multiSplit<H>(axes_minmax, boxes, indices, nboxes, sub_indices, sub_boxes);
    }
    // Count the number of nodes to run in parallel and fill in single items in this node
    INT_TYPE nparallel = 0;
    static constexpr INT_TYPE PARALLEL_THRESHOLD = 1024;
    for (INT_TYPE i = 0; i < N; ++i) {
        INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
        if (sub_nboxes == 1) {
            node.child[i] = sub_indices[i][0];
        }
        else if (sub_nboxes >= PARALLEL_THRESHOLD) {
            ++nparallel;
        }
    }
    // NOTE: Child nodes of this node need to be placed just before the nodes in
    //       their corresponding subtree, in between the subtrees, because
    //       traverseParallel uses the difference between the child node IDs
    //       to determine the number of nodes in the subtree.
    // Recurse
    if (nparallel >= 2) {
        UT_SmallArray<UT_Array<Node>> parallel_nodes;
        UT_SmallArray<Node> parallel_parent_nodes;
        parallel_nodes.setSize(nparallel);
        parallel_parent_nodes.setSize(nparallel);
        igl::parallel_for(
            nparallel,
            [&parallel_nodes,&parallel_parent_nodes,&sub_indices,boxes,&sub_boxes](int taski)
            {
                // First, find which child this is
                INT_TYPE counted_parallel = 0;
                INT_TYPE sub_nboxes;
                INT_TYPE childi;
                for (childi = 0; childi < N; ++childi) {
                    sub_nboxes = sub_indices[childi+1]-sub_indices[childi];
                    if (sub_nboxes >= PARALLEL_THRESHOLD) {
                        if (counted_parallel == taski) {
                            break;
                        }
                        ++counted_parallel;
                    }
                }
                UT_IGL_ASSERT_P(counted_parallel == taski);
                UT_Array<Node>& local_nodes = parallel_nodes[taski];
                // Preallocate an overestimate of the number of nodes needed.
                // At worst, we could have only 2 children in every leaf, and
                // then above that, we have a geometric series with r=1/N and a=(sub_nboxes/2)/N.
                // The true worst case might be a little worse than this, but
                // it's probably fairly unlikely.
                local_nodes.setCapacity(nodeEstimate(sub_nboxes));
                Node& parent_node = parallel_parent_nodes[taski];
                // We'll have to fix the internal node numbers in parent_node and local_nodes later
                initNode<H>(local_nodes, parent_node, sub_boxes[childi], boxes, sub_indices[childi], sub_nboxes);
            });
        INT_TYPE counted_parallel = 0;
        for (INT_TYPE i = 0; i < N; ++i) {
            INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
            if (sub_nboxes != 1) {
                INT_TYPE local_nodes_start = nodes.size();
                node.child[i] = Node::markInternal(local_nodes_start);
                if (sub_nboxes >= PARALLEL_THRESHOLD) {
                    // First, adjust the root child node
                    Node child_node = parallel_parent_nodes[counted_parallel];
                    ++local_nodes_start;
                    for (INT_TYPE childi = 0; childi < N; ++childi) {
                        INT_TYPE child_child = child_node.child[childi];
                        if (Node::isInternal(child_child) && child_child != Node::EMPTY) {
                            child_child += local_nodes_start;
                            child_node.child[childi] = child_child;
                        }
                    }
                    // Make space in the array for the sub-child nodes
                    const UT_Array<Node>& local_nodes = parallel_nodes[counted_parallel];
                    ++counted_parallel;
                    INT_TYPE n = local_nodes.size();
                    nodes.bumpCapacity(local_nodes_start + n);
                    nodes.setSizeNoInit(local_nodes_start + n);
                    nodes[local_nodes_start-1] = child_node;
                }
                else {
                    nodes.bumpCapacity(local_nodes_start + 1);
                    nodes.setSizeNoInit(local_nodes_start + 1);
                    initNode<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes);
                }
            }
        }
        // Now, adjust and copy all sub-child nodes that were made in parallel
        adjustParallelChildNodes<PARALLEL_THRESHOLD>(nparallel, nodes, node, parallel_nodes.array(), sub_indices);
    }
    else {
        for (INT_TYPE i = 0; i < N; ++i) {
            INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
            if (sub_nboxes != 1) {
                INT_TYPE local_nodes_start = nodes.size();
                node.child[i] = Node::markInternal(local_nodes_start);
                nodes.bumpCapacity(local_nodes_start + 1);
                nodes.setSizeNoInit(local_nodes_start + 1);
                initNode<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes);
            }
        }
    }
}
template<uint N>
template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
inline void BVH<N>::initNodeReorder(UT_Array<Node>& nodes, Node &node, const Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, const INT_TYPE indices_offset, const INT_TYPE max_items_per_leaf) noexcept {
    if (nboxes <= N) {
        // Fits in one node
        for (INT_TYPE i = 0; i < nboxes; ++i) {
            node.child[i] = indices_offset+i;
        }
        for (INT_TYPE i = nboxes; i < N; ++i) {
            node.child[i] = Node::EMPTY;
        }
        return;
    }
    SRC_INT_TYPE* sub_indices[N+1];
    Box<T,NAXES> sub_boxes[N];
    if (N == 2) {
        sub_indices[0] = indices;
        sub_indices[2] = indices+nboxes;
        split<H>(axes_minmax, boxes, indices, nboxes, sub_indices[1], &sub_boxes[0]);
    }
    else {
        multiSplit<H>(axes_minmax, boxes, indices, nboxes, sub_indices, sub_boxes);
    }
    // Move any children with max_items_per_leaf or fewer indices before any children with more,
    // for better cache coherence when we're accessing data in a corresponding array.
    INT_TYPE nleaves = 0;
    UT_SmallArray<SRC_INT_TYPE> leaf_indices;
    SRC_INT_TYPE leaf_sizes[N];
    INT_TYPE sub_nboxes0 = sub_indices[1]-sub_indices[0];
    if (sub_nboxes0 <= max_items_per_leaf) {
        leaf_sizes[0] = sub_nboxes0;
        for (int j = 0; j < sub_nboxes0; ++j)
            leaf_indices.append(sub_indices[0][j]);
        ++nleaves;
    }
    INT_TYPE sub_nboxes1 = sub_indices[2]-sub_indices[1];
    if (sub_nboxes1 <= max_items_per_leaf) {
        leaf_sizes[nleaves] = sub_nboxes1;
        for (int j = 0; j < sub_nboxes1; ++j)
            leaf_indices.append(sub_indices[1][j]);
        ++nleaves;
    }
    for (INT_TYPE i = 2; i < N; ++i) {
        INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
        if (sub_nboxes <= max_items_per_leaf) {
            leaf_sizes[nleaves] = sub_nboxes;
            for (int j = 0; j < sub_nboxes; ++j)
                leaf_indices.append(sub_indices[i][j]);
            ++nleaves;
        }
    }
    if (nleaves > 0) {
        // NOTE: i < N condition is because INT_TYPE is unsigned.
        //       i >= 0 condition is in case INT_TYPE is changed to signed.
        INT_TYPE move_distance = 0;
        INT_TYPE index_move_distance = 0;
        for (INT_TYPE i = N-1; (std::is_signed<INT_TYPE>::value ? (i >= 0) : (i < N)); --i) {
            INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
            if (sub_nboxes <= max_items_per_leaf) {
                ++move_distance;
                index_move_distance += sub_nboxes;
            }
            else if (move_distance > 0) {
                SRC_INT_TYPE *start_src_index = sub_indices[i];
                for (SRC_INT_TYPE *src_index = sub_indices[i+1]-1; src_index >= start_src_index; --src_index) {
                    src_index[index_move_distance] = src_index[0];
                }
                sub_indices[i+move_distance] = sub_indices[i]+index_move_distance;
            }
        }
        index_move_distance = 0;
        for (INT_TYPE i = 0; i < nleaves; ++i) {
            INT_TYPE sub_nboxes = leaf_sizes[i];
            sub_indices[i] = indices+index_move_distance;
            for (int j = 0; j < sub_nboxes; ++j)
                indices[index_move_distance+j] = leaf_indices[index_move_distance+j];
            index_move_distance += sub_nboxes;
        }
    }
    // Count the number of nodes to run in parallel and fill in single items in this node
    INT_TYPE nparallel = 0;
    static constexpr INT_TYPE PARALLEL_THRESHOLD = 1024;
    for (INT_TYPE i = 0; i < N; ++i) {
        INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
        if (sub_nboxes <= max_items_per_leaf) {
            node.child[i] = indices_offset+(sub_indices[i]-sub_indices[0]);
        }
        else if (sub_nboxes >= PARALLEL_THRESHOLD) {
            ++nparallel;
        }
    }
    // NOTE: Child nodes of this node need to be placed just before the nodes in
    //       their corresponding subtree, in between the subtrees, because
    //       traverseParallel uses the difference between the child node IDs
    //       to determine the number of nodes in the subtree.
    // Recurse
    if (nparallel >= 2 && false) {
        // The parallel version of this path has not been implemented yet.
        assert(false && "Not implemented; should never get here");
        exit(1);
        // // Do the parallel ones first, so that they can be inserted in the right place.
        // // Although the choice may seem somewhat arbitrary, we need the results to be
        // // identical whether we choose to parallelize or not, and in case we change the
        // // threshold later.
        // UT_SmallArray<UT_Array<Node>,4*sizeof(UT_Array<Node>)> parallel_nodes;
        // parallel_nodes.setSize(nparallel);
        // UT_SmallArray<Node,4*sizeof(Node)> parallel_parent_nodes;
        // parallel_parent_nodes.setSize(nparallel);
        // UTparallelFor(UT_BlockedRange<INT_TYPE>(0,nparallel), [&parallel_nodes,&parallel_parent_nodes,&sub_indices,boxes,&sub_boxes,indices_offset,max_items_per_leaf](const UT_BlockedRange<INT_TYPE>& r) {
        //     for (INT_TYPE taski = r.begin(), end = r.end(); taski < end; ++taski) {
        //         // First, find which child this is
        //         INT_TYPE counted_parallel = 0;
        //         INT_TYPE sub_nboxes;
        //         INT_TYPE childi;
        //         for (childi = 0; childi < N; ++childi) {
        //             sub_nboxes = sub_indices[childi+1]-sub_indices[childi];
        //             if (sub_nboxes >= PARALLEL_THRESHOLD) {
        //                 if (counted_parallel == taski) {
        //                     break;
        //                 }
        //                 ++counted_parallel;
        //             }
        //         }
        //         UT_IGL_ASSERT_P(counted_parallel == taski);
        //         UT_Array<Node>& local_nodes = parallel_nodes[taski];
        //         // Preallocate an overestimate of the number of nodes needed.
        //         // At worst, we could have only 2 children in every leaf, and
        //         // then above that, we have a geometric series with r=1/N and a=(sub_nboxes/2)/N.
        //         // The true worst case might be a little worse than this, but
        //         // it's probably fairly unlikely.
        //         local_nodes.setCapacity(nodeEstimate(sub_nboxes));
        //         Node& parent_node = parallel_parent_nodes[taski];
        //         // We'll have to fix the internal node numbers in parent_node and local_nodes later
        //         initNodeReorder<H>(local_nodes, parent_node, sub_boxes[childi], boxes, sub_indices[childi], sub_nboxes,
        //             indices_offset+(sub_indices[childi]-sub_indices[0]), max_items_per_leaf);
        //     }
        // }, 0, 1);
        // INT_TYPE counted_parallel = 0;
        // for (INT_TYPE i = 0; i < N; ++i) {
        //     INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
        //     if (sub_nboxes > max_items_per_leaf) {
        //         INT_TYPE local_nodes_start = nodes.size();
        //         node.child[i] = Node::markInternal(local_nodes_start);
        //         if (sub_nboxes >= PARALLEL_THRESHOLD) {
        //             // First, adjust the root child node
        //             Node child_node = parallel_parent_nodes[counted_parallel];
        //             ++local_nodes_start;
        //             for (INT_TYPE childi = 0; childi < N; ++childi) {
        //                 INT_TYPE child_child = child_node.child[childi];
        //                 if (Node::isInternal(child_child) && child_child != Node::EMPTY) {
        //                     child_child += local_nodes_start;
        //                     child_node.child[childi] = child_child;
        //                 }
        //             }
        //             // Make space in the array for the sub-child nodes
        //             const UT_Array<Node>& local_nodes = parallel_nodes[counted_parallel];
        //             ++counted_parallel;
        //             INT_TYPE n = local_nodes.size();
        //             nodes.bumpCapacity(local_nodes_start + n);
        //             nodes.setSizeNoInit(local_nodes_start + n);
        //             nodes[local_nodes_start-1] = child_node;
        //         }
        //         else {
        //             nodes.bumpCapacity(local_nodes_start + 1);
        //             nodes.setSizeNoInit(local_nodes_start + 1);
        //             initNodeReorder<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes,
        //                 indices_offset+(sub_indices[i]-sub_indices[0]), max_items_per_leaf);
        //         }
        //     }
        // }
        // // Now, adjust and copy all sub-child nodes that were made in parallel
        // adjustParallelChildNodes<PARALLEL_THRESHOLD>(nparallel, nodes, node, parallel_nodes.array(), sub_indices);
    }
    else {
        for (INT_TYPE i = 0; i < N; ++i) {
            INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
            if (sub_nboxes > max_items_per_leaf) {
                INT_TYPE local_nodes_start = nodes.size();
                node.child[i] = Node::markInternal(local_nodes_start);
                nodes.bumpCapacity(local_nodes_start + 1);
                nodes.setSizeNoInit(local_nodes_start + 1);
                initNodeReorder<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes,
                    indices_offset+(sub_indices[i]-sub_indices[0]), max_items_per_leaf);
            }
        }
    }
}
template<uint N>
template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
inline void BVH<N>::multiSplit(const Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE* sub_indices[N+1], Box<T,NAXES> sub_boxes[N]) noexcept {
    sub_indices[0] = indices;
    sub_indices[2] = indices+nboxes;
    split<H>(axes_minmax, boxes, indices, nboxes, sub_indices[1], &sub_boxes[0]);
    if (N == 2) {
        return;
    }
    if (H == BVH_Heuristic::MEDIAN_MAX_AXIS) {
        SRC_INT_TYPE* sub_indices_startend[2*N];
        Box<T,NAXES> sub_boxes_unsorted[N];
        sub_boxes_unsorted[0] = sub_boxes[0];
        sub_boxes_unsorted[1] = sub_boxes[1];
        sub_indices_startend[0] = sub_indices[0];
        sub_indices_startend[1] = sub_indices[1];
        sub_indices_startend[2] = sub_indices[1];
        sub_indices_startend[3] = sub_indices[2];
        for (INT_TYPE nsub = 2; nsub < N; ++nsub) {
            SRC_INT_TYPE* selected_start = sub_indices_startend[0];
            SRC_INT_TYPE* selected_end = sub_indices_startend[1];
            Box<T,NAXES> sub_box = sub_boxes_unsorted[0];
            // Shift results back.
            for (INT_TYPE i = 0; i < nsub-1; ++i) {
                sub_indices_startend[2*i  ] = sub_indices_startend[2*i+2];
                sub_indices_startend[2*i+1] = sub_indices_startend[2*i+3];
            }
            // NOTE: This must copy from i+1 down to i; copying from i-1
            //       would read out of bounds at i == 0.
            for (INT_TYPE i = 0; i < nsub-1; ++i) {
                sub_boxes_unsorted[i] = sub_boxes_unsorted[i+1];
            }
            // Do the split
            split<H>(sub_box, boxes, selected_start, selected_end-selected_start, sub_indices_startend[2*nsub-1], &sub_boxes_unsorted[nsub]);
            sub_indices_startend[2*nsub-2] = selected_start;
            sub_indices_startend[2*nsub  ] = sub_indices_startend[2*nsub-1];
            sub_indices_startend[2*nsub+1] = selected_end;
        }
        // Sort pointers so that they're in the correct order.
        // NOTE: This happens after all N-2 extra splits, once every entry
        //       of sub_indices_startend has been initialized.
        sub_indices[N] = indices+nboxes;
        for (INT_TYPE i = 0; i < N; ++i) {
            SRC_INT_TYPE* prev_pointer = (i != 0) ? sub_indices[i-1] : nullptr;
            SRC_INT_TYPE* min_pointer = nullptr;
            Box<T,NAXES> box;
            for (INT_TYPE j = 0; j < N; ++j) {
                SRC_INT_TYPE* cur_pointer = sub_indices_startend[2*j];
                if ((cur_pointer > prev_pointer) && (!min_pointer || (cur_pointer < min_pointer))) {
                    min_pointer = cur_pointer;
                    box = sub_boxes_unsorted[j];
                }
            }
            UT_IGL_ASSERT_P(min_pointer);
            sub_indices[i] = min_pointer;
            sub_boxes[i] = box;
        }
    }
    else {
        T sub_box_areas[N];
        sub_box_areas[0] = unweightedHeuristic<H>(sub_boxes[0]);
        sub_box_areas[1] = unweightedHeuristic<H>(sub_boxes[1]);
        for (INT_TYPE nsub = 2; nsub < N; ++nsub) {
            // Choose which one to split
            INT_TYPE split_choice = INT_TYPE(-1);
            T max_heuristic;
            for (INT_TYPE i = 0; i < nsub; ++i) {
                const INT_TYPE index_count = (sub_indices[i+1]-sub_indices[i]);
                if (index_count > 1) {
                    const T heuristic = sub_box_areas[i]*index_count;
                    if (split_choice == INT_TYPE(-1) || heuristic > max_heuristic) {
                        split_choice = i;
                        max_heuristic = heuristic;
                    }
                }
            }
            UT_IGL_ASSERT_MSG_P(split_choice != INT_TYPE(-1), "There should always be at least one that can be split!");
            SRC_INT_TYPE* selected_start = sub_indices[split_choice];
            SRC_INT_TYPE* selected_end = sub_indices[split_choice+1];
            // Shift results over; we can skip the one we selected.
            for (INT_TYPE i = nsub; i > split_choice; --i) {
                sub_indices[i+1] = sub_indices[i];
            }
            for (INT_TYPE i = nsub-1; i > split_choice; --i) {
                sub_boxes[i+1] = sub_boxes[i];
            }
            for (INT_TYPE i = nsub-1; i > split_choice; --i) {
                sub_box_areas[i+1] = sub_box_areas[i];
            }
            // Do the split
            split<H>(sub_boxes[split_choice], boxes, selected_start, selected_end-selected_start, sub_indices[split_choice+1], &sub_boxes[split_choice]);
            sub_box_areas[split_choice] = unweightedHeuristic<H>(sub_boxes[split_choice]);
            sub_box_areas[split_choice+1] = unweightedHeuristic<H>(sub_boxes[split_choice+1]);
        }
    }
}
template<uint N>
template<BVH_Heuristic H,typename T,uint NAXES,typename BOX_TYPE,typename SRC_INT_TYPE>
inline void BVH<N>::split(const Box<T,NAXES>& axes_minmax, const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE*& split_indices, Box<T,NAXES>* split_boxes) noexcept {
    if (nboxes == 2) {
        split_boxes[0].initBounds(boxes[indices[0]]);
        split_boxes[1].initBounds(boxes[indices[1]]);
        split_indices = indices+1;
        return;
    }
    UT_IGL_ASSERT_MSG_P(nboxes > 2, "Cases with less than 3 boxes should have already been handled!");
    if (H == BVH_Heuristic::MEDIAN_MAX_AXIS) {
        UT_IGL_ASSERT_MSG(0, "FIXME: Implement this!!!");
    }
    constexpr INT_TYPE SMALL_LIMIT = 6;
    if (nboxes <= SMALL_LIMIT) {
        // Special case for a small number of boxes: check all (2^(n-1))-1 partitions.
        // Without loss of generality, we assume that box 0 is in partition 0,
        // and that not all boxes are in partition 0.
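        // Worked example (not from the original source): for nboxes == 3,
        // partition_limit is 4, so partition_bits takes the values 1..3;
        // bit b sends box b+1 to partition 1, giving the three distinct
        // splits {0,2}|{1}, {0,1}|{2}, and {0}|{1,2}.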
        Box<T,NAXES> local_boxes[SMALL_LIMIT];
        for (INT_TYPE box = 0; box < nboxes; ++box) {
            local_boxes[box].initBounds(boxes[indices[box]]);
            //printf("Box %u: (%f-%f)x(%f-%f)x(%f-%f)\n", uint(box), local_boxes[box].vals[0][0], local_boxes[box].vals[0][1], local_boxes[box].vals[1][0], local_boxes[box].vals[1][1], local_boxes[box].vals[2][0], local_boxes[box].vals[2][1]);
        }
        const INT_TYPE partition_limit = (INT_TYPE(1)<<(nboxes-1));
        INT_TYPE best_partition = INT_TYPE(-1);
        T best_heuristic;
        for (INT_TYPE partition_bits = 1; partition_bits < partition_limit; ++partition_bits) {
            Box<T,NAXES> sub_boxes[2];
            sub_boxes[0] = local_boxes[0];
            sub_boxes[1].initBounds();
            INT_TYPE sub_counts[2] = {1,0};
            for (INT_TYPE bit = 0; bit < nboxes-1; ++bit) {
                INT_TYPE dest = (partition_bits>>bit)&1;
                sub_boxes[dest].combine(local_boxes[bit+1]);
                ++sub_counts[dest];
            }
            //printf("Partition bits %u: sub_box[0]: (%f-%f)x(%f-%f)x(%f-%f)\n", uint(partition_bits), sub_boxes[0].vals[0][0], sub_boxes[0].vals[0][1], sub_boxes[0].vals[1][0], sub_boxes[0].vals[1][1], sub_boxes[0].vals[2][0], sub_boxes[0].vals[2][1]);
            //printf("Partition bits %u: sub_box[1]: (%f-%f)x(%f-%f)x(%f-%f)\n", uint(partition_bits), sub_boxes[1].vals[0][0], sub_boxes[1].vals[0][1], sub_boxes[1].vals[1][0], sub_boxes[1].vals[1][1], sub_boxes[1].vals[2][0], sub_boxes[1].vals[2][1]);
            const T heuristic =
                unweightedHeuristic<H>(sub_boxes[0])*sub_counts[0] +
                unweightedHeuristic<H>(sub_boxes[1])*sub_counts[1];
            //printf("Partition bits %u: heuristic = %f (= %f*%u + %f*%u)\n",uint(partition_bits),heuristic, unweightedHeuristic<H>(sub_boxes[0]), uint(sub_counts[0]), unweightedHeuristic<H>(sub_boxes[1]), uint(sub_counts[1]));
            if (best_partition == INT_TYPE(-1) || heuristic < best_heuristic) {
                //printf("    New best\n");
                best_partition = partition_bits;
                best_heuristic = heuristic;
                split_boxes[0] = sub_boxes[0];
                split_boxes[1] = sub_boxes[1];
            }
        }
#if 0 // This isn't actually necessary with the current design, because I changed how the number of subtree nodes is determined.
        // If best_partition is partition_limit-1, there's only 1 box
        // in partition 0. We should instead put this in partition 1,
        // so that we can always have the internal node indices first
        // in each node. That gets used to (fairly) quickly determine
        // the number of nodes in a sub-tree.
        if (best_partition == partition_limit - 1) {
            // Put the first index last.
            SRC_INT_TYPE last_index = indices[0];
            SRC_INT_TYPE* dest_indices = indices;
            SRC_INT_TYPE* local_split_indices = indices + nboxes-1;
            for (; dest_indices != local_split_indices; ++dest_indices) {
                dest_indices[0] = dest_indices[1];
            }
            *local_split_indices = last_index;
            split_indices = local_split_indices;
            // Swap the boxes
            const Box<T,NAXES> temp_box = sub_boxes[0];
            sub_boxes[0] = sub_boxes[1];
            sub_boxes[1] = temp_box;
            return;
        }
#endif
        // Reorder the indices.
        // NOTE: Index 0 is always in partition 0, so it can stay put.
        SRC_INT_TYPE local_indices[SMALL_LIMIT-1];
        for (INT_TYPE box = 0; box < nboxes-1; ++box) {
            local_indices[box] = indices[box+1];
        }
        SRC_INT_TYPE* dest_indices = indices+1;
        SRC_INT_TYPE* src_indices = local_indices;
        // Copy partition 0
        for (INT_TYPE bit = 0; bit < nboxes-1; ++bit, ++src_indices) {
            if (!((best_partition>>bit)&1)) {
                //printf("Copying %u into partition 0\n",uint(*src_indices));
                *dest_indices = *src_indices;
                ++dest_indices;
            }
        }
        split_indices = dest_indices;
        // Copy partition 1
        src_indices = local_indices;
        for (INT_TYPE bit = 0; bit < nboxes-1; ++bit, ++src_indices) {
            if ((best_partition>>bit)&1) {
                //printf("Copying %u into partition 1\n",uint(*src_indices));
                *dest_indices = *src_indices;
                ++dest_indices;
            }
        }
        return;
    }
    uint max_axis = 0;
    T max_axis_length = axes_minmax.vals[0][1] - axes_minmax.vals[0][0];
    for (uint axis = 1; axis < NAXES; ++axis) {
        const T axis_length = axes_minmax.vals[axis][1] - axes_minmax.vals[axis][0];
        if (axis_length > max_axis_length) {
            max_axis = axis;
            max_axis_length = axis_length;
        }
    }
    if (!(max_axis_length > T(0))) {
        // All boxes are a single point or NaN.
        // Pick an arbitrary split point.
        split_indices = indices + nboxes/2;
        split_boxes[0] = axes_minmax;
        split_boxes[1] = axes_minmax;
        return;
    }
    const INT_TYPE axis = max_axis;
    constexpr INT_TYPE MID_LIMIT = 2*NSPANS;
    if (nboxes <= MID_LIMIT) {
        // Sort along axis, and try all possible splits.
#if 1
        // First, compute midpoints
        T midpointsx2[MID_LIMIT];
        for (INT_TYPE i = 0; i < nboxes; ++i) {
            midpointsx2[i] = utBoxCenter(boxes[indices[i]], axis);
        }
        SRC_INT_TYPE local_indices[MID_LIMIT];
        for (INT_TYPE i = 0; i < nboxes; ++i) {
            local_indices[i] = i;
        }
        const INT_TYPE chunk_starts[5] = {0, nboxes/4, nboxes/2, INT_TYPE((3*uint64(nboxes))/4), nboxes};
        // For sorting, insertion sort 4 chunks and merge them
        for (INT_TYPE chunk = 0; chunk < 4; ++chunk) {
            const INT_TYPE start = chunk_starts[chunk];
            const INT_TYPE end = chunk_starts[chunk+1];
            for (INT_TYPE i = start+1; i < end; ++i) {
                SRC_INT_TYPE indexi = local_indices[i];
                T vi = midpointsx2[indexi];
                for (INT_TYPE j = start; j < i; ++j) {
                    SRC_INT_TYPE indexj = local_indices[j];
                    T vj = midpointsx2[indexj];
                    if (vi < vj) {
                        do {
                            local_indices[j] = indexi;
                            indexi = indexj;
                            ++j;
                            if (j == i) {
                                local_indices[j] = indexi;
                                break;
                            }
                            indexj = local_indices[j];
                        } while (true);
                        break;
                    }
                }
            }
        }
        // Merge chunks into another buffer
        SRC_INT_TYPE local_indices_temp[MID_LIMIT];
        std::merge(local_indices, local_indices+chunk_starts[1],
                   local_indices+chunk_starts[1], local_indices+chunk_starts[2],
                   local_indices_temp, [&midpointsx2](const SRC_INT_TYPE a, const SRC_INT_TYPE b)->bool {
            return midpointsx2[a] < midpointsx2[b];
        });
        std::merge(local_indices+chunk_starts[2], local_indices+chunk_starts[3],
                   local_indices+chunk_starts[3], local_indices+chunk_starts[4],
                   local_indices_temp+chunk_starts[2], [&midpointsx2](const SRC_INT_TYPE a, const SRC_INT_TYPE b)->bool {
            return midpointsx2[a] < midpointsx2[b];
        });
        std::merge(local_indices_temp, local_indices_temp+chunk_starts[2],
                   local_indices_temp+chunk_starts[2], local_indices_temp+chunk_starts[4],
                   local_indices, [&midpointsx2](const SRC_INT_TYPE a, const SRC_INT_TYPE b)->bool {
            return midpointsx2[a] < midpointsx2[b];
        });
        // Translate local_indices into indices
        for (INT_TYPE i = 0; i < nboxes; ++i) {
            local_indices[i] = indices[local_indices[i]];
        }
        // Copy back
        for (INT_TYPE i = 0; i < nboxes; ++i) {
            indices[i] = local_indices[i];
        }
#else
        std::stable_sort(indices, indices+nboxes, [boxes,max_axis](SRC_INT_TYPE a, SRC_INT_TYPE b)->bool {
            return utBoxCenter(boxes[a], max_axis) < utBoxCenter(boxes[b], max_axis);
        });
#endif
        // Accumulate boxes
        Box<T,NAXES> left_boxes[MID_LIMIT-1];
        Box<T,NAXES> right_boxes[MID_LIMIT-1];
        const INT_TYPE nsplits = nboxes-1;
        Box<T,NAXES> box_accumulator(boxes[local_indices[0]]);
        left_boxes[0] = box_accumulator;
        for (INT_TYPE i = 1; i < nsplits; ++i) {
            box_accumulator.combine(boxes[local_indices[i]]);
            left_boxes[i] = box_accumulator;
        }
        // NOTE: The right-side accumulation starts from the last box
        //       (index nsplits == nboxes-1), so that right_boxes[s]
        //       covers boxes s+1 through nboxes-1.
        box_accumulator.initBounds(boxes[local_indices[nsplits]]);
        right_boxes[nsplits-1] = box_accumulator;
        for (INT_TYPE i = nsplits-1; i > 0; --i) {
            box_accumulator.combine(boxes[local_indices[i]]);
            right_boxes[i-1] = box_accumulator;
        }
        INT_TYPE best_split = 0;
        T best_local_heuristic =
            unweightedHeuristic<H>(left_boxes[0]) +
            unweightedHeuristic<H>(right_boxes[0])*(nboxes-1);
        for (INT_TYPE split = 1; split < nsplits; ++split) {
            const T heuristic =
                unweightedHeuristic<H>(left_boxes[split])*(split+1) +
                unweightedHeuristic<H>(right_boxes[split])*(nboxes-(split+1));
            if (heuristic < best_local_heuristic) {
                best_split = split;
                best_local_heuristic = heuristic;
            }
        }
        split_indices = indices+best_split+1;
        split_boxes[0] = left_boxes[best_split];
        split_boxes[1] = right_boxes[best_split];
        return;
    }
    const T axis_min = axes_minmax.vals[max_axis][0];
    const T axis_length = max_axis_length;
    Box<T,NAXES> span_boxes[NSPANS];
    for (INT_TYPE i = 0; i < NSPANS; ++i) {
        span_boxes[i].initBounds();
    }
    INT_TYPE span_counts[NSPANS];
    for (INT_TYPE i = 0; i < NSPANS; ++i) {
        span_counts[i] = 0;
    }
    const T axis_min_x2 = ut_BoxCentre<BOX_TYPE>::scale*axis_min;
    // NOTE: The factor of 0.5 is factored out of the average when using the average value to determine the span that a box lies in.
    const T axis_index_scale = (T(1.0/ut_BoxCentre<BOX_TYPE>::scale)*NSPANS)/axis_length;
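    // For intuition, a small numeric sketch (illustrative values, assuming
    // ut_BoxCentre scale == 2 and NSPANS == 16): if the axis spans [0,10],
    // then axis_min_x2 == 0 and axis_index_scale == (0.5*16)/10 == 0.8.
    // A box centred at 2.5 has a doubled centre ("sum") of 5, giving
    //     span_index = clamp(int(5*0.8), 0, 15) = 4,
    // the same bin as floor((2.5/10)*16), without ever halving the sum.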
    constexpr INT_TYPE BOX_SPANS_PARALLEL_THRESHOLD = 2048;
    INT_TYPE ntasks = 1;
    if (nboxes >= BOX_SPANS_PARALLEL_THRESHOLD) {
        INT_TYPE nprocessors = UT_Thread::getNumProcessors();
        ntasks = (nprocessors > 1) ? SYSmin(4*nprocessors, nboxes/(BOX_SPANS_PARALLEL_THRESHOLD/2)) : 1;
    }
    if (ntasks == 1) {
        for (INT_TYPE indexi = 0; indexi < nboxes; ++indexi) {
            const auto& box = boxes[indices[indexi]];
            const T sum = utBoxCenter(box, axis);
            const uint span_index = SYSclamp(int((sum-axis_min_x2)*axis_index_scale), int(0), int(NSPANS-1));
            ++span_counts[span_index];
            Box<T,NAXES>& span_box = span_boxes[span_index];
            span_box.combine(box);
        }
    }
    else {
        UT_SmallArray<Box<T,NAXES>> parallel_boxes;
        UT_SmallArray<INT_TYPE> parallel_counts;
        igl::parallel_for(
            nboxes,
            [&parallel_boxes,&parallel_counts](int n)
            {
                parallel_boxes.setSize(NSPANS*n);
                parallel_counts.setSize(NSPANS*n);
                for (int t = 0; t < n; t++)
                {
                    for (INT_TYPE i = 0; i < NSPANS; ++i)
                    {
                        parallel_boxes[t*NSPANS+i].initBounds();
                        parallel_counts[t*NSPANS+i] = 0;
                    }
                }
            },
            [&parallel_boxes,&parallel_counts,&boxes,indices,axis,axis_min_x2,axis_index_scale](int j, int t)
            {
                const auto& box = boxes[indices[j]];
                const T sum = utBoxCenter(box, axis);
                const uint span_index = SYSclamp(int((sum-axis_min_x2)*axis_index_scale), int(0), int(NSPANS-1));
                ++parallel_counts[t*NSPANS+span_index];
                Box<T,NAXES>& span_box = parallel_boxes[t*NSPANS+span_index];
                span_box.combine(box);
            },
            [&parallel_boxes,&parallel_counts,&span_boxes,&span_counts](int t)
            {
                for (int i = 0; i < NSPANS; i++)
                {
                    span_counts[i] += parallel_counts[t*NSPANS + i];
                    span_boxes[i].combine(parallel_boxes[t*NSPANS + i]);
                }
            });
    }
    // Spans 0 to NSPANS-2
    Box<T,NAXES> left_boxes[NSPLITS];
    // Spans 1 to NSPANS-1
    Box<T,NAXES> right_boxes[NSPLITS];
    // Accumulate boxes
    Box<T,NAXES> box_accumulator = span_boxes[0];
    left_boxes[0] = box_accumulator;
    for (INT_TYPE i = 1; i < NSPLITS; ++i) {
        box_accumulator.combine(span_boxes[i]);
        left_boxes[i] = box_accumulator;
    }
    box_accumulator = span_boxes[NSPANS-1];
    right_boxes[NSPLITS-1] = box_accumulator;
    for (INT_TYPE i = NSPLITS-1; i > 0; --i) {
        box_accumulator.combine(span_boxes[i]);
        right_boxes[i-1] = box_accumulator;
    }
    INT_TYPE left_counts[NSPLITS];
    // Accumulate counts
    INT_TYPE count_accumulator = span_counts[0];
    left_counts[0] = count_accumulator;
    for (INT_TYPE spliti = 1; spliti < NSPLITS; ++spliti) {
        count_accumulator += span_counts[spliti];
        left_counts[spliti] = count_accumulator;
    }
    // Check which split is optimal, making sure that at least 1/MIN_FRACTION of all boxes are on each side.
    const INT_TYPE min_count = nboxes/MIN_FRACTION;
    UT_IGL_ASSERT_MSG_P(min_count > 0, "MID_LIMIT above should have been large enough that nboxes would be > MIN_FRACTION");
    const INT_TYPE max_count = ((MIN_FRACTION-1)*uint64(nboxes))/MIN_FRACTION;
    UT_IGL_ASSERT_MSG_P(max_count < nboxes, "This shouldn't be possible mathematically, but it's checked just in case.");
    T smallest_heuristic = std::numeric_limits<T>::infinity();
    INT_TYPE split_index = -1;
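    // The loop below minimizes a surface-area-heuristic (SAH) style cost:
    //     cost(split) = count_L*unweightedHeuristic(box_L) + count_R*unweightedHeuristic(box_R).
    // A tiny worked example (illustrative numbers only): with nboxes == 100,
    // a candidate with (count_L=50, h_L=2, count_R=50, h_R=2) costs
    // 50*2 + 50*2 = 200, while (count_L=90, h_L=3, count_R=10, h_R=1) costs
    // 90*3 + 10*1 = 280, so the balanced split with smaller boxes wins.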
    for (INT_TYPE spliti = 0; spliti < NSPLITS; ++spliti) {
        const INT_TYPE left_count = left_counts[spliti];
        if (left_count < min_count || left_count > max_count) {
            continue;
        }
        const INT_TYPE right_count = nboxes-left_count;
        const T heuristic =
            left_count*unweightedHeuristic<H>(left_boxes[spliti]) +
            right_count*unweightedHeuristic<H>(right_boxes[spliti]);
        if (heuristic < smallest_heuristic) {
            smallest_heuristic = heuristic;
            split_index = spliti;
        }
    }
    SRC_INT_TYPE*const indices_end = indices+nboxes;
    if (split_index == -1) {
        // No split was anywhere close to balanced, so we fall back to searching for one.
        // First, find the span containing the "balance" point, namely where left_counts goes from
        // being less than min_count to more than max_count.
        // If that's span 0, use max_count as the ordered index to select;
        // if it's span NSPANS-1, use min_count as the ordered index to select;
        // else use nboxes/2 as the ordered index to select.
        //T min_pivotx2 = -std::numeric_limits<T>::infinity();
        //T max_pivotx2 = std::numeric_limits<T>::infinity();
        SRC_INT_TYPE* nth_index;
        if (left_counts[0] > max_count) {
            // Search for max_count ordered index
            nth_index = indices+max_count;
            //max_pivotx2 = max_axis_min_x2 + max_axis_length/(NSPANS/ut_BoxCentre<BOX_TYPE>::scale);
        }
        else if (left_counts[NSPLITS-1] < min_count) {
            // Search for min_count ordered index
            nth_index = indices+min_count;
            //min_pivotx2 = max_axis_min_x2 + max_axis_length - max_axis_length/(NSPANS/ut_BoxCentre<BOX_TYPE>::scale);
        }
        else {
            // Search for nboxes/2 ordered index
            nth_index = indices+nboxes/2;
            //for (INT_TYPE spliti = 1; spliti < NSPLITS; ++spliti) {
            //    // The second condition should be redundant, but is just in case.
            //    if (left_counts[spliti] > max_count || spliti == NSPLITS-1) {
            //        min_pivotx2 = max_axis_min_x2 + spliti*max_axis_length/(NSPANS/ut_BoxCentre<BOX_TYPE>::scale);
            //        max_pivotx2 = max_axis_min_x2 + (spliti+1)*max_axis_length/(NSPANS/ut_BoxCentre<BOX_TYPE>::scale);
            //        break;
            //    }
            //}
        }
        nthElement<T>(boxes,indices,indices+nboxes,max_axis,nth_index);//,min_pivotx2,max_pivotx2);
        split_indices = nth_index;
        Box<T,NAXES> left_box(boxes[indices[0]]);
        for (SRC_INT_TYPE* left_indices = indices+1; left_indices < nth_index; ++left_indices) {
            left_box.combine(boxes[*left_indices]);
        }
        Box<T,NAXES> right_box(boxes[nth_index[0]]);
        for (SRC_INT_TYPE* right_indices = nth_index+1; right_indices < indices_end; ++right_indices) {
            right_box.combine(boxes[*right_indices]);
        }
        split_boxes[0] = left_box;
        split_boxes[1] = right_box;
    }
    else {
        const T pivotx2 = axis_min_x2 + (split_index+1)*axis_length/(NSPANS/ut_BoxCentre<BOX_TYPE>::scale);
        SRC_INT_TYPE* ppivot_start;
        SRC_INT_TYPE* ppivot_end;
        partitionByCentre(boxes,indices,indices+nboxes,max_axis,pivotx2,ppivot_start,ppivot_end);
        split_indices = indices + left_counts[split_index];
        // Ignoring roundoff error, we would have
        // split_indices >= ppivot_start && split_indices <= ppivot_end,
        // but it may not always hold in practice.
        if (split_indices >= ppivot_start && split_indices <= ppivot_end) {
            split_boxes[0] = left_boxes[split_index];
            split_boxes[1] = right_boxes[split_index];
            return;
        }
        // Roundoff error changed the split, so we need to recompute the boxes.
        if (split_indices < ppivot_start) {
            split_indices = ppivot_start;
        }
        else {//(split_indices > ppivot_end)
            split_indices = ppivot_end;
        }
        // Emergency checks, just in case
        if (split_indices == indices) {
            ++split_indices;
        }
        else if (split_indices == indices_end) {
            --split_indices;
        }
        Box<T,NAXES> left_box(boxes[indices[0]]);
        for (SRC_INT_TYPE* left_indices = indices+1; left_indices < split_indices; ++left_indices) {
            left_box.combine(boxes[*left_indices]);
        }
        Box<T,NAXES> right_box(boxes[split_indices[0]]);
        for (SRC_INT_TYPE* right_indices = split_indices+1; right_indices < indices_end; ++right_indices) {
            right_box.combine(boxes[*right_indices]);
        }
        split_boxes[0] = left_box;
        split_boxes[1] = right_box;
    }
}
template<uint N>
template<uint PARALLEL_THRESHOLD, typename SRC_INT_TYPE>
inline void BVH<N>::adjustParallelChildNodes(INT_TYPE nparallel, UT_Array<Node>& nodes, Node& node, UT_Array<Node>* parallel_nodes, SRC_INT_TYPE* sub_indices) noexcept
{
    // Alec: No need to parallelize this...
    //UTparallelFor(UT_BlockedRange<INT_TYPE>(0,nparallel), [&node,&nodes,&parallel_nodes,&sub_indices](const UT_BlockedRange<INT_TYPE>& r) {
    INT_TYPE counted_parallel = 0;
    INT_TYPE childi = 0;
    for (int taski = 0; taski < nparallel; taski++)
    {
        //for (INT_TYPE taski = r.begin(), end = r.end(); taski < end; ++taski) {
        // First, find which child this is
        INT_TYPE sub_nboxes;
        for (; childi < N; ++childi) {
            sub_nboxes = sub_indices[childi+1]-sub_indices[childi];
            if (sub_nboxes >= PARALLEL_THRESHOLD) {
                if (counted_parallel == taski) {
                    break;
                }
                ++counted_parallel;
            }
        }
        UT_IGL_ASSERT_P(counted_parallel == taski);
        const UT_Array<Node>& local_nodes = parallel_nodes[counted_parallel];
        INT_TYPE n = local_nodes.size();
        INT_TYPE local_nodes_start = Node::getInternalNum(node.child[childi])+1;
        ++counted_parallel;
        ++childi;
        for (INT_TYPE j = 0; j < n; ++j) {
            Node local_node = local_nodes[j];
            for (INT_TYPE childj = 0; childj < N; ++childj) {
                INT_TYPE local_child = local_node.child[childj];
                if (Node::isInternal(local_child) && local_child != Node::EMPTY) {
                    local_child += local_nodes_start;
                    local_node.child[childj] = local_child;
                }
            }
            nodes[local_nodes_start+j] = local_node;
        }
    }
}
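// The fix-up above is just an index-space translation: each parallel task
// built its subtree with node numbers local to its own array, so when that
// array is spliced into the global node list, every internal child reference
// must be shifted by the subtree's global starting offset. For example
// (illustrative numbers): if a task's subtree lands at global index 10 and
// one of its nodes refers to local internal node 3, the stored child becomes
// internal node 13 after the += local_nodes_start adjustment.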
template<uint N>
template<typename T,typename BOX_TYPE,typename SRC_INT_TYPE>
void BVH<N>::nthElement(const BOX_TYPE* boxes, SRC_INT_TYPE* indices, const SRC_INT_TYPE* indices_end, const uint axis, SRC_INT_TYPE*const nth) noexcept {//, const T min_pivotx2, const T max_pivotx2) noexcept {
    while (true) {
        // Choose the median of the first, middle, and last elements as the pivot
        T pivots[3] = {
            utBoxCenter(boxes[indices[0]], axis),
            utBoxCenter(boxes[indices[(indices_end-indices)/2]], axis),
            utBoxCenter(boxes[*(indices_end-1)], axis)
        };
        if (pivots[0] < pivots[1]) {
            const T temp = pivots[0];
            pivots[0] = pivots[1];
            pivots[1] = temp;
        }
        if (pivots[0] < pivots[2]) {
            const T temp = pivots[0];
            pivots[0] = pivots[2];
            pivots[2] = temp;
        }
        if (pivots[1] < pivots[2]) {
            const T temp = pivots[1];
            pivots[1] = pivots[2];
            pivots[2] = temp;
        }
        T mid_pivotx2 = pivots[1];
#if 0
        // We limit the pivot, because we know that the true value is between min and max
        if (mid_pivotx2 < min_pivotx2) {
            mid_pivotx2 = min_pivotx2;
        }
        else if (mid_pivotx2 > max_pivotx2) {
            mid_pivotx2 = max_pivotx2;
        }
#endif
        SRC_INT_TYPE* pivot_start;
        SRC_INT_TYPE* pivot_end;
        partitionByCentre(boxes,indices,indices_end,axis,mid_pivotx2,pivot_start,pivot_end);
        if (nth < pivot_start) {
            indices_end = pivot_start;
        }
        else if (nth < pivot_end) {
            // nth is in the middle of the pivot range,
            // which is in the right place, so we're done.
            return;
        }
        else {
            indices = pivot_end;
        }
        if (indices_end <= indices+1) {
            return;
        }
    }
}
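// nthElement is an iterative quickselect over box centres, partitioning
// around a median-of-three pivot until nth lands inside the pivot range.
// Roughly the same effect could be had with the standard library (a sketch,
// assuming a mutable end pointer; the hand-rolled loop is kept because it
// reuses partitionByCentre and groups equal centres in a single pass):
//
//     std::nth_element(indices, nth, indices + nboxes,
//         [boxes,axis](SRC_INT_TYPE a, SRC_INT_TYPE b) -> bool {
//             return utBoxCenter(boxes[a], axis) < utBoxCenter(boxes[b], axis);
//         });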
template<uint N>
template<typename T,typename BOX_TYPE,typename SRC_INT_TYPE>
void BVH<N>::partitionByCentre(const BOX_TYPE* boxes, SRC_INT_TYPE*const indices, const SRC_INT_TYPE*const indices_end, const uint axis, const T pivotx2, SRC_INT_TYPE*& ppivot_start, SRC_INT_TYPE*& ppivot_end) noexcept {
    // TODO: Consider parallelizing this!
    // First element >= pivot
    SRC_INT_TYPE* pivot_start = indices;
    // First element > pivot
    SRC_INT_TYPE* pivot_end = indices;
    // Loop through forward once
    for (SRC_INT_TYPE* psrc_index = indices; psrc_index != indices_end; ++psrc_index) {
        const T srcsum = utBoxCenter(boxes[*psrc_index], axis);
        if (srcsum < pivotx2) {
            if (psrc_index != pivot_start) {
                if (pivot_start == pivot_end) {
                    // Common case: nothing equal to the pivot
                    const SRC_INT_TYPE temp = *psrc_index;
                    *psrc_index = *pivot_start;
                    *pivot_start = temp;
                }
                else {
                    // Less common case: at least one thing equal to the pivot
                    const SRC_INT_TYPE temp = *psrc_index;
                    *psrc_index = *pivot_end;
                    *pivot_end = *pivot_start;
                    *pivot_start = temp;
                }
            }
            ++pivot_start;
            ++pivot_end;
        }
        else if (srcsum == pivotx2) {
            // Add to the pivot area
            if (psrc_index != pivot_end) {
                const SRC_INT_TYPE temp = *psrc_index;
                *psrc_index = *pivot_end;
                *pivot_end = temp;
            }
            ++pivot_end;
        }
    }
    ppivot_start = pivot_start;
    ppivot_end = pivot_end;
}
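// This is a single-pass three-way (Dutch-national-flag style) partition over
// the doubled centre values. A small worked trace (illustrative values):
// with centres {5, 2, 7, 5, 1} and pivotx2 == 5, the pass rearranges the
// range to {2, 1, 5, 5, 7}, with ppivot_start pointing at the first 5 and
// ppivot_end one past the last 5, i.e. [ <pivot | ==pivot | >pivot ].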
#if 0
template<uint N>
void BVH<N>::debugDump() const {
    printf("\nNode 0: {\n");
    UT_WorkBuffer indent;
    indent.append(80, ' ');
    UT_Array<INT_TYPE> stack;
    stack.append(0);
    stack.append(0);
    while (!stack.isEmpty()) {
        int depth = stack.size()/2;
        if (indent.length() < 4*depth) {
            indent.append(4, ' ');
        }
        INT_TYPE cur_nodei = stack[stack.size()-2];
        INT_TYPE cur_i = stack[stack.size()-1];
        if (cur_i == N) {
            printf(indent.buffer()+indent.length()-(4*(depth-1)));
            printf("}\n");
            stack.removeLast();
            stack.removeLast();
            continue;
        }
        ++stack[stack.size()-1];
        Node& cur_node = myRoot[cur_nodei];
        INT_TYPE child_nodei = cur_node.child[cur_i];
        if (Node::isInternal(child_nodei)) {
            if (child_nodei == Node::EMPTY) {
                printf(indent.buffer()+indent.length()-(4*(depth-1)));
                printf("}\n");
                stack.removeLast();
                stack.removeLast();
                continue;
            }
            INT_TYPE internal_node = Node::getInternalNum(child_nodei);
            printf(indent.buffer()+indent.length()-(4*depth));
            printf("Node %u: {\n", uint(internal_node));
            stack.append(internal_node);
            stack.append(0);
            continue;
        }
        else {
            printf(indent.buffer()+indent.length()-(4*depth));
            printf("Tri %u\n", uint(child_nodei));
        }
    }
}
#endif
} // UT namespace
} // End HDK_Sample namespace
}}
#endif
/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      Functions and structures for computing solid angles.
 */
#pragma once
#ifndef __HDK_UT_SolidAngle_h__
#define __HDK_UT_SolidAngle_h__
#include <memory>
namespace igl {
/// @private
namespace FastWindingNumber {
namespace HDK_Sample {

template<typename T>
using UT_Vector2T = UT_FixedVector<T,2>;
template<typename T>
using UT_Vector3T = UT_FixedVector<T,3>;

template <typename T>
SYS_FORCE_INLINE T cross(const UT_Vector2T<T> &v1, const UT_Vector2T<T> &v2)
{
    return v1[0]*v2[1] - v1[1]*v2[0];
}

template <typename T>
SYS_FORCE_INLINE
UT_Vector3T<T> cross(const UT_Vector3T<T> &v1, const UT_Vector3T<T> &v2)
{
    UT_Vector3T<T> result;
    // Compute the cross product:
    result[0] = v1[1]*v2[2] - v1[2]*v2[1];
    result[1] = v1[2]*v2[0] - v1[0]*v2[2];
    result[2] = v1[0]*v2[1] - v1[1]*v2[0];
    return result;
}

/// Returns the signed solid angle subtended by triangle abc
/// from the query point.
///
/// WARNING: This uses the right-handed normal convention, whereas most of
///          Houdini uses the left-handed normal convention, so either
///          negate the output, or swap b and c, if you want it to be
///          positive inside and negative outside.
template<typename T>
inline T UTsignedSolidAngleTri(
    const UT_Vector3T<T> &a,
    const UT_Vector3T<T> &b,
    const UT_Vector3T<T> &c,
    const UT_Vector3T<T> &query)
{
    // Make a, b, and c relative to query
    UT_Vector3T<T> qa = a-query;
    UT_Vector3T<T> qb = b-query;
    UT_Vector3T<T> qc = c-query;
    const T alength = qa.length();
    const T blength = qb.length();
    const T clength = qc.length();
    // If any triangle vertex is coincident with query,
    // query is on the surface, which we treat as no solid angle.
    if (alength == 0 || blength == 0 || clength == 0)
        return T(0);
    // Normalize the vectors
    qa /= alength;
    qb /= blength;
    qc /= clength;
    // The formula on Wikipedia has roughly dot(qa,cross(qb,qc)) as the
    // numerator, but that's unstable when qa, qb, and qc are very close
    // together (e.g. if the input triangle was very far away).
    // This should be equivalent, but more stable.
    const T numerator = dot(qa, cross(qb-qa, qc-qa));
    // If the numerator is 0, regardless of the denominator, query is on the
    // surface, which we treat as no solid angle.
    if (numerator == 0)
        return T(0);
    const T denominator = T(1) + dot(qa,qb) + dot(qa,qc) + dot(qb,qc);
    return T(2)*SYSatan2(numerator, denominator);
}
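// Worked example (sanity check): for the unit-octant triangle
// a=(1,0,0), b=(0,1,0), c=(0,0,1) seen from query=(0,0,0), the normalized
// vectors are the three coordinate axes, so numerator = dot(qa, cross(qb-qa, qc-qa)) = 1
// and denominator = 1 + 0 + 0 + 0 = 1, giving 2*atan2(1,1) = pi/2,
// exactly one eighth of the full 4*pi steradians, as expected for an octant.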
template<typename T>
inline T UTsignedSolidAngleQuad(
    const UT_Vector3T<T> &a,
    const UT_Vector3T<T> &b,
    const UT_Vector3T<T> &c,
    const UT_Vector3T<T> &d,
    const UT_Vector3T<T> &query)
{
    // Make a, b, c, and d relative to query
    UT_Vector3T<T> v[4] = {
        a-query,
        b-query,
        c-query,
        d-query
    };
    const T lengths[4] = {
        v[0].length(),
        v[1].length(),
        v[2].length(),
        v[3].length()
    };
    // If any quad vertex is coincident with query,
    // query is on the surface, which we treat as no solid angle.
    // We could add the contribution from the non-planar part,
    // but in the context of a mesh, we'd still miss some, like
    // we do in the triangle case.
    if (lengths[0] == T(0) || lengths[1] == T(0) || lengths[2] == T(0) || lengths[3] == T(0))
        return T(0);
    // Normalize the vectors
    v[0] /= lengths[0];
    v[1] /= lengths[1];
    v[2] /= lengths[2];
    v[3] /= lengths[3];
    // Compute (unnormalized, but consistently-scaled) barycentric coordinates
    // for the query point inside the tetrahedron of points.
    // If 0 or 4 of the coordinates are positive (or slightly negative), the
    // query is (approximately) inside, so the choice of triangulation matters.
    // Otherwise, the triangulation doesn't matter.
    const UT_Vector3T<T> diag02 = v[2]-v[0];
    const UT_Vector3T<T> diag13 = v[3]-v[1];
    const UT_Vector3T<T> v01 = v[1]-v[0];
    const UT_Vector3T<T> v23 = v[3]-v[2];
    T bary[4];
    bary[0] = dot(v[3],cross(v23,diag13));
    bary[1] = -dot(v[2],cross(v23,diag02));
    bary[2] = -dot(v[1],cross(v01,diag13));
    bary[3] = dot(v[0],cross(v01,diag02));
    const T dot01 = dot(v[0],v[1]);
    const T dot12 = dot(v[1],v[2]);
    const T dot23 = dot(v[2],v[3]);
    const T dot30 = dot(v[3],v[0]);
    T omega = T(0);
    // The equation of a bilinear patch in the barycentric coordinates of its
    // tetrahedron is x0*x2 = x1*x3. Less is one side; greater is the other.
    if (bary[0]*bary[2] < bary[1]*bary[3])
    {
        // Split 0-2: triangles 0,1,2 and 0,2,3
        const T numerator012 = bary[3];
        const T numerator023 = bary[1];
        const T dot02 = dot(v[0],v[2]);
        // If the numerator is 0, regardless of the denominator, query is on the
        // surface, which we treat as no solid angle.
        if (numerator012 != T(0))
        {
            const T denominator012 = T(1) + dot01 + dot12 + dot02;
            omega = SYSatan2(numerator012, denominator012);
        }
        if (numerator023 != T(0))
        {
            const T denominator023 = T(1) + dot02 + dot23 + dot30;
            omega += SYSatan2(numerator023, denominator023);
        }
    }
    else
    {
        // Split 1-3: triangles 0,1,3 and 1,2,3
        const T numerator013 = -bary[2];
        const T numerator123 = -bary[0];
        const T dot13 = dot(v[1],v[3]);
        // If the numerator is 0, regardless of the denominator, query is on the
        // surface, which we treat as no solid angle.
        if (numerator013 != T(0))
        {
            const T denominator013 = T(1) + dot01 + dot13 + dot30;
            omega = SYSatan2(numerator013, denominator013);
        }
        if (numerator123 != T(0))
        {
            const T denominator123 = T(1) + dot12 + dot23 + dot13;
            omega += SYSatan2(numerator123, denominator123);
        }
    }
    return T(2)*omega;
}
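// Sanity check (by symmetry, no formula needed): for a query point at the
// centre of an axis-aligned cube, each square face subtends exactly one
// sixth of the full sphere, so |UTsignedSolidAngleQuad| should come out as
// 4*pi/6 = 2*pi/3 for each face, with the sign depending on the winding
// order of a, b, c, d.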
/// Class for quickly approximating the signed solid angle of a large mesh
/// from many query points. This is useful for computing the
/// generalized winding number at many points.
///
/// NOTE: This is currently only instantiated for <float,float>.
template<typename T,typename S>
class UT_SolidAngle
{
public:
    /// This is outlined so that we don't need to include UT_BVHImpl.h
    inline UT_SolidAngle();
    /// This is outlined so that we don't need to include UT_BVHImpl.h
    inline ~UT_SolidAngle();
    /// NOTE: This does not take ownership over triangle_points or positions,
    ///       but does keep pointers to them, so the caller must keep them in
    ///       scope for the lifetime of this structure.
    UT_SolidAngle(
        const int ntriangles,
        const int *const triangle_points,
        const int npoints,
        const UT_Vector3T<S> *const positions,
        const int order = 2)
        : UT_SolidAngle()
    { init(ntriangles, triangle_points, npoints, positions, order); }
    /// Initialize the tree and data.
    /// NOTE: It is safe to call init on a UT_SolidAngle that has had init
    ///       called on it before, to re-initialize it.
    inline void init(
        const int ntriangles,
        const int *const triangle_points,
        const int npoints,
        const UT_Vector3T<S> *const positions,
        const int order = 2);
    /// Frees myTree and myData, and clears the rest.
    inline void clear();
    /// Returns true if this is clear
    bool isClear() const
    { return myNTriangles == 0; }
    /// Returns an approximation of the signed solid angle of the mesh from the specified query_point.
    /// accuracy_scale is the value of (maxP/q) beyond which the approximation of the box will be used.
    inline T computeSolidAngle(const UT_Vector3T<T> &query_point, const T accuracy_scale = T(2.0)) const;
private:
    struct BoxData;
    static constexpr uint BVH_N = 4;
    UT_BVH<BVH_N> myTree;
    int myNBoxes;
    int myOrder;
    std::unique_ptr<BoxData[]> myData;
    int myNTriangles;
    const int *myTrianglePoints;
    int myNPoints;
    const UT_Vector3T<S> *myPositions;
};
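// A minimal usage sketch (illustrative; `tris` is a flat array of 3*ntriangles
// point indices and `pts` an array of npoints positions, both kept alive by
// the caller for the lifetime of the structure):
//
//     UT_SolidAngle<float,float> solid_angle(ntriangles, tris, npoints, pts);
//     const float omega = solid_angle.computeSolidAngle(query);
//     // The generalized winding number is the solid angle over the full
//     // sphere: w(query) = omega / (4*pi).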
template<typename T>
inline T UTsignedAngleSegment(
    const UT_Vector2T<T> &a,
    const UT_Vector2T<T> &b,
    const UT_Vector2T<T> &query)
{
    // Make a and b relative to query
    UT_Vector2T<T> qa = a-query;
    UT_Vector2T<T> qb = b-query;
    // If either segment vertex is coincident with query,
    // query is on the segment, which we treat as no angle.
    if (qa.isZero() || qb.isZero())
        return T(0);
    // numerator = |qa||qb|sin(theta)
    const T numerator = cross(qa, qb);
    // If the numerator is 0, regardless of the denominator, query is on the
    // curve, which we treat as no angle.
    if (numerator == 0)
        return T(0);
    // denominator = |qa||qb|cos(theta)
    const T denominator = dot(qa,qb);
    // numerator/denominator = tan(theta)
    return SYSatan2(numerator, denominator);
}
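// Worked example: for the segment from a=(1,0) to b=(0,1) seen from
// query=(0,0), cross(qa,qb) = 1*1 - 0*0 = 1 and dot(qa,qb) = 0, so the
// function returns atan2(1,0) = pi/2, a quarter turn, matching the angle
// actually subtended at the origin.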
/// Class for quickly approximating the signed subtended angle of a large curve
/// from many query points. This is useful for computing the
/// generalized winding number at many points.
///
/// NOTE: This is currently only instantiated for <float,float>.
template<typename T,typename S>
class UT_SubtendedAngle
{
public:
    /// This is outlined so that we don't need to include UT_BVHImpl.h
    inline UT_SubtendedAngle();
    /// This is outlined so that we don't need to include UT_BVHImpl.h
    inline ~UT_SubtendedAngle();
    /// NOTE: This does not take ownership over segment_points or positions,
    ///       but does keep pointers to them, so the caller must keep them in
    ///       scope for the lifetime of this structure.
    UT_SubtendedAngle(
        const int nsegments,
        const int *const segment_points,
        const int npoints,
        const UT_Vector2T<S> *const positions,
        const int order = 2)
        : UT_SubtendedAngle()
    { init(nsegments, segment_points, npoints, positions, order); }
    /// Initialize the tree and data.
    /// NOTE: It is safe to call init on a UT_SubtendedAngle that has had init
    ///       called on it before, to re-initialize it.
    inline void init(
        const int nsegments,
        const int *const segment_points,
        const int npoints,
        const UT_Vector2T<S> *const positions,
        const int order = 2);
    /// Frees myTree and myData, and clears the rest.
    inline void clear();
    /// Returns true if this is clear
    bool isClear() const
    { return myNSegments == 0; }
    /// Returns an approximation of the signed angle subtended by the curve from the specified query_point.
    /// accuracy_scale is the value of (maxP/q) beyond which the approximation of the box will be used.
    inline T computeAngle(const UT_Vector2T<T> &query_point, const T accuracy_scale = T(2.0)) const;
private:
    struct BoxData;
    static constexpr uint BVH_N = 4;
    UT_BVH<BVH_N> myTree;
    int myNBoxes;
    int myOrder;
    std::unique_ptr<BoxData[]> myData;
    int myNSegments;
    const int *mySegmentPoints;
    int myNPoints;
    const UT_Vector2T<S> *myPositions;
};
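// A minimal usage sketch (illustrative; `segs` is a flat array of 2*nsegments
// point indices and `pts` an array of npoints 2D positions, both owned by
// the caller):
//
//     UT_SubtendedAngle<float,float> subtended(nsegments, segs, npoints, pts);
//     const float theta = subtended.computeAngle(query);
//     // The 2D generalized winding number is the angle over the full turn:
//     // w(query) = theta / (2*pi).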
} // End HDK_Sample namespace
}}
#endif
/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      A wrapper function for the "free" function, used by UT_(Small)Array.
 */
#include <stdlib.h>
namespace igl {
/// @private
namespace FastWindingNumber {
// This needs to be here, or else the warning suppression doesn't work, because
// the templated calling code won't otherwise be compiled until after we've
// already popped the warning state. So we just always disable this at file
// scope here.
#if defined(__GNUC__) && !defined(__clang__)
_Pragma("GCC diagnostic push")
_Pragma("GCC diagnostic ignored \"-Wfree-nonheap-object\"")
#endif
inline void ut_ArrayImplFree(void *p)
{
    free(p);
}
#if defined(__GNUC__) && !defined(__clang__)
_Pragma("GCC diagnostic pop")
#endif
} }
/*
 * Copyright (c) 2018 Side Effects Software Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * COMMENTS:
 *      Functions and structures for computing solid angles.
 */
#include "parallel_for.h"
#include <type_traits>
#include <utility>
#define SOLID_ANGLE_TIME_PRECOMPUTE 0
#if SOLID_ANGLE_TIME_PRECOMPUTE
#include <UT/UT_StopWatch.h>
#endif
#define SOLID_ANGLE_DEBUG 0
#if SOLID_ANGLE_DEBUG
#include <UT/UT_Debug.h>
#endif
#define TAYLOR_SERIES_ORDER 2
namespace igl {
/// @private
namespace FastWindingNumber {
namespace HDK_Sample {

template<typename T,typename S>
struct UT_SolidAngle<T,S>::BoxData
{
    void clear()
    {
        // Set everything to zero
        memset(this,0,sizeof(*this));
    }
    using Type  = typename std::conditional<BVH_N==4 && std::is_same<T,float>::value, v4uf, UT_FixedVector<T,BVH_N>>::type;
    using SType = typename std::conditional<BVH_N==4 && std::is_same<S,float>::value, v4uf, UT_FixedVector<S,BVH_N>>::type;
    /// An upper bound on the squared distance from myAverageP to the farthest point in the box.
    SType myMaxPDist2;
    /// Centre of mass of the mesh surface in this box
    UT_FixedVector<Type,3> myAverageP;
    /// Unnormalized, area-weighted normal of the mesh in this box
    UT_FixedVector<Type,3> myN;
#if TAYLOR_SERIES_ORDER >= 1
    /// Values for Omega_1
    /// @{
    UT_FixedVector<Type,3> myNijDiag;  // Nxx, Nyy, Nzz
    Type myNxy_Nyx;                    // Nxy+Nyx
    Type myNyz_Nzy;                    // Nyz+Nzy
    Type myNzx_Nxz;                    // Nzx+Nxz
    /// @}
#endif
#if TAYLOR_SERIES_ORDER >= 2
    /// Values for Omega_2
    /// @{
    UT_FixedVector<Type,3> myNijkDiag; // Nxxx, Nyyy, Nzzz
    Type mySumPermuteNxyz;             // (Nxyz+Nxzy+Nyzx+Nyxz+Nzxy+Nzyx) = 2*(Nxyz+Nyzx+Nzxy)
    Type my2Nxxy_Nyxx;                 // Nxxy+Nxyx+Nyxx = 2Nxxy+Nyxx
    Type my2Nxxz_Nzxx;                 // Nxxz+Nxzx+Nzxx = 2Nxxz+Nzxx
    Type my2Nyyz_Nzyy;                 // Nyyz+Nyzy+Nzyy = 2Nyyz+Nzyy
    Type my2Nyyx_Nxyy;                 // Nyyx+Nyxy+Nxyy = 2Nyyx+Nxyy
    Type my2Nzzx_Nxzz;                 // Nzzx+Nzxz+Nxzz = 2Nzzx+Nxzz
    Type my2Nzzy_Nyzz;                 // Nzzy+Nzyz+Nyzz = 2Nzzy+Nyzz
    /// @}
#endif
};
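// Layout note (follows from the Type alias above): BoxData stores its fields
// in structure-of-arrays form across the BVH_N children of a node, so with
// BVH_N == 4 and T == float each field is a v4uf holding one lane per child.
// For example, myN[0] is the 4-wide vector of the children's normal
// x-components, which is why the precomputation below writes lanes via
// expressions like ((T*)&current_box_data.myN[0])[i] = local_N[0].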
template<typename T,typename S>
inline UT_SolidAngle<T,S>::UT_SolidAngle()
    : myTree()
    , myNBoxes(0)
    , myOrder(2)
    , myData(nullptr)
    , myNTriangles(0)
    , myTrianglePoints(nullptr)
    , myNPoints(0)
    , myPositions(nullptr)
{}

template<typename T,typename S>
inline UT_SolidAngle<T,S>::~UT_SolidAngle()
{
    // Default destruction works, but this needs to be outlined
    // to avoid having to include UT_BVHImpl.h in the header
    // (for the UT_UniquePtr destructor).
}

template<typename T,typename S>
inline void UT_SolidAngle<T,S>::init(
    const int ntriangles,
    const int *const triangle_points,
    const int npoints,
    const UT_Vector3T<S> *const positions,
    const int order)
{
#if SOLID_ANGLE_DEBUG
    UTdebugFormat("");
    UTdebugFormat("");
    UTdebugFormat("Building BVH for {} triangles on {} points:", ntriangles, npoints);
#endif
    myOrder = order;
    myNTriangles = ntriangles;
    myTrianglePoints = triangle_points;
    myNPoints = npoints;
    myPositions = positions;
#if SOLID_ANGLE_TIME_PRECOMPUTE
    UT_StopWatch timer;
    timer.start();
#endif
    UT_SmallArray<UT::Box<S,3>> triangle_boxes;
    triangle_boxes.setSizeNoInit(ntriangles);
    if (ntriangles < 16*1024)
    {
        const int *cur_triangle_points = triangle_points;
        for (int i = 0; i < ntriangles; ++i, cur_triangle_points += 3)
        {
            UT::Box<S,3> &box = triangle_boxes[i];
            box.initBounds(positions[cur_triangle_points[0]]);
            box.enlargeBounds(positions[cur_triangle_points[1]]);
            box.enlargeBounds(positions[cur_triangle_points[2]]);
        }
    }
    else
    {
        igl::parallel_for(ntriangles,
            [triangle_points,&triangle_boxes,positions](int i)
            {
                const int *cur_triangle_points = triangle_points + i*3;
                UT::Box<S,3> &box = triangle_boxes[i];
                box.initBounds(positions[cur_triangle_points[0]]);
                box.enlargeBounds(positions[cur_triangle_points[1]]);
                box.enlargeBounds(positions[cur_triangle_points[2]]);
            });
    }
#if SOLID_ANGLE_TIME_PRECOMPUTE
    double time = timer.stop();
    UTdebugFormat("{} s to create bounding boxes.", time);
    timer.start();
#endif
    myTree.template init<UT::BVH_Heuristic::BOX_AREA,S,3>(triangle_boxes.array(), ntriangles);
#if SOLID_ANGLE_TIME_PRECOMPUTE
    time = timer.stop();
    UTdebugFormat("{} s to initialize UT_BVH structure. {} nodes", time, myTree.getNumNodes());
#endif
    //myTree.debugDump();
    const int nnodes = myTree.getNumNodes();
    myNBoxes = nnodes;
    BoxData *box_data = new BoxData[nnodes];
    myData.reset(box_data);
    // Some data are only needed during initialization.
    struct LocalData
    {
        // Bounding box
        UT::Box<S,3> myBox;
        // P and N are needed from each child for computing Nij.
        UT_Vector3T<T> myAverageP;
        UT_Vector3T<T> myAreaP;
        UT_Vector3T<T> myN;
        // Unsigned area is needed for computing the average position.
        T myArea;
#if TAYLOR_SERIES_ORDER >= 1
        // These are needed for computing Nijk.
        UT_Vector3T<T> myNijDiag;
        T myNxy; T myNyx;
        T myNyz; T myNzy;
        T myNzx; T myNxz;
#endif
#if TAYLOR_SERIES_ORDER >= 2
        UT_Vector3T<T> myNijkDiag; // Nxxx, Nyyy, Nzzz
        T mySumPermuteNxyz;        // (Nxyz+Nxzy+Nyzx+Nyxz+Nzxy+Nzyx) = 2*(Nxyz+Nyzx+Nzxy)
        T my2Nxxy_Nyxx;            // Nxxy+Nxyx+Nyxx = 2Nxxy+Nyxx
        T my2Nxxz_Nzxx;            // Nxxz+Nxzx+Nzxx = 2Nxxz+Nzxx
        T my2Nyyz_Nzyy;            // Nyyz+Nyzy+Nzyy = 2Nyyz+Nzyy
        T my2Nyyx_Nxyy;            // Nyyx+Nyxy+Nxyy = 2Nyyx+Nxyy
        T my2Nzzx_Nxzz;            // Nzzx+Nzxz+Nxzz = 2Nzzx+Nxzz
        T my2Nzzy_Nyzz;            // Nzzy+Nzyz+Nyzz = 2Nzzy+Nyzz
#endif
    };
    struct PrecomputeFunctors
    {
        BoxData *const myBoxData;
        const UT::Box<S,3> *const myTriangleBoxes;
        const int *const myTrianglePoints;
        const UT_Vector3T<S> *const myPositions;
        const int myOrder;
        PrecomputeFunctors(
            BoxData *box_data,
            const UT::Box<S,3> *triangle_boxes,
            const int *triangle_points,
            const UT_Vector3T<S> *positions,
            const int order)
            : myBoxData(box_data)
            , myTriangleBoxes(triangle_boxes)
            , myTrianglePoints(triangle_points)
            , myPositions(positions)
            , myOrder(order)
        {}
        constexpr SYS_FORCE_INLINE bool pre(const int /*nodei*/, LocalData * /*data_for_parent*/) const
        {
            return true;
        }
        void item(const int itemi, const int /*parent_nodei*/, LocalData &data_for_parent) const
        {
            const UT_Vector3T<S> *const positions = myPositions;
            const int *const cur_triangle_points = myTrianglePoints + 3*itemi;
            const UT_Vector3T<T> a = positions[cur_triangle_points[0]];
            const UT_Vector3T<T> b = positions[cur_triangle_points[1]];
            const UT_Vector3T<T> c = positions[cur_triangle_points[2]];
            const UT_Vector3T<T> ab = b-a;
            const UT_Vector3T<T> ac = c-a;
            const UT::Box<S,3> &triangle_box = myTriangleBoxes[itemi];
            data_for_parent.myBox.initBounds(triangle_box.getMin(), triangle_box.getMax());
            // Area-weighted normal (unnormalized)
            const UT_Vector3T<T> N = T(0.5)*cross(ab,ac);
            const T area2 = N.length2();
            const T area = SYSsqrt(area2);
            const UT_Vector3T<T> P = (a+b+c)/3;
            data_for_parent.myAverageP = P;
            data_for_parent.myAreaP = P*area;
            data_for_parent.myN = N;
#if SOLID_ANGLE_DEBUG
            UTdebugFormat("");
            UTdebugFormat("Triangle {}: P = {}; N = {}; area = {}", itemi, P, N, area);
            UTdebugFormat("    box = {}", data_for_parent.myBox);
#endif
            data_for_parent.myArea = area;
#if TAYLOR_SERIES_ORDER >= 1
            const int order = myOrder;
            if (order < 1)
                return;
            // NOTE: Because P is at the triangle's centroid, an individual
            //       triangle contributes zero to Nij.
            data_for_parent.myNijDiag = T(0);
            data_for_parent.myNxy = 0; data_for_parent.myNyx = 0;
            data_for_parent.myNyz = 0; data_for_parent.myNzy = 0;
            data_for_parent.myNzx = 0; data_for_parent.myNxz = 0;
#endif
#if TAYLOR_SERIES_ORDER >= 2
            if (order < 2)
                return;
            // If the triangle has zero area, the results are zero, so we can skip.
            if (area == 0)
            {
                data_for_parent.myNijkDiag = T(0);
                data_for_parent.mySumPermuteNxyz = 0;
                data_for_parent.my2Nxxy_Nyxx = 0;
                data_for_parent.my2Nxxz_Nzxx = 0;
                data_for_parent.my2Nyyz_Nzyy = 0;
                data_for_parent.my2Nyyx_Nxyy = 0;
                data_for_parent.my2Nzzx_Nxzz = 0;
                data_for_parent.my2Nzzy_Nyzz = 0;
                return;
            }
            // We need to multiply the integrals by the NORMALIZED normal.
            UT_Vector3T<T> n = N/area;
            // Figure out the order of a, b, and c in x, y, and z
            // for use in computing the integrals for Nijk.
            UT_Vector3T<T> values[3] = {a, b, c};
            int order_x[3] = {0,1,2};
            if (a[0] > b[0])
                std::swap(order_x[0],order_x[1]);
            if (values[order_x[0]][0] > c[0])
                std::swap(order_x[0],order_x[2]);
            if (values[order_x[1]][0] > values[order_x[2]][0])
                std::swap(order_x[1],order_x[2]);
            T dx = values[order_x[2]][0] - values[order_x[0]][0];
            int order_y[3] = {0,1,2};
            if (a[1] > b[1])
                std::swap(order_y[0],order_y[1]);
            if (values[order_y[0]][1] > c[1])
                std::swap(order_y[0],order_y[2]);
            if (values[order_y[1]][1] > values[order_y[2]][1])
                std::swap(order_y[1],order_y[2]);
            T dy = values[order_y[2]][1] - values[order_y[0]][1];
            int order_z[3] = {0,1,2};
            if (a[2] > b[2])
                std::swap(order_z[0],order_z[1]);
            if (values[order_z[0]][2] > c[2])
                std::swap(order_z[0],order_z[2]);
            if (values[order_z[1]][2] > values[order_z[2]][2])
                std::swap(order_z[1],order_z[2]);
            T dz = values[order_z[2]][2] - values[order_z[0]][2];
            auto &&compute_integrals = [](
                const UT_Vector3T<T> &a,
                const UT_Vector3T<T> &b,
                const UT_Vector3T<T> &c,
                const UT_Vector3T<T> &P,
                T *integral_ii,
                T *integral_ij,
                T *integral_ik,
                const int i)
            {
#if SOLID_ANGLE_DEBUG
                UTdebugFormat("    Splitting on {}; a = {}; b = {}; c = {}", char('x'+i), a, b, c);
#endif
                // NOTE: a, b, and c must be in order of the i axis.
                // We're splitting the triangle at the middle i coordinate.
                const UT_Vector3T<T> oab = b - a;
                const UT_Vector3T<T> oac = c - a;
                const UT_Vector3T<T> ocb = b - c;
                UT_IGL_ASSERT_MSG_P(oac[i] > 0, "This should have been checked by the caller.");
                const T t = oab[i]/oac[i];
                UT_IGL_ASSERT_MSG_P(t >= 0 && t <= 1, "Either sorting must have gone wrong, or there are input NaNs.");
                const int j = (i==2) ? 0 : (i+1);
                const int k = (j==2) ? 0 : (j+1);
                const T jdiff = t*oac[j] - oab[j];
                const T kdiff = t*oac[k] - oab[k];
                UT_Vector3T<T> cross_a;
                cross_a[0] = (jdiff*oab[k] - kdiff*oab[j]);
                cross_a[1] = kdiff*oab[i];
                cross_a[2] = jdiff*oab[i];
                UT_Vector3T<T> cross_c;
                cross_c[0] = (jdiff*ocb[k] - kdiff*ocb[j]);
                cross_c[1] = kdiff*ocb[i];
                cross_c[2] = jdiff*ocb[i];
                const T area_scale_a = cross_a.length();
                const T area_scale_c = cross_c.length();
                const T Pai = a[i] - P[i];
                const T Pci = c[i] - P[i];
                // Integral over the area of the triangle of (pi^2)dA,
                // by splitting the triangle into two at b: the a side
                // and the c side.
                const T int_ii_a = area_scale_a*(T(0.5)*Pai*Pai + T(2.0/3.0)*Pai*oab[i] + T(0.25)*oab[i]*oab[i]);
                const T int_ii_c = area_scale_c*(T(0.5)*Pci*Pci + T(2.0/3.0)*Pci*ocb[i] + T(0.25)*ocb[i]*ocb[i]);
                *integral_ii = int_ii_a + int_ii_c;
#if SOLID_ANGLE_DEBUG
                UTdebugFormat("    integral_{}{}_a = {}; integral_{}{}_c = {}", char('x'+i), char('x'+i), int_ii_a, char('x'+i), char('x'+i), int_ii_c);
#endif
                int jk = j;
                T *integral = integral_ij;
                T diff = jdiff;
                while (true) // This only does 2 iterations, one for j and one for k
                {
                    if (integral)
                    {
                        T obmidj = b[jk] + T(0.5)*diff;
                        T oabmidj = obmidj - a[jk];
                        T ocbmidj = obmidj - c[jk];
                        T Paj = a[jk] - P[jk];
                        T Pcj = c[jk] - P[jk];
                        // Integral over the area of the triangle of (pi*pj)dA
                        const T int_ij_a = area_scale_a*(T(0.5)*Pai*Paj + T(1.0/3.0)*Pai*oabmidj + T(1.0/3.0)*Paj*oab[i] + T(0.25)*oab[i]*oabmidj);
                        const T int_ij_c = area_scale_c*(T(0.5)*Pci*Pcj + T(1.0/3.0)*Pci*ocbmidj + T(1.0/3.0)*Pcj*ocb[i] + T(0.25)*ocb[i]*ocbmidj);
                        *integral = int_ij_a + int_ij_c;
#if SOLID_ANGLE_DEBUG
                        UTdebugFormat("    integral_{}{}_a = {}; integral_{}{}_c = {}", char('x'+i), char('x'+jk), int_ij_a, char('x'+i), char('x'+jk), int_ij_c);
#endif
                    }
                    if (jk == k)
                        break;
                    jk = k;
                    integral = integral_ik;
                    diff = kdiff;
                }
            };
            T integral_xx = 0;
            T integral_xy = 0;
            T integral_yy = 0;
            T integral_yz = 0;
            T integral_zz = 0;
            T integral_zx = 0;
            // Note that if the span of any axis is zero, the integral must be zero,
            // since there's a factor of (p_i - P_i), i.e. value minus average,
            // and every value must be equal to the average, giving zero.
            if (dx > 0)
            {
                compute_integrals(
                    values[order_x[0]], values[order_x[1]], values[order_x[2]], P,
                    &integral_xx, ((dx >= dy && dy > 0) ? &integral_xy : nullptr), ((dx >= dz && dz > 0) ? &integral_zx : nullptr), 0);
            }
            if (dy > 0)
            {
                compute_integrals(
                    values[order_y[0]], values[order_y[1]], values[order_y[2]], P,
                    &integral_yy, ((dy >= dz && dz > 0) ? &integral_yz : nullptr), ((dx < dy && dx > 0) ? &integral_xy : nullptr), 1);
            }
            if (dz > 0)
            {
                compute_integrals(
                    values[order_z[0]], values[order_z[1]], values[order_z[2]], P,
                    &integral_zz, ((dx < dz && dx > 0) ? &integral_zx : nullptr), ((dy < dz && dy > 0) ? &integral_yz : nullptr), 2);
            }
            UT_Vector3T<T> Niii;
            Niii[0] = integral_xx;
            Niii[1] = integral_yy;
            Niii[2] = integral_zz;
            Niii *= n;
            data_for_parent.myNijkDiag = Niii;
            data_for_parent.mySumPermuteNxyz = 2*(n[0]*integral_yz + n[1]*integral_zx + n[2]*integral_xy);
            T Nxxy = n[0]*integral_xy;
            T Nxxz = n[0]*integral_zx;
            T Nyyz = n[1]*integral_yz;
            T Nyyx = n[1]*integral_xy;
            T Nzzx = n[2]*integral_zx;
            T Nzzy = n[2]*integral_yz;
            data_for_parent.my2Nxxy_Nyxx = 2*Nxxy + n[1]*integral_xx;
            data_for_parent.my2Nxxz_Nzxx = 2*Nxxz + n[2]*integral_xx;
            data_for_parent.my2Nyyz_Nzyy = 2*Nyyz + n[2]*integral_yy;
            data_for_parent.my2Nyyx_Nxyy = 2*Nyyx + n[0]*integral_yy;
            data_for_parent.my2Nzzx_Nxzz = 2*Nzzx + n[0]*integral_zz;
            data_for_parent.my2Nzzy_Nyzz = 2*Nzzy + n[1]*integral_zz;
#if SOLID_ANGLE_DEBUG
            UTdebugFormat("    integral_xx = {}; yy = {}; zz = {}", integral_xx, integral_yy, integral_zz);
            UTdebugFormat("    integral_xy = {}; yz = {}; zx = {}", integral_xy, integral_yz, integral_zx);
#endif
#endif
        }
  6106. void post(const int nodei, const int /*parent_nodei*/, LocalData *data_for_parent, const int nchildren, const LocalData *child_data_array) const
  6107. {
  6108. // NOTE: Although in the general case, data_for_parent may be null for the root call,
  6109. // this functor assumes that it's non-null, so the call below must pass a non-null pointer.
  6110. BoxData &current_box_data = myBoxData[nodei];
  6111. UT_Vector3T<T> N = child_data_array[0].myN;
  6112. ((T*)&current_box_data.myN[0])[0] = N[0];
  6113. ((T*)&current_box_data.myN[1])[0] = N[1];
  6114. ((T*)&current_box_data.myN[2])[0] = N[2];
  6115. UT_Vector3T<T> areaP = child_data_array[0].myAreaP;
  6116. T area = child_data_array[0].myArea;
  6117. UT_Vector3T<T> local_P = child_data_array[0].myAverageP;
  6118. ((T*)&current_box_data.myAverageP[0])[0] = local_P[0];
  6119. ((T*)&current_box_data.myAverageP[1])[0] = local_P[1];
  6120. ((T*)&current_box_data.myAverageP[2])[0] = local_P[2];
  6121. for (int i = 1; i < nchildren; ++i)
  6122. {
  6123. const UT_Vector3T<T> local_N = child_data_array[i].myN;
  6124. N += local_N;
  6125. ((T*)&current_box_data.myN[0])[i] = local_N[0];
  6126. ((T*)&current_box_data.myN[1])[i] = local_N[1];
  6127. ((T*)&current_box_data.myN[2])[i] = local_N[2];
  6128. areaP += child_data_array[i].myAreaP;
  6129. area += child_data_array[i].myArea;
  6130. const UT_Vector3T<T> local_P = child_data_array[i].myAverageP;
  6131. ((T*)&current_box_data.myAverageP[0])[i] = local_P[0];
  6132. ((T*)&current_box_data.myAverageP[1])[i] = local_P[1];
  6133. ((T*)&current_box_data.myAverageP[2])[i] = local_P[2];
  6134. }
  6135. for (int i = nchildren; i < BVH_N; ++i)
  6136. {
  6137. // Set to zero, just to avoid false positives for uses of uninitialized memory.
  6138. ((T*)&current_box_data.myN[0])[i] = 0;
  6139. ((T*)&current_box_data.myN[1])[i] = 0;
  6140. ((T*)&current_box_data.myN[2])[i] = 0;
  6141. ((T*)&current_box_data.myAverageP[0])[i] = 0;
  6142. ((T*)&current_box_data.myAverageP[1])[i] = 0;
  6143. ((T*)&current_box_data.myAverageP[2])[i] = 0;
  6144. }
  6145. data_for_parent->myN = N;
  6146. data_for_parent->myAreaP = areaP;
  6147. data_for_parent->myArea = area;
  6148. UT::Box<S,3> box(child_data_array[0].myBox);
  6149. for (int i = 1; i < nchildren; ++i)
  6150. box.enlargeBounds(child_data_array[i].myBox);
  6151. // Normalize P
  6152. UT_Vector3T<T> averageP;
  6153. if (area > 0)
  6154. averageP = areaP/area;
  6155. else
  6156. averageP = T(0.5)*(box.getMin() + box.getMax());
  6157. data_for_parent->myAverageP = averageP;
  6158. data_for_parent->myBox = box;
  6159. for (int i = 0; i < nchildren; ++i)
  6160. {
  6161. const UT::Box<S,3> &local_box(child_data_array[i].myBox);
  6162. const UT_Vector3T<T> &local_P = child_data_array[i].myAverageP;
  6163. const UT_Vector3T<T> maxPDiff = SYSmax(local_P-UT_Vector3T<T>(local_box.getMin()), UT_Vector3T<T>(local_box.getMax())-local_P);
  6164. ((T*)&current_box_data.myMaxPDist2)[i] = maxPDiff.length2();
  6165. }
  6166. for (int i = nchildren; i < BVH_N; ++i)
  6167. {
  6168. // This child is non-existent. If we set myMaxPDist2 to infinity, it will never
  6169. // use the approximation, and the traverseVector function can check for EMPTY.
  6170. ((T*)&current_box_data.myMaxPDist2)[i] = std::numeric_limits<T>::infinity();
  6171. }
  6172. #if TAYLOR_SERIES_ORDER >= 1
  6173. const int order = myOrder;
  6174. if (order >= 1)
  6175. {
  6176. // We now have the current box's P, so we can adjust Nij and Nijk
  6177. data_for_parent->myNijDiag = child_data_array[0].myNijDiag;
  6178. data_for_parent->myNxy = 0;
  6179. data_for_parent->myNyx = 0;
  6180. data_for_parent->myNyz = 0;
  6181. data_for_parent->myNzy = 0;
  6182. data_for_parent->myNzx = 0;
  6183. data_for_parent->myNxz = 0;
  6184. #if TAYLOR_SERIES_ORDER >= 2
                data_for_parent->myNijkDiag = child_data_array[0].myNijkDiag;
                data_for_parent->mySumPermuteNxyz = child_data_array[0].mySumPermuteNxyz;
                data_for_parent->my2Nxxy_Nyxx = child_data_array[0].my2Nxxy_Nyxx;
                data_for_parent->my2Nxxz_Nzxx = child_data_array[0].my2Nxxz_Nzxx;
                data_for_parent->my2Nyyz_Nzyy = child_data_array[0].my2Nyyz_Nzyy;
                data_for_parent->my2Nyyx_Nxyy = child_data_array[0].my2Nyyx_Nxyy;
                data_for_parent->my2Nzzx_Nxzz = child_data_array[0].my2Nzzx_Nxzz;
                data_for_parent->my2Nzzy_Nyzz = child_data_array[0].my2Nzzy_Nyzz;
#endif
                for (int i = 1; i < nchildren; ++i)
                {
                    data_for_parent->myNijDiag += child_data_array[i].myNijDiag;
#if TAYLOR_SERIES_ORDER >= 2
                    data_for_parent->myNijkDiag += child_data_array[i].myNijkDiag;
                    data_for_parent->mySumPermuteNxyz += child_data_array[i].mySumPermuteNxyz;
                    data_for_parent->my2Nxxy_Nyxx += child_data_array[i].my2Nxxy_Nyxx;
                    data_for_parent->my2Nxxz_Nzxx += child_data_array[i].my2Nxxz_Nzxx;
                    data_for_parent->my2Nyyz_Nzyy += child_data_array[i].my2Nyyz_Nzyy;
                    data_for_parent->my2Nyyx_Nxyy += child_data_array[i].my2Nyyx_Nxyy;
                    data_for_parent->my2Nzzx_Nxzz += child_data_array[i].my2Nzzx_Nxzz;
                    data_for_parent->my2Nzzy_Nyzz += child_data_array[i].my2Nzzy_Nyzz;
#endif
                }
                for (int j = 0; j < 3; ++j)
                    ((T*)&current_box_data.myNijDiag[j])[0] = child_data_array[0].myNijDiag[j];
                ((T*)&current_box_data.myNxy_Nyx)[0] = child_data_array[0].myNxy + child_data_array[0].myNyx;
                ((T*)&current_box_data.myNyz_Nzy)[0] = child_data_array[0].myNyz + child_data_array[0].myNzy;
                ((T*)&current_box_data.myNzx_Nxz)[0] = child_data_array[0].myNzx + child_data_array[0].myNxz;
                for (int j = 0; j < 3; ++j)
                    ((T*)&current_box_data.myNijkDiag[j])[0] = child_data_array[0].myNijkDiag[j];
                ((T*)&current_box_data.mySumPermuteNxyz)[0] = child_data_array[0].mySumPermuteNxyz;
                ((T*)&current_box_data.my2Nxxy_Nyxx)[0] = child_data_array[0].my2Nxxy_Nyxx;
                ((T*)&current_box_data.my2Nxxz_Nzxx)[0] = child_data_array[0].my2Nxxz_Nzxx;
                ((T*)&current_box_data.my2Nyyz_Nzyy)[0] = child_data_array[0].my2Nyyz_Nzyy;
                ((T*)&current_box_data.my2Nyyx_Nxyy)[0] = child_data_array[0].my2Nyyx_Nxyy;
                ((T*)&current_box_data.my2Nzzx_Nxzz)[0] = child_data_array[0].my2Nzzx_Nxzz;
                ((T*)&current_box_data.my2Nzzy_Nyzz)[0] = child_data_array[0].my2Nzzy_Nyzz;
                for (int i = 1; i < nchildren; ++i)
                {
                    for (int j = 0; j < 3; ++j)
                        ((T*)&current_box_data.myNijDiag[j])[i] = child_data_array[i].myNijDiag[j];
                    ((T*)&current_box_data.myNxy_Nyx)[i] = child_data_array[i].myNxy + child_data_array[i].myNyx;
                    ((T*)&current_box_data.myNyz_Nzy)[i] = child_data_array[i].myNyz + child_data_array[i].myNzy;
                    ((T*)&current_box_data.myNzx_Nxz)[i] = child_data_array[i].myNzx + child_data_array[i].myNxz;
                    for (int j = 0; j < 3; ++j)
                        ((T*)&current_box_data.myNijkDiag[j])[i] = child_data_array[i].myNijkDiag[j];
                    ((T*)&current_box_data.mySumPermuteNxyz)[i] = child_data_array[i].mySumPermuteNxyz;
                    ((T*)&current_box_data.my2Nxxy_Nyxx)[i] = child_data_array[i].my2Nxxy_Nyxx;
                    ((T*)&current_box_data.my2Nxxz_Nzxx)[i] = child_data_array[i].my2Nxxz_Nzxx;
                    ((T*)&current_box_data.my2Nyyz_Nzyy)[i] = child_data_array[i].my2Nyyz_Nzyy;
                    ((T*)&current_box_data.my2Nyyx_Nxyy)[i] = child_data_array[i].my2Nyyx_Nxyy;
                    ((T*)&current_box_data.my2Nzzx_Nxzz)[i] = child_data_array[i].my2Nzzx_Nxzz;
                    ((T*)&current_box_data.my2Nzzy_Nyzz)[i] = child_data_array[i].my2Nzzy_Nyzz;
                }
                for (int i = nchildren; i < BVH_N; ++i)
                {
                    // Set to zero, just to avoid false positives for uses of uninitialized memory.
                    for (int j = 0; j < 3; ++j)
                        ((T*)&current_box_data.myNijDiag[j])[i] = 0;
                    ((T*)&current_box_data.myNxy_Nyx)[i] = 0;
                    ((T*)&current_box_data.myNyz_Nzy)[i] = 0;
                    ((T*)&current_box_data.myNzx_Nxz)[i] = 0;
                    for (int j = 0; j < 3; ++j)
                        ((T*)&current_box_data.myNijkDiag[j])[i] = 0;
                    ((T*)&current_box_data.mySumPermuteNxyz)[i] = 0;
                    ((T*)&current_box_data.my2Nxxy_Nyxx)[i] = 0;
                    ((T*)&current_box_data.my2Nxxz_Nzxx)[i] = 0;
                    ((T*)&current_box_data.my2Nyyz_Nzyy)[i] = 0;
                    ((T*)&current_box_data.my2Nyyx_Nxyy)[i] = 0;
                    ((T*)&current_box_data.my2Nzzx_Nxzz)[i] = 0;
                    ((T*)&current_box_data.my2Nzzy_Nyzz)[i] = 0;
                }
                for (int i = 0; i < nchildren; ++i)
                {
                    const LocalData &child_data = child_data_array[i];
                    UT_Vector3T<T> displacement = child_data.myAverageP - UT_Vector3T<T>(data_for_parent->myAverageP);
                    UT_Vector3T<T> N = child_data.myN;
                    // Adjust Nij for the change in centre P
                    data_for_parent->myNijDiag += N*displacement;
                    T Nxy = child_data.myNxy + N[0]*displacement[1];
                    T Nyx = child_data.myNyx + N[1]*displacement[0];
                    T Nyz = child_data.myNyz + N[1]*displacement[2];
                    T Nzy = child_data.myNzy + N[2]*displacement[1];
                    T Nzx = child_data.myNzx + N[2]*displacement[0];
                    T Nxz = child_data.myNxz + N[0]*displacement[2];
                    data_for_parent->myNxy += Nxy;
                    data_for_parent->myNyx += Nyx;
                    data_for_parent->myNyz += Nyz;
                    data_for_parent->myNzy += Nzy;
                    data_for_parent->myNzx += Nzx;
                    data_for_parent->myNxz += Nxz;
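                    // Derivation sketch: each Nij is a first moment of the normal field
                    // about the reference point P, so with d = childP - parentP,
                    //   Nij(about parent P) = Nij(about child P) + N_i*d_j,
                    // which is exactly the N[...]*displacement[...] correction above.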
#if TAYLOR_SERIES_ORDER >= 2
                    if (order >= 2)
                    {
                        // Adjust Nijk for the change in centre P
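                        // Derivation sketch: the second moments shift by expanding
                        // (x - parentP) = (x - childP) + d; e.g. for the diagonal terms,
                        //   Niii(about parent P) = Niii(about child P) + 2*d_i*Nii + d_i*d_i*N_i,
                        // which is the pattern followed by the corrections below.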
                        data_for_parent->myNijkDiag += T(2)*displacement*child_data.myNijDiag + displacement*displacement*child_data.myN;
                        data_for_parent->mySumPermuteNxyz += (displacement[0]*(Nyz+Nzy) + displacement[1]*(Nzx+Nxz) + displacement[2]*(Nxy+Nyx));
                        data_for_parent->my2Nxxy_Nyxx +=
                            2*(displacement[1]*child_data.myNijDiag[0] + displacement[0]*child_data.myNxy + N[0]*displacement[0]*displacement[1])
                            + 2*child_data.myNyx*displacement[0] + N[1]*displacement[0]*displacement[0];
                        data_for_parent->my2Nxxz_Nzxx +=
                            2*(displacement[2]*child_data.myNijDiag[0] + displacement[0]*child_data.myNxz + N[0]*displacement[0]*displacement[2])
                            + 2*child_data.myNzx*displacement[0] + N[2]*displacement[0]*displacement[0];
                        data_for_parent->my2Nyyz_Nzyy +=
                            2*(displacement[2]*child_data.myNijDiag[1] + displacement[1]*child_data.myNyz + N[1]*displacement[1]*displacement[2])
                            + 2*child_data.myNzy*displacement[1] + N[2]*displacement[1]*displacement[1];
                        data_for_parent->my2Nyyx_Nxyy +=
                            2*(displacement[0]*child_data.myNijDiag[1] + displacement[1]*child_data.myNyx + N[1]*displacement[1]*displacement[0])
                            + 2*child_data.myNxy*displacement[1] + N[0]*displacement[1]*displacement[1];
                        data_for_parent->my2Nzzx_Nxzz +=
                            2*(displacement[0]*child_data.myNijDiag[2] + displacement[2]*child_data.myNzx + N[2]*displacement[2]*displacement[0])
                            + 2*child_data.myNxz*displacement[2] + N[0]*displacement[2]*displacement[2];
                        data_for_parent->my2Nzzy_Nyzz +=
                            2*(displacement[1]*child_data.myNijDiag[2] + displacement[2]*child_data.myNzy + N[2]*displacement[2]*displacement[1])
                            + 2*child_data.myNyz*displacement[2] + N[1]*displacement[2]*displacement[2];
                    }
#endif
                }
            }
#endif
#if SOLID_ANGLE_DEBUG
            UTdebugFormat("");
            UTdebugFormat("Node {}: nchildren = {}; maxP = {}", nodei, nchildren, SYSsqrt(current_box_data.myMaxPDist2));
            UTdebugFormat("    P = {}; N = {}", current_box_data.myAverageP, current_box_data.myN);
#if TAYLOR_SERIES_ORDER >= 1
            UTdebugFormat("    Nii = {}", current_box_data.myNijDiag);
            UTdebugFormat("    Nxy+Nyx = {}; Nyz+Nzy = {}; Nzx+Nxz = {}", current_box_data.myNxy_Nyx, current_box_data.myNyz_Nzy, current_box_data.myNzx_Nxz);
#if TAYLOR_SERIES_ORDER >= 2
            UTdebugFormat("    Niii = {}; 2(Nxyz+Nyzx+Nzxy) = {}", current_box_data.myNijkDiag, current_box_data.mySumPermuteNxyz);
            UTdebugFormat("    2Nxxy+Nyxx = {}; 2Nxxz+Nzxx = {}", current_box_data.my2Nxxy_Nyxx, current_box_data.my2Nxxz_Nzxx);
            UTdebugFormat("    2Nyyz+Nzyy = {}; 2Nyyx+Nxyy = {}", current_box_data.my2Nyyz_Nzyy, current_box_data.my2Nyyx_Nxyy);
            UTdebugFormat("    2Nzzx+Nxzz = {}; 2Nzzy+Nyzz = {}", current_box_data.my2Nzzx_Nxzz, current_box_data.my2Nzzy_Nyzz);
#endif
#endif
#endif
        }
    };
#if SOLID_ANGLE_TIME_PRECOMPUTE
    timer.start();
#endif
    const PrecomputeFunctors functors(box_data, triangle_boxes.array(), triangle_points, positions, order);
    // NOTE: post-functor relies on non-null data_for_parent, so we have to pass one.
    LocalData local_data;
    myTree.template traverseParallel<LocalData>(4096, functors, &local_data);
    //myTree.template traverse<LocalData>(functors);
#if SOLID_ANGLE_TIME_PRECOMPUTE
    time = timer.stop();
    UTdebugFormat("{} s to precompute coefficients.", time);
#endif
}
template<typename T,typename S>
inline void UT_SolidAngle<T, S>::clear()
{
    myTree.clear();
    myNBoxes = 0;
    myOrder = 2;
    myData.reset();
    myNTriangles = 0;
    myTrianglePoints = nullptr;
    myNPoints = 0;
    myPositions = nullptr;
}
template<typename T,typename S>
inline T UT_SolidAngle<T, S>::computeSolidAngle(const UT_Vector3T<T> &query_point, const T accuracy_scale) const
{
    const T accuracy_scale2 = accuracy_scale*accuracy_scale;
    struct SolidAngleFunctors
    {
        const BoxData *const myBoxData;
        const UT_Vector3T<T> myQueryPoint;
        const T myAccuracyScale2;
        const UT_Vector3T<S> *const myPositions;
        const int *const myTrianglePoints;
        const int myOrder;
        SolidAngleFunctors(
            const BoxData *const box_data,
            const UT_Vector3T<T> &query_point,
            const T accuracy_scale2,
            const int order,
            const UT_Vector3T<S> *const positions,
            const int *const triangle_points)
            : myBoxData(box_data)
            , myQueryPoint(query_point)
            , myAccuracyScale2(accuracy_scale2)
            , myPositions(positions)
            , myTrianglePoints(triangle_points)
            , myOrder(order)
        {}
        uint pre(const int nodei, T *data_for_parent) const
        {
            const BoxData &data = myBoxData[nodei];
            const typename BoxData::Type maxP2 = data.myMaxPDist2;
            UT_FixedVector<typename BoxData::Type,3> q;
            q[0] = typename BoxData::Type(myQueryPoint[0]);
            q[1] = typename BoxData::Type(myQueryPoint[1]);
            q[2] = typename BoxData::Type(myQueryPoint[2]);
            q -= data.myAverageP;
            const typename BoxData::Type qlength2 = q[0]*q[0] + q[1]*q[1] + q[2]*q[2];
            // If the query point is within a factor of accuracy_scale of the box radius,
            // the approximation is assumed not to be accurate enough, so the traversal descends.
            // TODO: Is there a way to estimate the error?
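            // For example, with accuracy_scale = 2, a box whose farthest surface point
            // lies a distance r from its average P is approximated only for query points
            // farther than 2r from P; anything closer descends to the children.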
            static_assert((std::is_same<typename BoxData::Type,v4uf>::value), "FIXME: Implement support for other tuple types!");
            v4uu descend_mask = (qlength2 <= maxP2*myAccuracyScale2);
            uint descend_bitmask = _mm_movemask_ps(V4SF(descend_mask.vector));
            constexpr uint allchildbits = ((uint(1)<<BVH_N)-1);
            if (descend_bitmask == allchildbits)
            {
                *data_for_parent = 0;
                return allchildbits;
            }
            // qlength2 must be non-zero, since it's strictly greater than something non-negative.
            // We still need to be careful about NaNs, though, because the 4th power might cause problems.
            const typename BoxData::Type qlength_m2 = typename BoxData::Type(1.0)/qlength2;
            const typename BoxData::Type qlength_m1 = sqrt(qlength_m2);
            // Normalize q to reduce issues with overflow/underflow, since we'd need the 7th power
            // if we didn't normalize, and (1e-6)^-7 = 1e42, which overflows single-precision.
            q *= qlength_m1;
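            // Zeroth-order term of the Taylor expansion: Omega_0 = -(q . N)/|q|^3.
            // With q normalized above, that is expressed here as -dot(q,N)/|q|^2.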
            typename BoxData::Type Omega_approx = -qlength_m2*dot(q,data.myN);
#if TAYLOR_SERIES_ORDER >= 1
            const int order = myOrder;
            if (order >= 1)
            {
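                // First-order term, with qhat denoting the normalized q:
                //   Omega_1 = (trace(Nij) - 3*qhat^T * Nij * qhat)/|q|^3,
                // expanded below using the symmetrized off-diagonal sums
                // (Nxy+Nyx, etc.) precomputed per box.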
                const UT_FixedVector<typename BoxData::Type,3> q2 = q*q;
                const typename BoxData::Type qlength_m3 = qlength_m2*qlength_m1;
                const typename BoxData::Type Omega_1 =
                    qlength_m3*(data.myNijDiag[0] + data.myNijDiag[1] + data.myNijDiag[2]
                        -typename BoxData::Type(3.0)*(dot(q2,data.myNijDiag) +
                            q[0]*q[1]*data.myNxy_Nyx +
                            q[0]*q[2]*data.myNzx_Nxz +
                            q[1]*q[2]*data.myNyz_Nzy));
                Omega_approx += Omega_1;
#if TAYLOR_SERIES_ORDER >= 2
                if (order >= 2)
                {
                    const UT_FixedVector<typename BoxData::Type,3> q3 = q2*q;
                    const typename BoxData::Type qlength_m4 = qlength_m2*qlength_m2;
                    typename BoxData::Type temp0[3] = {
                        data.my2Nyyx_Nxyy+data.my2Nzzx_Nxzz,
                        data.my2Nzzy_Nyzz+data.my2Nxxy_Nyxx,
                        data.my2Nxxz_Nzxx+data.my2Nyyz_Nzyy
                    };
                    typename BoxData::Type temp1[3] = {
                        q[1]*data.my2Nxxy_Nyxx + q[2]*data.my2Nxxz_Nzxx,
                        q[2]*data.my2Nyyz_Nzyy + q[0]*data.my2Nyyx_Nxyy,
                        q[0]*data.my2Nzzx_Nxzz + q[1]*data.my2Nzzy_Nyzz
                    };
                    const typename BoxData::Type Omega_2 =
                        qlength_m4*(typename BoxData::Type(1.5)*dot(q, typename BoxData::Type(3)*data.myNijkDiag + UT_FixedVector<typename BoxData::Type,3>(temp0))
                            -typename BoxData::Type(7.5)*(dot(q3,data.myNijkDiag) + q[0]*q[1]*q[2]*data.mySumPermuteNxyz + dot(q2, UT_FixedVector<typename BoxData::Type,3>(temp1))));
                    Omega_approx += Omega_2;
                }
#endif
            }
#endif
            // If q is so small that we got NaNs and we just have a
            // small bounding box, it needs to descend.
            const v4uu mask = Omega_approx.isFinite() & ~descend_mask;
            Omega_approx = Omega_approx & mask;
            descend_bitmask = (~_mm_movemask_ps(V4SF(mask.vector))) & allchildbits;
            T sum = Omega_approx[0];
            for (int i = 1; i < BVH_N; ++i)
                sum += Omega_approx[i];
            *data_for_parent = sum;
            return descend_bitmask;
        }
        void item(const int itemi, const int /*parent_nodei*/, T &data_for_parent) const
        {
            const UT_Vector3T<S> *const positions = myPositions;
            const int *const cur_triangle_points = myTrianglePoints + 3*itemi;
            const UT_Vector3T<T> a = positions[cur_triangle_points[0]];
            const UT_Vector3T<T> b = positions[cur_triangle_points[1]];
            const UT_Vector3T<T> c = positions[cur_triangle_points[2]];
            data_for_parent = UTsignedSolidAngleTri(a, b, c, myQueryPoint);
        }
        SYS_FORCE_INLINE void post(const int /*nodei*/, const int /*parent_nodei*/, T *data_for_parent, const int nchildren, const T *child_data_array, const uint descend_bits) const
        {
            T sum = (descend_bits&1) ? child_data_array[0] : 0;
            for (int i = 1; i < nchildren; ++i)
                sum += ((descend_bits>>i)&1) ? child_data_array[i] : 0;
            *data_for_parent += sum;
        }
    };
    const SolidAngleFunctors functors(myData.get(), query_point, accuracy_scale2, myOrder, myPositions, myTrianglePoints);
    T sum;
    myTree.traverseVector(functors, &sum);
    return sum;
}
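// A minimal usage sketch (illustrative only; the array names here are hypothetical):
//
//     UT_SolidAngle<float,float> solid_angle;
//     solid_angle.init(ntriangles, triangle_points, npoints, positions, 2);
//     float omega = solid_angle.computeSolidAngle(query_point, 2.0f);
//     float winding_number = omega / (4.0f * float(M_PI));
//
// computeSolidAngle returns a signed solid angle in steradians, so dividing by
// 4*pi gives the winding number commonly used for inside/outside tests.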
template<typename T,typename S>
struct UT_SubtendedAngle<T,S>::BoxData
{
    void clear()
    {
        // Set everything to zero
        memset(this,0,sizeof(*this));
    }
    using Type = typename std::conditional<BVH_N==4 && std::is_same<T,float>::value, v4uf, UT_FixedVector<T,BVH_N>>::type;
    using SType = typename std::conditional<BVH_N==4 && std::is_same<S,float>::value, v4uf, UT_FixedVector<S,BVH_N>>::type;
    /// An upper bound on the squared distance from myAverageP to the farthest point in the box.
    SType myMaxPDist2;
    /// Centre of mass of the curve in this box
    UT_FixedVector<Type,2> myAverageP;
    /// Unnormalized, length-weighted normal of the curve in this box
    UT_FixedVector<Type,2> myN;
    /// Values for Omega_1
    /// @{
    UT_FixedVector<Type,2> myNijDiag; // Nxx, Nyy
    Type myNxy_Nyx; // Nxy+Nyx
    /// @}
    /// Values for Omega_2
    /// @{
    UT_FixedVector<Type,2> myNijkDiag; // Nxxx, Nyyy
    Type my2Nxxy_Nyxx; // Nxxy+Nxyx+Nyxx = 2Nxxy+Nyxx
    Type my2Nyyx_Nxyy; // Nyyx+Nyxy+Nxyy = 2Nyyx+Nxyy
    /// @}
};
template<typename T,typename S>
inline UT_SubtendedAngle<T,S>::UT_SubtendedAngle()
    : myTree()
    , myNBoxes(0)
    , myOrder(2)
    , myData(nullptr)
    , myNSegments(0)
    , mySegmentPoints(nullptr)
    , myNPoints(0)
    , myPositions(nullptr)
{}
template<typename T,typename S>
inline UT_SubtendedAngle<T,S>::~UT_SubtendedAngle()
{
    // Default destruction works, but this needs to be outlined
    // to avoid having to include UT_BVHImpl.h in the header,
    // (for the UT_UniquePtr destructor.)
}
template<typename T,typename S>
inline void UT_SubtendedAngle<T,S>::init(
    const int nsegments,
    const int *const segment_points,
    const int npoints,
    const UT_Vector2T<S> *const positions,
    const int order)
{
#if SOLID_ANGLE_DEBUG
    UTdebugFormat("");
    UTdebugFormat("");
    UTdebugFormat("Building BVH for {} segments on {} points:", nsegments, npoints);
#endif
    myOrder = order;
    myNSegments = nsegments;
    mySegmentPoints = segment_points;
    myNPoints = npoints;
    myPositions = positions;
#if SOLID_ANGLE_TIME_PRECOMPUTE
    UT_StopWatch timer;
    timer.start();
#endif
    UT_SmallArray<UT::Box<S,2>> segment_boxes;
    segment_boxes.setSizeNoInit(nsegments);
    if (nsegments < 16*1024)
    {
        const int *cur_segment_points = segment_points;
        for (int i = 0; i < nsegments; ++i, cur_segment_points += 2)
        {
            UT::Box<S,2> &box = segment_boxes[i];
            box.initBounds(positions[cur_segment_points[0]]);
            box.enlargeBounds(positions[cur_segment_points[1]]);
        }
    }
    else
    {
        igl::parallel_for(nsegments,
            [segment_points,&segment_boxes,positions](int i)
            {
                const int *cur_segment_points = segment_points + i*2;
                UT::Box<S,2> &box = segment_boxes[i];
                box.initBounds(positions[cur_segment_points[0]]);
                box.enlargeBounds(positions[cur_segment_points[1]]);
            });
    }
#if SOLID_ANGLE_TIME_PRECOMPUTE
    double time = timer.stop();
    UTdebugFormat("{} s to create bounding boxes.", time);
    timer.start();
#endif
    myTree.template init<UT::BVH_Heuristic::BOX_AREA,S,2>(segment_boxes.array(), nsegments);
#if SOLID_ANGLE_TIME_PRECOMPUTE
    time = timer.stop();
    UTdebugFormat("{} s to initialize UT_BVH structure. {} nodes", time, myTree.getNumNodes());
#endif
    //myTree.debugDump();
    const int nnodes = myTree.getNumNodes();
    myNBoxes = nnodes;
    BoxData *box_data = new BoxData[nnodes];
    myData.reset(box_data);
    // Some data are only needed during initialization.
    struct LocalData
    {
        // Bounding box
        UT::Box<S,2> myBox;
        // P and N are needed from each child for computing Nij.
        UT_Vector2T<T> myAverageP;
        UT_Vector2T<T> myLengthP;
        UT_Vector2T<T> myN;
        // Unsigned length is needed for computing the average position.
        T myLength;
        // These are needed for computing Nijk.
        UT_Vector2T<T> myNijDiag;
        T myNxy; T myNyx;
        UT_Vector2T<T> myNijkDiag; // Nxxx, Nyyy
        T my2Nxxy_Nyxx; // Nxxy+Nxyx+Nyxx = 2Nxxy+Nyxx
        T my2Nyyx_Nxyy; // Nyyx+Nyxy+Nxyy = 2Nyyx+Nxyy
    };
    struct PrecomputeFunctors
    {
        BoxData *const myBoxData;
        const UT::Box<S,2> *const mySegmentBoxes;
        const int *const mySegmentPoints;
        const UT_Vector2T<S> *const myPositions;
        const int myOrder;
        PrecomputeFunctors(
            BoxData *box_data,
            const UT::Box<S,2> *segment_boxes,
            const int *segment_points,
            const UT_Vector2T<S> *positions,
            const int order)
            : myBoxData(box_data)
            , mySegmentBoxes(segment_boxes)
            , mySegmentPoints(segment_points)
            , myPositions(positions)
            , myOrder(order)
        {}
        constexpr SYS_FORCE_INLINE bool pre(const int /*nodei*/, LocalData * /*data_for_parent*/) const
        {
            return true;
        }
        void item(const int itemi, const int /*parent_nodei*/, LocalData &data_for_parent) const
        {
            const UT_Vector2T<S> *const positions = myPositions;
            const int *const cur_segment_points = mySegmentPoints + 2*itemi;
            const UT_Vector2T<T> a = positions[cur_segment_points[0]];
            const UT_Vector2T<T> b = positions[cur_segment_points[1]];
            const UT_Vector2T<T> ab = b-a;
            const UT::Box<S,2> &segment_box = mySegmentBoxes[itemi];
            data_for_parent.myBox = segment_box;
            // Length-weighted normal (unnormalized)
            UT_Vector2T<T> N;
            N[0] = ab[1];
            N[1] = -ab[0];
            const T length2 = ab.length2();
            const T length = SYSsqrt(length2);
            const UT_Vector2T<T> P = T(0.5)*(a+b);
            data_for_parent.myAverageP = P;
            data_for_parent.myLengthP = P*length;
            data_for_parent.myN = N;
#if SOLID_ANGLE_DEBUG
            UTdebugFormat("");
            UTdebugFormat("Segment {}: P = {}; N = {}; length = {}", itemi, P, N, length);
            UTdebugFormat("    box = {}", data_for_parent.myBox);
#endif
            data_for_parent.myLength = length;
            const int order = myOrder;
            if (order < 1)
                return;
            // NOTE: Because P is at the segment's centroid, each segment
            // contributes zero to Nij.
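            // Sketch of why: parametrize the segment as a + t*(b-a) with t in [0,1];
            // then x - P = (t - 1/2)*(b-a), and the integral of (t - 1/2) over [0,1]
            // is zero, so all first moments about the centroid vanish.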
            data_for_parent.myNijDiag = T(0);
            data_for_parent.myNxy = 0; data_for_parent.myNyx = 0;
            if (order < 2)
                return;
            // If it's zero-length, the results are zero, so we can skip.
            if (length == 0)
            {
                data_for_parent.myNijkDiag = T(0);
                data_for_parent.my2Nxxy_Nyxx = 0;
                data_for_parent.my2Nyyx_Nxyy = 0;
                return;
            }
            T integral_xx = ab[0]*ab[0]/T(12);
            T integral_xy = ab[0]*ab[1]/T(12);
            T integral_yy = ab[1]*ab[1]/T(12);
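            // The 1/12 factors are the second moment of the uniform distribution on
            // [0,1] about its mean: the integral of (t - 1/2)^2 over [0,1] is 1/12,
            // with x - P = (t - 1/2)*ab supplying the ab components.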
            data_for_parent.myNijkDiag[0] = integral_xx*N[0];
            data_for_parent.myNijkDiag[1] = integral_yy*N[1];
            T Nxxy = N[0]*integral_xy;
            T Nyxx = N[1]*integral_xx;
            T Nyyx = N[1]*integral_xy;
            T Nxyy = N[0]*integral_yy;
            data_for_parent.my2Nxxy_Nyxx = 2*Nxxy + Nyxx;
            data_for_parent.my2Nyyx_Nxyy = 2*Nyyx + Nxyy;
#if SOLID_ANGLE_DEBUG
            UTdebugFormat("    integral_xx = {}; yy = {}", integral_xx, integral_yy);
            UTdebugFormat("    integral_xy = {}", integral_xy);
#endif
        }
        void post(const int nodei, const int /*parent_nodei*/, LocalData *data_for_parent, const int nchildren, const LocalData *child_data_array) const
        {
            // NOTE: Although in the general case, data_for_parent may be null for the root call,
            // this functor assumes that it's non-null, so the call below must pass a non-null pointer.
            BoxData &current_box_data = myBoxData[nodei];
            UT_Vector2T<T> N = child_data_array[0].myN;
            ((T*)&current_box_data.myN[0])[0] = N[0];
            ((T*)&current_box_data.myN[1])[0] = N[1];
            UT_Vector2T<T> lengthP = child_data_array[0].myLengthP;
            T length = child_data_array[0].myLength;
            const UT_Vector2T<T> local_P = child_data_array[0].myAverageP;
            ((T*)&current_box_data.myAverageP[0])[0] = local_P[0];
            ((T*)&current_box_data.myAverageP[1])[0] = local_P[1];
            for (int i = 1; i < nchildren; ++i)
            {
                const UT_Vector2T<T> local_N = child_data_array[i].myN;
                N += local_N;
                ((T*)&current_box_data.myN[0])[i] = local_N[0];
                ((T*)&current_box_data.myN[1])[i] = local_N[1];
                lengthP += child_data_array[i].myLengthP;
                length += child_data_array[i].myLength;
                const UT_Vector2T<T> local_P = child_data_array[i].myAverageP;
                ((T*)&current_box_data.myAverageP[0])[i] = local_P[0];
                ((T*)&current_box_data.myAverageP[1])[i] = local_P[1];
            }
            for (int i = nchildren; i < BVH_N; ++i)
            {
                // Set to zero, just to avoid false positives for uses of uninitialized memory.
                ((T*)&current_box_data.myN[0])[i] = 0;
                ((T*)&current_box_data.myN[1])[i] = 0;
                ((T*)&current_box_data.myAverageP[0])[i] = 0;
                ((T*)&current_box_data.myAverageP[1])[i] = 0;
            }
            data_for_parent->myN = N;
            data_for_parent->myLengthP = lengthP;
            data_for_parent->myLength = length;
            UT::Box<S,2> box(child_data_array[0].myBox);
            for (int i = 1; i < nchildren; ++i)
                box.combine(child_data_array[i].myBox);
            // Normalize P
            UT_Vector2T<T> averageP;
            if (length > 0)
                averageP = lengthP/length;
            else
                averageP = T(0.5)*(box.getMin() + box.getMax());
            data_for_parent->myAverageP = averageP;
            data_for_parent->myBox = box;
            for (int i = 0; i < nchildren; ++i)
            {
                const UT::Box<S,2> &local_box(child_data_array[i].myBox);
                const UT_Vector2T<T> &local_P = child_data_array[i].myAverageP;
                const UT_Vector2T<T> maxPDiff = SYSmax(local_P-UT_Vector2T<T>(local_box.getMin()), UT_Vector2T<T>(local_box.getMax())-local_P);
                ((T*)&current_box_data.myMaxPDist2)[i] = maxPDiff.length2();
            }
            for (int i = nchildren; i < BVH_N; ++i)
            {
                // This child is non-existent. If we set myMaxPDist2 to infinity, it will never
                // use the approximation, and the traverseVector function can check for EMPTY.
                ((T*)&current_box_data.myMaxPDist2)[i] = std::numeric_limits<T>::infinity();
            }
            const int order = myOrder;
            if (order >= 1)
            {
                // We now have the current box's P, so we can adjust Nij and Nijk
                data_for_parent->myNijDiag = child_data_array[0].myNijDiag;
                data_for_parent->myNxy = 0;
                data_for_parent->myNyx = 0;
                data_for_parent->myNijkDiag = child_data_array[0].myNijkDiag;
                data_for_parent->my2Nxxy_Nyxx = child_data_array[0].my2Nxxy_Nyxx;
                data_for_parent->my2Nyyx_Nxyy = child_data_array[0].my2Nyyx_Nxyy;
                for (int i = 1; i < nchildren; ++i)
                {
                    data_for_parent->myNijDiag += child_data_array[i].myNijDiag;
                    data_for_parent->myNijkDiag += child_data_array[i].myNijkDiag;
                    data_for_parent->my2Nxxy_Nyxx += child_data_array[i].my2Nxxy_Nyxx;
                    data_for_parent->my2Nyyx_Nxyy += child_data_array[i].my2Nyyx_Nxyy;
                }
                for (int j = 0; j < 2; ++j)
                    ((T*)&current_box_data.myNijDiag[j])[0] = child_data_array[0].myNijDiag[j];
                ((T*)&current_box_data.myNxy_Nyx)[0] = child_data_array[0].myNxy + child_data_array[0].myNyx;
                for (int j = 0; j < 2; ++j)
                    ((T*)&current_box_data.myNijkDiag[j])[0] = child_data_array[0].myNijkDiag[j];
                ((T*)&current_box_data.my2Nxxy_Nyxx)[0] = child_data_array[0].my2Nxxy_Nyxx;
                ((T*)&current_box_data.my2Nyyx_Nxyy)[0] = child_data_array[0].my2Nyyx_Nxyy;
                for (int i = 1; i < nchildren; ++i)
                {
                    for (int j = 0; j < 2; ++j)
                        ((T*)&current_box_data.myNijDiag[j])[i] = child_data_array[i].myNijDiag[j];
                    ((T*)&current_box_data.myNxy_Nyx)[i] = child_data_array[i].myNxy + child_data_array[i].myNyx;
                    for (int j = 0; j < 2; ++j)
                        ((T*)&current_box_data.myNijkDiag[j])[i] = child_data_array[i].myNijkDiag[j];
                    ((T*)&current_box_data.my2Nxxy_Nyxx)[i] = child_data_array[i].my2Nxxy_Nyxx;
                    ((T*)&current_box_data.my2Nyyx_Nxyy)[i] = child_data_array[i].my2Nyyx_Nxyy;
                }
                for (int i = nchildren; i < BVH_N; ++i)
                {
                    // Set to zero, just to avoid false positives for uses of uninitialized memory.
                    for (int j = 0; j < 2; ++j)
                        ((T*)&current_box_data.myNijDiag[j])[i] = 0;
                    ((T*)&current_box_data.myNxy_Nyx)[i] = 0;
                    for (int j = 0; j < 2; ++j)
                        ((T*)&current_box_data.myNijkDiag[j])[i] = 0;
                    ((T*)&current_box_data.my2Nxxy_Nyxx)[i] = 0;
                    ((T*)&current_box_data.my2Nyyx_Nxyy)[i] = 0;
                }
                for (int i = 0; i < nchildren; ++i)
                {
                    const LocalData &child_data = child_data_array[i];
                    UT_Vector2T<T> displacement = child_data.myAverageP - UT_Vector2T<T>(data_for_parent->myAverageP);
                    UT_Vector2T<T> N = child_data.myN;
                    // Adjust Nij for the change in centre P
                    data_for_parent->myNijDiag += N*displacement;
                    T Nxy = child_data.myNxy + N[0]*displacement[1];
                    T Nyx = child_data.myNyx + N[1]*displacement[0];
                    data_for_parent->myNxy += Nxy;
                    data_for_parent->myNyx += Nyx;
                    if (order >= 2)
                    {
                        // Adjust Nijk for the change in centre P
                        data_for_parent->myNijkDiag += T(2)*displacement*child_data.myNijDiag + displacement*displacement*child_data.myN;
                        data_for_parent->my2Nxxy_Nyxx +=
                            2*(displacement[1]*child_data.myNijDiag[0] + displacement[0]*child_data.myNxy + N[0]*displacement[0]*displacement[1])
                            + 2*child_data.myNyx*displacement[0] + N[1]*displacement[0]*displacement[0];
                        data_for_parent->my2Nyyx_Nxyy +=
                            2*(displacement[0]*child_data.myNijDiag[1] + displacement[1]*child_data.myNyx + N[1]*displacement[1]*displacement[0])
                            + 2*child_data.myNxy*displacement[1] + N[0]*displacement[1]*displacement[1];
                    }
                }
            }
#if SOLID_ANGLE_DEBUG
            UTdebugFormat("");
            UTdebugFormat("Node {}: nchildren = {}; maxP = {}", nodei, nchildren, SYSsqrt(current_box_data.myMaxPDist2));
            UTdebugFormat("    P = {}; N = {}", current_box_data.myAverageP, current_box_data.myN);
            UTdebugFormat("    Nii = {}", current_box_data.myNijDiag);
            UTdebugFormat("    Nxy+Nyx = {}", current_box_data.myNxy_Nyx);
            UTdebugFormat("    Niii = {}", current_box_data.myNijkDiag);
            UTdebugFormat("    2Nxxy+Nyxx = {}; 2Nyyx+Nxyy = {}", current_box_data.my2Nxxy_Nyxx, current_box_data.my2Nyyx_Nxyy);
#endif
        }
    };
#if SOLID_ANGLE_TIME_PRECOMPUTE
    timer.start();
#endif
    const PrecomputeFunctors functors(box_data, segment_boxes.array(), segment_points, positions, order);
    // NOTE: post-functor relies on non-null data_for_parent, so we have to pass one.
    LocalData local_data;
    myTree.template traverseParallel<LocalData>(4096, functors, &local_data);
    //myTree.template traverse<LocalData>(functors);
#if SOLID_ANGLE_TIME_PRECOMPUTE
    time = timer.stop();
    UTdebugFormat("{} s to precompute coefficients.", time);
#endif
}
template<typename T,typename S>
inline void UT_SubtendedAngle<T, S>::clear()
{
    myTree.clear();
    myNBoxes = 0;
    myOrder = 2;
    myData.reset();
    myNSegments = 0;
    mySegmentPoints = nullptr;
    myNPoints = 0;
    myPositions = nullptr;
}
template<typename T,typename S>
inline T UT_SubtendedAngle<T, S>::computeAngle(const UT_Vector2T<T> &query_point, const T accuracy_scale) const
{
    const T accuracy_scale2 = accuracy_scale*accuracy_scale;
    struct AngleFunctors
    {
        const BoxData *const myBoxData;
        const UT_Vector2T<T> myQueryPoint;
        const T myAccuracyScale2;
        const UT_Vector2T<S> *const myPositions;
        const int *const mySegmentPoints;
        const int myOrder;
        AngleFunctors(
            const BoxData *const box_data,
            const UT_Vector2T<T> &query_point,
            const T accuracy_scale2,
            const int order,
            const UT_Vector2T<S> *const positions,
            const int *const segment_points)
            : myBoxData(box_data)
            , myQueryPoint(query_point)
            , myAccuracyScale2(accuracy_scale2)
            , myOrder(order)
            , myPositions(positions)
            , mySegmentPoints(segment_points)
        {}
        uint pre(const int nodei, T *data_for_parent) const
        {
            const BoxData &data = myBoxData[nodei];
            const typename BoxData::Type maxP2 = data.myMaxPDist2;
            UT_FixedVector<typename BoxData::Type,2> q;
            q[0] = typename BoxData::Type(myQueryPoint[0]);
            q[1] = typename BoxData::Type(myQueryPoint[1]);
            q -= data.myAverageP;
            const typename BoxData::Type qlength2 = q[0]*q[0] + q[1]*q[1];
            // If the query point is within a factor of accuracy_scale of the box radius,
            // the approximation is assumed not to be accurate enough, so the traversal descends.
            // TODO: Is there a way to estimate the error?
            static_assert((std::is_same<typename BoxData::Type,v4uf>::value), "FIXME: Implement support for other tuple types!");
            v4uu descend_mask = (qlength2 <= maxP2*myAccuracyScale2);
            uint descend_bitmask = _mm_movemask_ps(V4SF(descend_mask.vector));
            constexpr uint allchildbits = ((uint(1)<<BVH_N)-1);
            if (descend_bitmask == allchildbits)
            {
                *data_for_parent = 0;
                return allchildbits;
            }
            // qlength2 must be non-zero, since it's strictly greater than something non-negative.
            // We still need to be careful about NaNs, though, because the 4th power might cause problems.
            const typename BoxData::Type qlength_m2 = typename BoxData::Type(1.0)/qlength2;
            const typename BoxData::Type qlength_m1 = sqrt(qlength_m2);
            // Normalize q to reduce issues with overflow/underflow, since we'd need the 6th power
            // if we didn't normalize, and (1e-7)^-6 = 1e42, which overflows single-precision.
            q *= qlength_m1;
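            // Zeroth-order term in 2D: Omega_0 = -(q . N)/|q|^2; with q normalized
            // above, that is expressed here as -dot(q,N)/|q|.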
            typename BoxData::Type Omega_approx = -qlength_m1*dot(q,data.myN);
            const int order = myOrder;
            if (order >= 1)
            {
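                // First-order term in 2D, with qhat denoting the normalized q:
                //   Omega_1 = (trace(Nij) - 2*qhat^T * Nij * qhat)/|q|^2.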
                const UT_FixedVector<typename BoxData::Type,2> q2 = q*q;
                const typename BoxData::Type Omega_1 =
                    qlength_m2*(data.myNijDiag[0] + data.myNijDiag[1]
                        -typename BoxData::Type(2.0)*(dot(q2,data.myNijDiag) +
                            q[0]*q[1]*data.myNxy_Nyx));
                Omega_approx += Omega_1;
                if (order >= 2)
                {
                    const UT_FixedVector<typename BoxData::Type,2> q3 = q2*q;
                    const typename BoxData::Type qlength_m3 = qlength_m2*qlength_m1;
                    typename BoxData::Type temp0[2] = {
                        data.my2Nyyx_Nxyy,
                        data.my2Nxxy_Nyxx
                    };
                    typename BoxData::Type temp1[2] = {
                        q[1]*data.my2Nxxy_Nyxx,
                        q[0]*data.my2Nyyx_Nxyy
                    };
                    const typename BoxData::Type Omega_2 =
                        qlength_m3*(dot(q, typename BoxData::Type(3)*data.myNijkDiag + UT_FixedVector<typename BoxData::Type,2>(temp0))
                            -typename BoxData::Type(4.0)*(dot(q3,data.myNijkDiag) + dot(q2, UT_FixedVector<typename BoxData::Type,2>(temp1))));
                    Omega_approx += Omega_2;
                }
            }
            // If q is so small that we got NaNs and we just have a
            // small bounding box, it needs to descend.
            const v4uu mask = Omega_approx.isFinite() & ~descend_mask;
            Omega_approx = Omega_approx & mask;
            descend_bitmask = (~_mm_movemask_ps(V4SF(mask.vector))) & allchildbits;
            T sum = Omega_approx[0];
            for (int i = 1; i < BVH_N; ++i)
                sum += Omega_approx[i];
            *data_for_parent = sum;
            return descend_bitmask;
        }
        void item(const int itemi, const int /*parent_nodei*/, T &data_for_parent) const
        {
            const UT_Vector2T<S> *const positions = myPositions;
            const int *const cur_segment_points = mySegmentPoints + 2*itemi;
            const UT_Vector2T<T> a = positions[cur_segment_points[0]];
            const UT_Vector2T<T> b = positions[cur_segment_points[1]];
            data_for_parent = UTsignedAngleSegment(a, b, myQueryPoint);
        }
        SYS_FORCE_INLINE void post(const int /*nodei*/, const int /*parent_nodei*/, T *data_for_parent, const int nchildren, const T *child_data_array, const uint descend_bits) const
        {
            T sum = (descend_bits&1) ? child_data_array[0] : 0;
            for (int i = 1; i < nchildren; ++i)
                sum += ((descend_bits>>i)&1) ? child_data_array[i] : 0;
            *data_for_parent += sum;
        }
    };
    const AngleFunctors functors(myData.get(), query_point, accuracy_scale2, myOrder, myPositions, mySegmentPoints);
    T sum;
    myTree.traverseVector(functors, &sum);
    return sum;
}
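// A minimal usage sketch (illustrative only; the array names here are hypothetical):
//
//     UT_SubtendedAngle<float,float> subtended;
//     subtended.init(nsegments, segment_points, npoints, positions, 2);
//     float theta = subtended.computeAngle(query_point, 2.0f);
//     float winding_number = theta / (2.0f * float(M_PI));
//
// computeAngle returns the signed angle (in radians) subtended by the curve, the
// 2D analogue of the solid angle, so dividing by 2*pi gives the 2D winding number.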
// Instantiate our templates.
//template class UT_SolidAngle<fpreal32,fpreal32>;
// FIXME: The SIMD parts will need to be handled differently in order to support fpreal64.
//template class UT_SolidAngle<fpreal64,fpreal32>;
//template class UT_SolidAngle<fpreal64,fpreal64>;
//template class UT_SubtendedAngle<fpreal32,fpreal32>;
//template class UT_SubtendedAngle<fpreal64,fpreal32>;
//template class UT_SubtendedAngle<fpreal64,fpreal64>;
} // End HDK_Sample namespace
}} // End FastWindingNumber and igl namespaces