- //===================================================================================
- // Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
- //
- // Permission is hereby granted, free of charge, to any person obtaining a copy
- // of this software and associated documentation files (the "Software"), to deal
- // in the Software without restriction, including without limitation the rights
- // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- // copies of the Software, and to permit persons to whom the Software is
- // furnished to do so, subject to the following conditions:
- //
- // The above copyright notice and this permission notice shall be included in
- // all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- // THE SOFTWARE.
- //
- //==================================================================================
- //----------------------------------------------------------------------------------
- // File: BC7Encode.hlsl
- //
- // The Compute Shader for BC7 Encoder
- //
- // Copyright (c) Microsoft Corporation. All rights reserved.
- // Licensed under the MIT License.
- //----------------------------------------------------------------------------------
- #ifdef ASPM_GPU
- #pragma warning(disable : 3078) // "loop control variable conflicts with a previous declaration in the outer scope"
- #else // using CPU
- #include "common_def.h"
- #include "bcn_common_api.h"
- #include <algorithm>
- #endif
- // TryMode456CS
- #define ENABLE_MODE4
- #define ENABLE_MODE5
- #define ENABLE_MODE6
- // TryMode02CS
- #define ENABLE_MODE0
- #define ENABLE_MODE2
- // TryMode137CS
- #define ENABLE_MODE1
- #define ENABLE_MODE3
- #define ENABLE_MODE7
- //#define ENABLE_CMP_MODE0
- //#define ENABLE_CMP_MODE1
- //#define ENABLE_CMP_MODE2
- //#define ENABLE_CMP_MODE3
- //#define ENABLE_CMP_MODE4
- //#define ENABLE_CMP_MODE5
- #define ENABLE_CMP_MODE6
- //#define ENABLE_CMP_MODE7
- #define ENABLE_CMP_API
- #define USE_NEW_SP_ERR_IDX
- #define ENABLE_CMP_REFINE_MODE6_API // API to improve mode 6 quality
- #define MAX_TRY_SHAKER 1 // used in cmp_ep_shaker
- //====================================================================================
- // HLSL Host Simulation
- //====================================================================================
- // Simulating the HLSL compute code on a CPU host must run single threaded.
- // On the CPU the code simulates a single compute unit, as used by the CMP DXC host.
- // Enable SIMULATE_GPU to run the simulation on the CPU using HPC in the CMP GUI or CMP CLI.
- // Note: some bcn_encode_kernel.cpp files have code specific to this simulation; enable
- // the define USE_NEW_SINGLE_HEADER_INTERFACES and pick the external or local codec
- // to run with.
- //===========================================================================
- // Prototype to debug a simple simulation of the shader using shared global data,
- // run as a single thread on the CPU
- // #define SIMULATE_GPU
- //===========================================================================
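- // Example (illustrative): to run the host simulation, define SIMULATE_GPU in the
- // CPU build before this header is processed, e.g.
- //   #define SIMULATE_GPU
- //   #include "bc7_encode_kernel.h" // include name illustrative
- //===========================================================================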
- #if !defined(ASPM_GPU)
- #define THREAD_GROUP_SIZE 64
- #define BLOCK_SIZE_X 4
- #define BLOCK_SIZE_Y 4
- #define MAX_UINT 0xFFFFFFFF
- #define MIN_UINT 0x00000000
-
- // Source Texture to process
- // Texture2D g_Input;
- // Normalized 0..1
- struct Texture2D
- {
- CGU_Vec4f Texture[16];
- CGU_Vec4f Load(CGU_Vec3ui index)
- {
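- // Map the 2D block coordinate to a linear 0..15 index; & 0x0F guards against out-of-range input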
- CGU_INT offset;
- offset = (index.x + (index.y * 4)) & 0x0F;
- return Texture[offset];
- };
- CGU_Vec4f Load(CGU_Vec3ui index, CGU_UINT32 z)
- {
- CMP_UNUSED(z);
- CGU_INT offset;
- offset = (index.x + (index.y * 4)) & 0x0F;
- return Texture[offset];
- };
- // The z (mip) component of the index is ignored in these Texture2D block loads
- CGU_Vec4ui Load(CGU_Vec4ui index)
- {
- CGU_INT offset;
- offset = (index.x + (index.y * 4)) & 0x0F;
- // implicit conversion of float to uint
- CGU_Vec4ui res;
- res.x = Texture[offset].x;
- res.y = Texture[offset].y;
- res.z = Texture[offset].z;
- res.w = Texture[offset].w;
- return res;
- };
- };
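- // Example (illustrative) host-side read of pixel (x,y) from the cached 4x4 block;
- // as noted above, the z component mirrors the HLSL Load mip argument and is ignored:
- //   CGU_Vec3ui coord = {x, y, 0};
- //   CGU_Vec4f texel = g_Input.Load(coord); // normalized 0..1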
- // matches GPU struct in HLSL
- struct BufferShared
- {
- CGU_Vec4ui pixel;
- CGU_UINT32 error;
- CGU_UINT32 mode;
- CGU_UINT32 partition;
- CGU_UINT32 index_selector;
- CGU_UINT32 rotation;
- CGU_UINT32 pbit;
- CGU_Vec4ui endPoint_low;
- CGU_Vec4ui endPoint_high;
- CGU_Vec4ui endPoint_low_quantized;
- CGU_Vec4ui endPoint_high_quantized;
- CGU_UINT32 colorindex;
- CGU_UINT32 alphaindex;
- };
- struct SharedIOData
- {
- CGU_UINT32 error;
- CGU_UINT32 mode;
- CGU_UINT32 index_selector;
- CGU_UINT32 rotation;
- CGU_UINT32 partition;
- CGU_Vec4ui data2;
- };
- CMP_STATIC BufferShared shared_temp[THREAD_GROUP_SIZE];
- CMP_STATIC Texture2D g_Input;
- // cbuffer inputs: the CPU simulation processes one block at a time
- CMP_STATIC CGU_UINT32 g_tex_width; // Not used in HLSLHost simulation code
- CMP_STATIC CGU_UINT32 g_num_block_x = 1;
- CMP_STATIC CGU_UINT32 g_format; // Not used in HLSLHost simulation code
- CMP_STATIC CGU_UINT32 g_mode_id = 1;
- CMP_STATIC CGU_UINT32 g_start_block_id = 0;
- CMP_STATIC CGU_UINT32 g_num_total_blocks;
- CMP_STATIC CGU_FLOAT g_alpha_weight = 1.0f;
- CMP_STATIC CGU_FLOAT g_quality = 1.0f;
- CMP_STATIC SharedIOData g_InBuff[THREAD_GROUP_SIZE];
- CMP_STATIC CGU_Vec4ui g_OutBuff[THREAD_GROUP_SIZE]; // Used by EncodeBlocks & TryMode...
- CMP_STATIC SharedIOData g_OutBuff1[THREAD_GROUP_SIZE]; // Used by TryMode...
- // Forward declarations
- void TryMode456CS(CGU_UINT32 GI CMP_SVGROUPINDEX, CGU_Vec3ui groupID CMP_SVGROUPID);
- void TryMode137CS(CGU_UINT32 GI CMP_SVGROUPINDEX, CGU_Vec3ui groupID CMP_SVGROUPID);
- void TryMode02CS( CGU_UINT32 GI CMP_SVGROUPINDEX, CGU_Vec3ui groupID CMP_SVGROUPID);
- void EncodeBlocks(CGU_UINT32 GI CMP_SVGROUPINDEX, CGU_Vec3ui groupID CMP_SVGROUPID);
- CMP_STATIC void HLSLHost(CGU_Vec4f image_src[16])
- {
- //====================================
- // Simulate a single block CS
- //====================================
- // Load image_src, normalizing 0..255 input to 0..1
- for (CGU_INT i = 0; i < 16; i++)
- {
- g_Input.Texture[i].x = image_src[i].x / 255.0f;
- g_Input.Texture[i].y = image_src[i].y / 255.0f;
- g_Input.Texture[i].z = image_src[i].z / 255.0f;
- g_Input.Texture[i].w = image_src[i].w / 255.0f;
- }
-
- // Init global Buffers for first time use
- for (CGU_INT i = 0; i < THREAD_GROUP_SIZE; i++)
- {
- memset(&shared_temp[i], 0, sizeof(BufferShared));
- memset(&g_InBuff[i], 0, sizeof(SharedIOData));
- memset(&g_OutBuff1[i], 0, sizeof(SharedIOData));
- }
-
- // First Shader call
- CGU_Vec3ui SV_GroupID = {0, 0, 0}; // group IDs range 0..(n-1) for Dispatch(n,1,1), where n = number of 4x4 blocks in the image
- CGU_Vec3ui SV_GroupThreadID = {0, 0, 0}; // unused in this single-block simulation
- g_start_block_id = 0;
-
- // // Global Group Memory Sync for Pixel
- // for (CGU_INT i = 0; i < 16; i++)
- // {
- // CGU_Vec4f px = g_Input.Load(CGU_Vec3ui(i % 4, i / 4, 0));
- // px = cmp_clampVec4f(px * 255.0f, 0.0f, 255.0f);
- // //printf("in px[%2d] %3.0f %3.0f %3.0f\n",i, px.x, px.y, px.z);
- // shared_temp[i].pixel.r = (CGU_UINT32)px.r;
- // shared_temp[i].pixel.g = (CGU_UINT32)px.g;
- // shared_temp[i].pixel.b = (CGU_UINT32)px.b;
- // shared_temp[i].pixel.a = (CGU_UINT32)px.a;
- // }
-
- g_mode_id = 6;
- for (CGU_INT SV_GroupIndex = 15; SV_GroupIndex >= 0; SV_GroupIndex--)
- {
- TryMode456CS(SV_GroupIndex, SV_GroupID);
- }
-
- // Copy OutBuff1 back to InBuff for the next CS pass
- for (CGU_INT i = 0; i < THREAD_GROUP_SIZE; i++)
- {
- memcpy(&g_InBuff[i], &g_OutBuff1[i], sizeof(SharedIOData));
- }
-
- // Global Group Memory Sync for Pixel
- //for (CGU_INT i = 0; i < 16; i++)
- //{
- // CGU_Vec4f px = g_Input.Load(CGU_Vec3ui(i % 4, i / 4, 0));
- // px = cmp_clampVec4f(px * 255.0f, 0.0f, 255.0f);
- // shared_temp[i].pixel.r = (CGU_UINT32)px.r;
- // shared_temp[i].pixel.g = (CGU_UINT32)px.g;
- // shared_temp[i].pixel.b = (CGU_UINT32)px.b;
- // shared_temp[i].pixel.a = (CGU_UINT32)px.a;
- //}
-
- // Next Shader call
- g_mode_id = 1;
- for (CGU_INT SV_GroupIndex = 63; SV_GroupIndex >= 0; SV_GroupIndex--)
- {
- TryMode137CS(SV_GroupIndex, SV_GroupID);
- }
-
- // Copy OutBuff1 back to InBuff for the next shader call
- for (CGU_INT i = 0; i < THREAD_GROUP_SIZE; i++)
- {
- memcpy(&g_InBuff[i], &g_OutBuff1[i], sizeof(SharedIOData));
- }
-
- // Final Shader call
- for (CGU_INT SV_GroupIndex = 15; SV_GroupIndex >= 0; SV_GroupIndex--)
- {
- EncodeBlocks(SV_GroupIndex, SV_GroupID);
- }
- }
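- // Illustrative usage sketch (not part of the encoder): fill a 4x4 block with
- // RGBA values in 0..255 and run the simulated passes. The function name and
- // the guard are hypothetical; HLSLHost runs TryMode456CS, TryMode137CS and
- // EncodeBlocks, leaving the compressed 128-bit BC7 block in g_OutBuff.
- #ifdef SIMULATE_GPU_EXAMPLE // hypothetical guard, not part of the build
- CMP_STATIC void ExampleSimulateOneBlock()
- {
- CGU_Vec4f block[16];
- for (CGU_INT i = 0; i < 16; i++)
- {
- block[i].x = 255.0f; // R
- block[i].y = 128.0f; // G
- block[i].z = 0.0f;   // B
- block[i].w = 255.0f; // A
- }
- HLSLHost(block); // TryMode456CS, TryMode137CS, then EncodeBlocks
- }
- #endif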
- #endif
- #ifdef ENABLE_CMP_API
- // TODO: change this to CGU_Vec4ui par_vectors42_nd[4][2];
- CMP_STATIC CMP_CONSTANT CGU_UINT32 par_vectors42_nd[4][2][4] = {
- // type = 2
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, // 0 {0,0}
- {{0, 0, 0, 0}, {1, 1, 1, 1}}, // 1 {0,1}
- {{1, 1, 1, 1}, {0, 0, 0, 0}}, // 2 {1,0}
- {{1, 1, 1, 1}, {1, 1, 1, 1}} // 3 {1,1}
- };
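- // Each row pairs the p-bit vector applied to the low endpoint with the one
- // applied to the high endpoint; the four rows enumerate every {low, high}
- // combination of a shared 1-bit parity, as noted in the row comments.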
- #define COMP_RED 0
- #define COMP_GREEN 1
- #define COMP_BLUE 2
- #define COMP_ALPHA 3
- typedef struct
- {
- CGU_UINT32 numPartitionModes;
- CGU_UINT32 maxSubSets;
- CGU_UINT32 channels3or4;
- CGU_UINT32 bits;
- CGU_UINT32 clusters;
- CGU_UINT32 componentBits;
- CGU_UINT32 partitionBits;
- CGU_UINT32 indexBits;
- } MODESETTINGS;
- CMP_STATIC CMP_CONSTANT MODESETTINGS g_modesettings[8] = {
- // numPartitionModes, maxSubSets, channels3or4, bits, clusters, componentBits, partitionBits, indexBits
- {16, 3, 3, 26, 8, 4, 4, 3}, // Mode 0
- {64, 2, 3, 37, 8, 6, 6, 3}, // Mode 1
- {64, 3, 3, 30, 4, 5, 6, 2}, // Mode 2
- {64, 2, 3, 44, 4, 7, 6, 2}, // Mode 3
- { 0, 0, 0, 0, 0, 0, 0, 2}, // Mode 4
- { 0, 0, 0, 0, 0, 0, 0, 2}, // Mode 5
- { 0, 0, 4, 58, 16, 7, 0, 4}, // Mode 6
- {64, 2, 4, 42, 4, 5, 6, 2} // Mode 7
- };
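- // Note (inferred from the values above): 'bits' is the endpoint payload per
- // subset, i.e. componentBits x channels3or4 x 2 endpoints plus any p-bits,
- // e.g. mode 0: 4*3*2 + 2 = 26; mode 1: 6*3*2 + 1 (shared p-bit) = 37;
- // mode 6: 7*4*2 + 2 = 58.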
- #ifndef ASPM_HLSL //=======================================================
- CMP_STATIC CMP_CONSTANT CGU_UINT32 subset_mask_table2[128] = {
- // 2-subset region patterns
- 0x0000CCCCu, // 0 1100 1100 1100 1100 (MSB..LSB)
- 0x00008888u, // 1 1000 1000 1000 1000
- 0x0000EEEEu, // 2 1110 1110 1110 1110
- 0x0000ECC8u, // 3 1110 1100 1100 1000
- 0x0000C880u, // 4 1100 1000 1000 0000
- 0x0000FEECu, // 5 1111 1110 1110 1100
- 0x0000FEC8u, // 6 1111 1110 1100 1000
- 0x0000EC80u, // 7 1110 1100 1000 0000
- 0x0000C800u, // 8 1100 1000 0000 0000
- 0x0000FFECu, // 9 1111 1111 1110 1100
- 0x0000FE80u, // 10 1111 1110 1000 0000
- 0x0000E800u, // 11 1110 1000 0000 0000
- 0x0000FFE8u, // 12 1111 1111 1110 1000
- 0x0000FF00u, // 13 1111 1111 0000 0000
- 0x0000FFF0u, // 14 1111 1111 1111 0000
- 0x0000F000u, // 15 1111 0000 0000 0000
- 0x0000F710u, // 16 1111 0111 0001 0000
- 0x0000008Eu, // 17 0000 0000 1000 1110
- 0x00007100u, // 18 0111 0001 0000 0000
- 0x000008CEu, // 19 0000 1000 1100 1110
- 0x0000008Cu, // 20 0000 0000 1000 1100
- 0x00007310u, // 21 0111 0011 0001 0000
- 0x00003100u, // 22 0011 0001 0000 0000
- 0x00008CCEu, // 23 1000 1100 1100 1110
- 0x0000088Cu, // 24 0000 1000 1000 1100
- 0x00003110u, // 25 0011 0001 0001 0000
- 0x00006666u, // 26 0110 0110 0110 0110
- 0x0000366Cu, // 27 0011 0110 0110 1100
- 0x000017E8u, // 28 0001 0111 1110 1000
- 0x00000FF0u, // 29 0000 1111 1111 0000
- 0x0000718Eu, // 30 0111 0001 1000 1110
- 0x0000399Cu, // 31 0011 1001 1001 1100
- 0x0000AAAAu, // 32 1010 1010 1010 1010
- 0x0000F0F0u, // 33 1111 0000 1111 0000
- 0x00005A5Au, // 34 0101 1010 0101 1010
- 0x000033CCu, // 35 0011 0011 1100 1100
- 0x00003C3Cu, // 36 0011 1100 0011 1100
- 0x000055AAu, // 37 0101 0101 1010 1010
- 0x00009696u, // 38 1001 0110 1001 0110
- 0x0000A55Au, // 39 1010 0101 0101 1010
- 0x000073CEu, // 40 0111 0011 1100 1110
- 0x000013C8u, // 41 0001 0011 1100 1000
- 0x0000324Cu, // 42 0011 0010 0100 1100
- 0x00003BDCu, // 43 0011 1011 1101 1100
- 0x00006996u, // 44 0110 1001 1001 0110
- 0x0000C33Cu, // 45 1100 0011 0011 1100
- 0x00009966u, // 46 1001 1001 0110 0110
- 0x00000660u, // 47 0000 0110 0110 0000
- 0x00000272u, // 48 0000 0010 0111 0010
- 0x000004E4u, // 49 0000 0100 1110 0100
- 0x00004E40u, // 50 0100 1110 0100 0000
- 0x00002720u, // 51 0010 0111 0010 0000
- 0x0000C936u, // 52 1100 1001 0011 0110
- 0x0000936Cu, // 53 1001 0011 0110 1100
- 0x000039C6u, // 54 0011 1001 1100 0110
- 0x0000639Cu, // 55 0110 0011 1001 1100
- 0x00009336u, // 56 1001 0011 0011 0110
- 0x00009CC6u, // 57 1001 1100 1100 0110
- 0x0000817Eu, // 58 1000 0001 0111 1110
- 0x0000E718u, // 59 1110 0111 0001 1000
- 0x0000CCF0u, // 60 1100 1100 1111 0000
- 0x00000FCCu, // 61 0000 1111 1100 1100
- 0x00007744u, // 62 0111 0111 0100 0100
- 0x0000EE22u, // 63 1110 1110 0010 0010
- // 3-subset region patterns
- 0xF60008CCu, // 0 1111 0110 0000 0000 : 0000 1000 1100 1100 = 2222122011001100 (MSB...LSB)
- 0x73008CC8u, // 1 0111 0011 0000 0000 : 1000 1100 1100 1000 = 1222112211001000
- 0x3310CC80u, // 2 0011 0011 0001 0000 : 1100 1100 1000 0000 = 1122112210020000
- 0x00CEEC00u, // 3 0000 0000 1100 1110 : 1110 1100 0000 0000 = 1110110022002220
- 0xCC003300u, // 4 1100 1100 0000 0000 : 0011 0011 0000 0000 = 2211221100000000
- 0xCC0000CCu, // 5 1100 1100 0000 0000 : 0000 0000 1100 1100 = 2200220011001100
- 0x00CCFF00u, // 6 0000 0000 1100 1100 : 1111 1111 0000 0000 = 1111111122002200
- 0x3300CCCCu, // 7 0011 0011 0000 0000 : 1100 1100 1100 1100 = 1122112211001100
- 0xF0000F00u, // 8 1111 0000 0000 0000 : 0000 1111 0000 0000 = 2222111100000000
- 0xF0000FF0u, // 9 1111 0000 0000 0000 : 0000 1111 1111 0000 = 2222111111110000
- 0xFF0000F0u, // 10 1111 1111 0000 0000 : 0000 0000 1111 0000 = 2222222211110000
- 0x88884444u, // 11 1000 1000 1000 1000 : 0100 0100 0100 0100 = 2100210021002100
- 0x88886666u, // 12 1000 1000 1000 1000 : 0110 0110 0110 0110 = 2110211021102110
- 0xCCCC2222u, // 13 1100 1100 1100 1100 : 0010 0010 0010 0010 = 2210221022102210
- 0xEC80136Cu, // 14 1110 1100 1000 0000 : 0001 0011 0110 1100 = 2221221121101100
- 0x7310008Cu, // 15 0111 0011 0001 0000 : 0000 0000 1000 1100 = 0222002210021100
- 0xC80036C8u, // 16 1100 1000 0000 0000 : 0011 0110 1100 1000 = 2211211011001000
- 0x310008CEu, // 17 0011 0001 0000 0000 : 0000 1000 1100 1110 = 0022100211001110
- 0xCCC03330u, // 18 1100 1100 1100 0000 : 0011 0011 0011 0000 = 2211221122110000
- 0x0CCCF000u, // 19 0000 1100 1100 1100 : 1111 0000 0000 0000 = 1111220022002200
- 0xEE0000EEu, // 20 1110 1110 0000 0000 : 0000 0000 1110 1110 = 2220222011101110
- 0x77008888u, // 21 0111 0111 0000 0000 : 1000 1000 1000 1000 = 1222122210001000
- 0xCC0022C0u, // 22 1100 1100 0000 0000 : 0010 0010 1100 0000 = 2210221011000000
- 0x33004430u, // 23 0011 0011 0000 0000 : 0100 0100 0011 0000 = 0122012200110000
- 0x00CC0C22u, // 24 0000 0000 1100 1100 : 0000 1100 0010 0010 = 0000110022102210
- 0xFC880344u, // 25 1111 1100 1000 1000 : 0000 0011 0100 0100 = 2222221121002100
- 0x06606996u, // 26 0000 0110 0110 0000 : 0110 1001 1001 0110 = 0110122112210110
- 0x66009960u, // 27 0110 0110 0000 0000 : 1001 1001 0110 0000 = 1221122101100000
- 0xC88C0330u, // 28 1100 1000 1000 1100 : 0000 0011 0011 0000 = 2200201120112200
- 0xF9000066u, // 29 1111 1001 0000 0000 : 0000 0000 0110 0110 = 2222200201100110
- 0x0CC0C22Cu, // 30 0000 1100 1100 0000 : 1100 0010 0010 1100 = 1100221022101100
- 0x73108C00u, // 31 0111 0011 0001 0000 : 1000 1100 0000 0000 = 1222112200020000
- 0xEC801300u, // 32 1110 1100 1000 0000 : 0001 0011 0000 0000 = 2221221120000000
- 0x08CEC400u, // 33 0000 1000 1100 1110 : 1100 0100 0000 0000 = 1100210022002220
- 0xEC80004Cu, // 34 1110 1100 1000 0000 : 0000 0000 0100 1100 = 2220220021001100
- 0x44442222u, // 35 0100 0100 0100 0100 : 0010 0010 0010 0010 = 0210021002100210
- 0x0F0000F0u, // 36 0000 1111 0000 0000 : 0000 0000 1111 0000 = 0000222211110000
- 0x49242492u, // 37 0100 1001 0010 0100 : 0010 0100 1001 0010 = 0210210210210210
- 0x42942942u, // 38 0100 0010 1001 0100 : 0010 1001 0100 0010 = 0210102121020210
- 0x0C30C30Cu, // 39 0000 1100 0011 0000 : 1100 0011 0000 1100 = 1100221100221100
- 0x03C0C03Cu, // 40 0000 0011 1100 0000 : 1100 0000 0011 1100 = 1100002222111100
- 0xFF0000AAu, // 41 1111 1111 0000 0000 : 0000 0000 1010 1010 = 2222222210101010
- 0x5500AA00u, // 42 0101 0101 0000 0000 : 1010 1010 0000 0000 = 1212121200000000
- 0xCCCC3030u, // 43 1100 1100 1100 1100 : 0011 0000 0011 0000 = 2211220022112200
- 0x0C0CC0C0u, // 44 0000 1100 0000 1100 : 1100 0000 1100 0000 = 1100220011002200
- 0x66669090u, // 45 0110 0110 0110 0110 : 1001 0000 1001 0000 = 1221022012210220
- 0x0FF0A00Au, // 46 0000 1111 1111 0000 : 1010 0000 0000 1010 = 1010222222221010
- 0x5550AAA0u, // 47 0101 0101 0101 0000 : 1010 1010 1010 0000 = 1212121212120000
- 0xF0000AAAu, // 48 1111 0000 0000 0000 : 0000 1010 1010 1010 = 2222101010101010
- 0x0E0EE0E0u, // 49 0000 1110 0000 1110 : 1110 0000 1110 0000 = 1110222011102220
- 0x88887070u, // 50 1000 1000 1000 1000 : 0111 0000 0111 0000 = 2111200021112000
- 0x99906660u, // 51 1001 1001 1001 0000 : 0110 0110 0110 0000 = 2112211221120000
- 0xE00E0EE0u, // 52 1110 0000 0000 1110 : 0000 1110 1110 0000 = 2220111011102220
- 0x88880770u, // 53 1000 1000 1000 1000 : 0000 0111 0111 0000 = 2000211121112000
- 0xF0000666u, // 54 1111 0000 0000 0000 : 0000 0110 0110 0110 = 2222011001100110
- 0x99006600u, // 55 1001 1001 0000 0000 : 0110 0110 0000 0000 = 2112211200000000
- 0xFF000066u, // 56 1111 1111 0000 0000 : 0000 0000 0110 0110 = 2222222201100110
- 0xC00C0CC0u, // 57 1100 0000 0000 1100 : 0000 1100 1100 0000 = 2200110011002200
- 0xCCCC0330u, // 58 1100 1100 1100 1100 : 0000 0011 0011 0000 = 2200221122112200
- 0x90006000u, // 59 1001 0000 0000 0000 : 0110 0000 0000 0000 = 2112000000000000
- 0x08088080u, // 60 0000 1000 0000 1000 : 1000 0000 1000 0000 = 1000200010002000
- 0xEEEE1010u, // 61 1110 1110 1110 1110 : 0001 0000 0001 0000 = 2221222022212220
- 0xFFF0000Au, // 62 1111 1111 1111 0000 : 0000 0000 0000 1010 = 2222222222221010
- 0x731008CEu, // 63 0111 0011 0001 0000 : 0000 1000 1100 1110 = 0222102211021110
- };
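- // Sketch (helper name illustrative, not used by the encoder): recovering the
- // per-pixel subset index from the packed masks above. For 2-subset patterns
- // only the low 16 bits are used (one bit per pixel); for 3-subset patterns
- // the high 16 bits mark subset 2 and the low 16 bits mark subset 1, with
- // both clear meaning subset 0, as shown in the row comments.
- CMP_STATIC CGU_UINT32 cmp_subset_of_pixel(CGU_UINT32 mask, CGU_UINT32 pixel)
- {
- CGU_UINT32 subset = (mask >> pixel) & 1; // subset 0 or 1
- subset += ((mask >> (16 + pixel)) & 1) << 1; // becomes 2 for 3-subset patterns
- return subset;
- }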
- CMP_STATIC CMP_CONSTANT CGU_UINT8 cmp_npv_nd[2][8] = {
- {1, 2, 4, 8, 16, 32, 0, 0}, // 3-component
- {1, 2, 4, 0, 0, 0, 0, 0} // 4-component
- };
- CMP_STATIC CMP_CONSTANT CGU_UINT8 cmp_par_vectors_nd[2][8][64][2][4] = {
- {
- // 3D
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{1, 1, 1, 0}, {1, 1, 1, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {1, 1, 1, 0}}, {{1, 1, 1, 0}, {0, 0, 0, 0}}, {{1, 1, 1, 0}, {1, 1, 1, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{1, 1, 0, 0}, {1, 1, 0, 0}}, {{1, 0, 1, 0}, {1, 0, 1, 0}}, {{0, 1, 1, 0}, {0, 1, 1, 0}}, {{0, 0, 0, 0}, {1, 1, 1, 0}},
- {{1, 1, 1, 0}, {0, 0, 0, 0}}, {{0, 1, 0, 0}, {0, 1, 0, 0}}, {{1, 1, 1, 0}, {1, 1, 1, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{1, 1, 0, 0}, {0, 0, 0, 0}}, {{1, 0, 1, 0}, {0, 0, 0, 0}}, {{0, 1, 1, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {1, 1, 0, 0}},
- {{1, 1, 0, 0}, {1, 1, 0, 0}}, {{1, 0, 1, 0}, {1, 1, 0, 0}}, {{0, 1, 1, 0}, {1, 1, 0, 0}}, {{0, 0, 0, 0}, {1, 0, 1, 0}}, {{1, 1, 0, 0}, {1, 0, 1, 0}},
- {{1, 0, 1, 0}, {1, 0, 1, 0}}, {{0, 1, 1, 0}, {1, 0, 1, 0}}, {{0, 0, 0, 0}, {0, 1, 1, 0}}, {{1, 1, 0, 0}, {0, 1, 1, 0}}, {{1, 0, 1, 0}, {0, 1, 1, 0}},
- {{0, 1, 1, 0}, {0, 1, 1, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{1, 1, 0, 0}, {0, 0, 0, 0}}, {{1, 0, 1, 0}, {0, 0, 0, 0}}, {{0, 1, 1, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {1, 1, 0, 0}},
- {{1, 1, 0, 0}, {1, 1, 0, 0}}, {{1, 0, 1, 0}, {1, 1, 0, 0}}, {{0, 1, 1, 0}, {1, 1, 0, 0}}, {{0, 0, 0, 0}, {1, 0, 1, 0}}, {{1, 1, 0, 0}, {1, 0, 1, 0}},
- {{1, 0, 1, 0}, {1, 0, 1, 0}}, {{0, 1, 1, 0}, {1, 0, 1, 0}}, {{0, 0, 0, 0}, {0, 1, 1, 0}}, {{1, 1, 0, 0}, {0, 1, 1, 0}}, {{1, 0, 1, 0}, {0, 1, 1, 0}},
- {{0, 1, 1, 0}, {0, 1, 1, 0}}, {{1, 0, 0, 0}, {1, 1, 1, 0}}, {{0, 1, 0, 0}, {1, 1, 1, 0}}, {{0, 0, 1, 0}, {1, 1, 1, 0}}, {{1, 1, 1, 0}, {1, 1, 1, 0}},
- {{1, 0, 0, 0}, {0, 0, 1, 0}}, {{0, 1, 0, 0}, {0, 0, 1, 0}}, {{0, 0, 1, 0}, {0, 0, 1, 0}}, {{1, 1, 1, 0}, {0, 0, 1, 0}}, {{1, 0, 0, 0}, {1, 0, 0, 0}},
- {{0, 1, 0, 0}, {1, 0, 0, 0}}, {{0, 0, 1, 0}, {1, 0, 0, 0}}, {{1, 1, 1, 0}, {1, 0, 0, 0}}, {{1, 0, 0, 0}, {0, 1, 0, 0}}, {{0, 1, 0, 0}, {0, 1, 0, 0}},
- {{0, 0, 1, 0}, {0, 1, 0, 0}}, {{1, 1, 1, 0}, {0, 1, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- },
- {
- // 4D
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{1, 1, 1, 1}, {1, 1, 1, 1}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {0, 0, 0, 0}}, {{1, 1, 1, 1}, {1, 1, 1, 1}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 1, 1, 1}}, {{0, 1, 1, 1}, {0, 0, 0, 0}}, {{0, 1, 1, 1}, {0, 1, 1, 1}}, {{1, 0, 0, 0}, {1, 0, 0, 0}},
- {{1, 0, 0, 0}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {1, 0, 0, 0}}, {{1, 1, 1, 1}, {1, 1, 1, 1}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 1, 1, 1}}, {{0, 1, 1, 1}, {0, 0, 0, 0}}, {{0, 1, 1, 1}, {0, 1, 1, 1}}, {{1, 0, 0, 0}, {1, 0, 0, 0}},
- {{1, 0, 0, 0}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {1, 0, 0, 0}}, {{1, 1, 1, 1}, {1, 1, 1, 1}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 1, 1}},
- {{0, 0, 1, 1}, {0, 0, 0, 0}}, {{0, 1, 0, 1}, {0, 1, 0, 1}}, {{1, 0, 0, 0}, {1, 0, 0, 0}}, {{1, 0, 0, 0}, {1, 0, 1, 1}}, {{1, 0, 1, 1}, {1, 0, 0, 0}},
- {{1, 1, 0, 1}, {1, 1, 0, 1}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- },
- };
- CMP_STATIC CMP_CONSTANT CGU_UINT8 cmp_rampI[3][16] = {
- {0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // 2 bit index
- {0, 9, 18, 27, 37, 46, 55, 64, 0, 0, 0, 0, 0, 0, 0, 0}, // 3 bit index
- {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64} // 4 bit index
- };
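- // The ramp entries are interpolation weights in 64ths: index i with weight w = cmp_rampI[index_bits - 2][i]
- // blends two expanded endpoints as e1 + (w / 64) * (e2 - e1) in cmp_GetRamp below. Illustrative check:
- // in the 2-bit row, index 1 has w = 21, so endpoints 0 and 255 interpolate to floor(255 * 21 / 64 + 0.5) = 84.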
- // The data is saved as a packed INT = (BC7_FIXUPINDEX1 << 4) + BC7_FIXUPINDEX2
- CMP_STATIC CMP_CONSTANT CGU_UINT32 CMPFIXUPINDEX[128] = {
- // 2 subset partitions 0..63
- 0xf0u,0xf0u,0xf0u,0xf0u,0xf0u,0xf0u,0xf0u,0xf0u,
- 0xf0u,0xf0u,0xf0u,0xf0u,0xf0u,0xf0u,0xf0u,0xf0u,
- 0xf0u,0x20u,0x80u,0x20u,0x20u,0x80u,0x80u,0xf0u,
- 0x20u,0x80u,0x20u,0x20u,0x80u,0x80u,0x20u,0x20u,
- 0xf0u,0xf0u,0x60u,0x80u,0x20u,0x80u,0xf0u,0xf0u,
- 0x20u,0x80u,0x20u,0x20u,0x20u,0xf0u,0xf0u,0x60u,
- 0x60u,0x20u,0x60u,0x80u,0xf0u,0xf0u,0x20u,0x20u,
- 0xf0u,0xf0u,0xf0u,0xf0u,0xf0u,0x20u,0x20u,0xf0u,
- // 3 subset partitions 64..127
- 0x3fu,0x38u,0xf8u,0xf3u,0x8fu,0x3fu,0xf3u,0xf8u,
- 0x8fu,0x8fu,0x6fu,0x6fu,0x6fu,0x5fu,0x3fu,0x38u,
- 0x3fu,0x38u,0x8fu,0xf3u,0x3fu,0x38u,0x6fu,0xa8u,
- 0x53u,0x8fu,0x86u,0x6au,0x8fu,0x5fu,0xfau,0xf8u,
- 0x8fu,0xf3u,0x3fu,0x5au,0x6au,0xa8u,0x89u,0xfau,
- 0xf6u,0x3fu,0xf8u,0x5fu,0xf3u,0xf6u,0xf6u,0xf8u,
- 0x3fu,0xf3u,0x5fu,0x5fu,0x5fu,0x8fu,0x5fu,0xafu,
- 0x5fu,0xafu,0x8fu,0xdfu,0xf3u,0xcfu,0x3fu,0x38u };
- INLINE void cmp_get_fixuptable(CMP_INOUT CGU_UINT32 fixup[3], CGU_INT part_id)
- {
- CGU_UINT32 skip_packed = CMPFIXUPINDEX[part_id]; // gather_int2(FIXUPINDEX, part_id);
- fixup[0] = 0;
- fixup[1] = skip_packed >> 4;
- fixup[2] = skip_packed & 15;
- }
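- // Illustrative example: part_id 64 (the first 3-subset partition) reads CMPFIXUPINDEX[64] = 0x3f,
- // which unpacks to fixup = { 0, 3, 15 }: subset 0 is always anchored at pixel 0, subset 1 at pixel 3
- // and subset 2 at pixel 15.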
- INLINE CGU_UINT8 shift_right_epocode2(CMP_IN CGU_UINT8 v, CMP_IN CGU_INT bits)
- {
- return v >> bits; // variable shift; a compiler performance warning is expected here
- }
- INLINE CGU_UINT8 expand_epocode2(CMP_IN CGU_UINT8 v, CMP_IN CGU_INT bits)
- {
- CGU_UINT8 vv = v << (8 - bits);
- return vv + shift_right_epocode2(vv, bits);
- }
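- // expand_epocode2 replicates the top bits of a quantized endpoint into the vacated low bits. Illustrative
- // check for bits = 5, v = 0b10110 (22): vv = 22 << 3 = 0b10110000, vv >> 5 = 0b101, so the result is
- // 0b10110101 = 181 - the same value as the OR form (v << (8 - bits)) | (v >> (2 * bits - 8)).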
- INLINE CGV_FLOAT cmp_GetRamp(CMP_IN CGU_INT index_bits, // ramp bits Valid range 2..4
- CMP_IN CGU_INT bits, // Component Valid range 5..8
- CMP_IN CGU_INT p1, // 0..255
- CMP_IN CGU_INT p2, // 0..255
- CMP_IN CGU_UINT8 index)
- {
- CGU_INT e1 = expand_epocode2(p1, bits);
- CGU_INT e2 = expand_epocode2(p2, bits);
- CGV_FLOAT ramp = cmp_rampI[index_bits - 2][index] / 64.0F;
- CGV_FLOAT rampf = floor(e1 + ramp * (e2 - e1) + 0.5F);
- return rampf;
- }
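- // Worked example (illustrative): index_bits = 2, bits = 5, p1 = 0, p2 = 31, index = 1 expands the
- // endpoints to e1 = 0 and e2 = 255, applies ramp = 21 / 64 = 0.328125 and returns
- // floor(0 + 0.328125 * 255 + 0.5) = 84.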
- #if defined(USE_NEW_SP_ERR_IDX)
- #ifndef ASPM_GPU
- struct BC7_EncodeRamps2
- {
- CGU_INT ep_d[4][256];
- CGU_UINT8 sp_err[3*4*256*2*2*16];
- CGU_INT sp_idx[3*4*256*2*2*16*2];
- CGU_BOOL ramp_init;
- };
- BC7_EncodeRamps2 BC7EncodeRamps2;
- #define LOG_CL_RANGE2 5
- #define LOG_CL_BASE2 2
- #define BIT_BASE2 5
- #define BIT_RANGE2 9
- #define BTT2(bits) ((bits)-BIT_BASE2)
- #define CLT2(cl) ((cl)-LOG_CL_BASE2)
- #define SOURCE_BLOCK_SIZE 16
- CMP_CONSTANT CGU_FLOAT rampWeights2[5][SOURCE_BLOCK_SIZE] = {
- { 0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f}, // 0 bit index
- { 0.000000f,1.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f}, // 1 bit index
- { 0.000000f,0.328125f,0.671875f,1.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f}, // 2 bit index
- { 0.000000f,0.140625f,0.281250f,0.421875f,0.578125f,0.718750f,0.859375f,1.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f,0.000000f}, // 3 bit index
- { 0.000000f,0.062500f,0.140625f,0.203125f,0.265625f,0.328125f,0.406250f,0.468750f,0.531250f,0.593750f,0.671875f,0.734375f,0.796875f,0.859375f,0.937500f,1.000000f} // 4 bit index
- };
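- // rampWeights2 is the float form of the integer ramp tables: each entry is weight / 64, e.g. the 2-bit
- // row's 0.328125 = 21 / 64 and 0.671875 = 43 / 64; rows 0 and 1 cover the degenerate 0- and 1-bit cases.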
- CGU_INT old_expandbits(CGU_INT bits, CGU_INT v)
- {
- return (v << (8 - bits) | v >> (2 * bits - 8));
- }
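- // old_expandbits is the OR-based form of the bit replication done by expand_epocode2 above, e.g.
- // bits = 5, v = 22 gives (22 << 3) | (22 >> 2) = 176 | 5 = 181.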
- void old_init_BC7ramps()
- {
- CMP_STATIC CGU_BOOL g_rampsInitialized = FALSE;
- if (g_rampsInitialized == TRUE)
- return;
- g_rampsInitialized = TRUE;
- BC7EncodeRamps2.ramp_init = TRUE;
- //bc7_isa(); ASPM_PRINT((" INIT Ramps\n"));
- CGU_INT bits;
- CGU_INT p1;
- CGU_INT p2;
- CGU_INT clogBC7;
- CGU_INT index;
- CGU_INT j;
- CGU_INT o1;
- CGU_INT o2;
- for (bits = BIT_BASE2; bits < BIT_RANGE2; bits++)
- {
- for (p1 = 0; p1 < (1 << bits); p1++)
- {
- BC7EncodeRamps2.ep_d[BTT2(bits)][p1] = old_expandbits(bits, p1);
- } //p1
- } //bits<BIT_RANGE
- for (clogBC7 = LOG_CL_BASE2; clogBC7 < LOG_CL_RANGE2; clogBC7++)
- {
- for (bits = BIT_BASE2; bits < BIT_RANGE2; bits++)
- {
- // SP_ERR_IDX : Init
- for (j = 0; j < 256; j++)
- {
- for (o1 = 0; o1 < 2; o1++)
- {
- for (o2 = 0; o2 < 2; o2++)
- {
- for (index = 0; index < 16; index++)
- {
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) + (j * 2 * 2 * 16 * 2) +
- (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 0] = 0;
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) + (j * 2 * 2 * 16 * 2) +
- (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 1] = 255;
- BC7EncodeRamps2.sp_err[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16) + (BTT2(bits) * 256 * 2 * 2 * 16) + (j * 2 * 2 * 16) + (o1 * 2 * 16) +
- (o2 * 16) + index] = 255;
- } // i<16
- } //o2<2;
- } //o1<2
- } //j<256
- // SP_ERR_IDX : calc
- for (p1 = 0; p1 < (1 << bits); p1++)
- {
- for (p2 = 0; p2 < (1 << bits); p2++)
- {
- for (index = 0; index < (1 << clogBC7); index++)
- {
- CGV_INT floatf =
- floor((CGV_FLOAT)BC7EncodeRamps2.ep_d[BTT2(bits)][p1] +
- rampWeights2[clogBC7][index] * (CGV_FLOAT)((BC7EncodeRamps2.ep_d[BTT2(bits)][p2] - BC7EncodeRamps2.ep_d[BTT2(bits)][p1])) + 0.5F);
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) + (floatf * 2 * 2 * 16 * 2) +
- ((p1 & 0x1) * 2 * 16 * 2) + ((p2 & 0x1) * 16 * 2) + (index * 2) + 0] = p1;
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) + (floatf * 2 * 2 * 16 * 2) +
- ((p1 & 0x1) * 2 * 16 * 2) + ((p2 & 0x1) * 16 * 2) + (index * 2) + 1] = p2;
- BC7EncodeRamps2.sp_err[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16) + (BTT2(bits) * 256 * 2 * 2 * 16) + (floatf * 2 * 2 * 16) +
- ((p1 & 0x1) * 2 * 16) + ((p2 & 0x1) * 16) + index] = 0; // (p2 & 0x1) needs its own parentheses: & binds looser than *
- } //i<(1 << clogBC7)
- } //p2
- } //p1<(1 << bits)
- for (j = 0; j < 256; j++)
- {
- for (o1 = 0; o1 < 2; o1++)
- {
- for (o2 = 0; o2 < 2; o2++)
- {
- for (index = 0; index < (1 << clogBC7); index++)
- {
- if ( // check for uninitialized sp_idx
- (BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) + (j * 2 * 2 * 16 * 2) +
- (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 0] == 0) &&
- (BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) + (j * 2 * 2 * 16 * 2) +
- (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 1] == 255))
- {
- CGU_INT k;
- CGU_INT tf;
- CGU_INT tc;
- for (k = 1; k < 256; k++)
- {
- tf = j - k;
- tc = j + k;
- if ((tf >= 0 && BC7EncodeRamps2.sp_err[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16) + (BTT2(bits) * 256 * 2 * 2 * 16) +
- (tf * 2 * 2 * 16) + (o1 * 2 * 16) + (o2 * 16) + index] == 0))
- {
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) +
- (j * 2 * 2 * 16 * 2) + (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 0] =
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) +
- (tf * 2 * 2 * 16 * 2) + (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 0];
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) +
- (j * 2 * 2 * 16 * 2) + (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 1] =
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) +
- (tf * 2 * 2 * 16 * 2) + (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 1];
- break;
- }
- else if ((tc < 256 && BC7EncodeRamps2.sp_err[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16) + (BTT2(bits) * 256 * 2 * 2 * 16) +
- (tc * 2 * 2 * 16) + (o1 * 2 * 16) + (o2 * 16) + index] == 0))
- {
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) +
- (j * 2 * 2 * 16 * 2) + (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 0] =
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) +
- (tc * 2 * 2 * 16 * 2) + (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 0];
- // copy both halves of the stored (p1, p2) pair, mirroring the tf branch above
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) +
- (j * 2 * 2 * 16 * 2) + (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 1] =
- BC7EncodeRamps2.sp_idx[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) + (BTT2(bits) * 256 * 2 * 2 * 16 * 2) +
- (tc * 2 * 2 * 16 * 2) + (o1 * 2 * 16 * 2) + (o2 * 16 * 2) + (index * 2) + 1];
- break;
- }
- }
- BC7EncodeRamps2.sp_err[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16) + (BTT2(bits) * 256 * 2 * 2 * 16) + (j * 2 * 2 * 16) +
- (o1 * 2 * 16) + (o2 * 16) + index] = (CGU_UINT8)k;
- } //sp_idx < 0
- } //i<(1 << clogBC7)
- } //o2
- } //o1
- } //j
- } //bits<BIT_RANGE
- } //clogBC7<LOG_CL_RANGE
- }
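- // The flat sp_idx / sp_err indexing above is a row-major walk over the logical shape
- // [clogBC7 - 2][bits - 5][value 0..255][p1 & 1][p2 & 1][index 0..15] (plus a trailing pair slot 0..1
- // for sp_idx). A hypothetical helper macro (not part of the original source) makes the strides explicit:
- //   #define SP_IDX(cl, b, j, o1, o2, i, s) \
- //       ((((((CLT2(cl) * 4 + BTT2(b)) * 256 + (j)) * 2 + (o1)) * 2 + (o2)) * 16 + (i)) * 2 + (s))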
- CGV_FLOAT old_img_absf(CGV_FLOAT a)
- {
- return a > 0.0F ? a : -a;
- }
- INLINE CGV_FLOAT old_get_sperr(CGU_INT clogBC7, // ramp bits Valid range 2..4
- CGU_INT bits, // Component Valid range 5..8
- CGV_INT p1, // 0..255
- CGU_INT t1,
- CGU_INT t2,
- CGV_UINT8 index)
- {
- if (BC7EncodeRamps2.ramp_init)
- return BC7EncodeRamps2.sp_err[(CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16) + (BTT2(bits) * 256 * 2 * 2 * 16) + (p1 * 2 * 2 * 16) + (t1 * 2 * 16) + (t2 * 16) + index];
- else
- return 0.0f;
- }
- #endif // !ASPM_GPU
- #endif // USE_NEW_SP_ERR_IDX
- #endif // Not ASPM_HLSL
- #endif // ENABLE_CMP_API
- #define get_end_point_l(subset) shared_temp[threadBase + subset].endPoint_low_quantized
- #define get_end_point_h(subset) shared_temp[threadBase + subset].endPoint_high_quantized
- #define get_color_index(index) shared_temp[threadBase + index].error
- #define get_alpha_index(index) shared_temp[threadBase + index].mode
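- // These aliases appear to reuse shared_temp fields as scratch: once endpoints are final, .error holds a
- // pixel's color index and .mode its alpha index, so the block_package* routines below can read them back
- // without extra shared-memory storage.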
- //4 bit index: 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64
- CMP_STATIC CMP_CONSTANT CGU_UINT32 aStep[3][64] = {
- {0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7,
- 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15},
- //3 bit index: 0, 9, 18, 27, 37, 46, 55, 64
- {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7},
- //2 bit index: 0, 21, 43, 64
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
- CMP_STATIC CMP_CONSTANT CGU_UINT32 aWeight[3][16] = {{0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64},
- {0, 9, 18, 27, 37, 46, 55, 64, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
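- // aStep and aWeight are inverses: aWeight maps an index to its 0..64 interpolation weight, while aStep
- // maps a 0..63 projection bucket back to the nearest index. Illustrative check for the 2-bit row
- // (weights 0, 21, 43, 64): buckets 0..10 round to index 0 and buckets 11..32 to index 1, matching aStep[2].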
- //Associated to partition 0-63
- CMP_STATIC CMP_CONSTANT CGU_UINT32 blockPartitions[64] = {
- 0xCCCC, 0x8888, 0xEEEE, 0xECC8, 0xC880, 0xFEEC, 0xFEC8, 0xEC80, 0xC800, 0xFFEC, 0xFE80, 0xE800, 0xFFE8, 0xFF00, 0xFFF0, 0xF000,
- 0xF710, 0x008E, 0x7100, 0x08CE, 0x008C, 0x7310, 0x3100, 0x8CCE, 0x088C, 0x3110, 0x6666, 0x366C, 0x17E8, 0x0FF0, 0x718E, 0x399C,
- 0xAAAA, 0xF0F0, 0x5A5A, 0x33CC, 0x3C3C, 0x55AA, 0x9696, 0xA55A, 0x73CE, 0x13C8, 0x324C, 0x3BDC, 0x6996, 0xC33C, 0x9966, 0x0660,
- 0x0272, 0x04E4, 0x4E40, 0x2720, 0xC936, 0x936C, 0x39C6, 0x639C, 0x9336, 0x9CC6, 0x817E, 0xE718, 0xCCF0, 0x0FCC, 0x7744, 0xEE22,
- };
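- // Each blockPartitions entry is a 16-bit mask, one bit per pixel of the 4x4 block: pixel i of partition p
- // belongs to subset (blockPartitions[p] >> i) & 1. E.g. 0xCCCC = 1100110011001100b puts the two right-hand
- // columns of every row into subset 1.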
- //Associated to partition 64-127
- CMP_STATIC CMP_CONSTANT CGU_UINT32 blockPartitions2[64] = {
- 0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8, 0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050, 0xaa550000, 0xaa555500, 0xaaaa5500,
- 0x90909090, 0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250, 0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0, 0xa8a85454, 0x6a6a4040,
- 0xa4a45000, 0x1a1a0500, 0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400, 0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200, 0xa9a58000,
- 0x5090a0a8, 0xa8a09050, 0x24242424, 0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50, 0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,
- 0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600, 0xaa444444, 0x54a854a8, 0x95809580, 0x96969600, 0xa85454a8, 0x80959580, 0xaa141414,
- 0x96960000, 0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000, 0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,
- };
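- // blockPartitions2 packs the 3-subset patterns (partitions 64..127) at 2 bits per pixel: pixel i of
- // partition p belongs to subset (blockPartitions2[p - 64] >> (i * 2)) & 3.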
- CMP_STATIC CMP_CONSTANT CGU_Vec2ui candidateFixUpIndex1D[128] = {
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{ 2, 0},{ 8, 0},{ 2, 0},
- { 2, 0},{ 8, 0},{ 8, 0},{15, 0},
- { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
- { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-
- {15, 0},{15, 0},{ 6, 0},{ 8, 0},
- { 2, 0},{ 8, 0},{15, 0},{15, 0},
- { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
- { 2, 0},{15, 0},{15, 0},{ 6, 0},
- { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0},
- {15, 0},{15, 0},{ 2, 0},{ 2, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{ 2, 0},{ 2, 0},{15, 0},
- //candidateFixUpIndex1D[i][1], i < 64 should not be used
-
- { 3,15},{ 3, 8},{15, 8},{15, 3},
- { 8,15},{ 3,15},{15, 3},{15, 8},
- { 8,15},{ 8,15},{ 6,15},{ 6,15},
- { 6,15},{ 5,15},{ 3,15},{ 3, 8},
- { 3,15},{ 3, 8},{ 8,15},{15, 3},
- { 3,15},{ 3, 8},{ 6,15},{10, 8},
- { 5, 3},{ 8,15},{ 8, 6},{ 6,10},
- { 8,15},{ 5,15},{15,10},{15, 8},
-
- { 8,15},{15, 3},{ 3,15},{ 5,10},
- { 6,10},{10, 8},{ 8, 9},{15,10},
- {15, 6},{ 3,15},{15, 8},{ 5,15},
- {15, 3},{15, 6},{15, 6},{15, 8}, //The spec doesn't mark the first fix-up index in this row, so 15 is applied to these entries, which appears correct
- { 3,15},{15, 3},{ 5,15},{ 5,15},
- { 5,15},{ 8,15},{ 5,15},{10,15},
- { 5,15},{10,15},{ 8,15},{13,15},
- {15, 3},{12,15},{ 3,15},{ 3, 8},
- };
- CMP_STATIC CMP_CONSTANT CGU_Vec2ui candidateFixUpIndex1DOrdered[128] = {
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{ 2, 0},{ 8, 0},{ 2, 0},
- { 2, 0},{ 8, 0},{ 8, 0},{15, 0},
- { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
- { 8, 0},{ 8, 0},{ 2, 0},{ 2, 0},
-
- {15, 0},{15, 0},{ 6, 0},{ 8, 0},
- { 2, 0},{ 8, 0},{15, 0},{15, 0},
- { 2, 0},{ 8, 0},{ 2, 0},{ 2, 0},
- { 2, 0},{15, 0},{15, 0},{ 6, 0},
- { 6, 0},{ 2, 0},{ 6, 0},{ 8, 0},
- {15, 0},{15, 0},{ 2, 0},{ 2, 0},
- {15, 0},{15, 0},{15, 0},{15, 0},
- {15, 0},{ 2, 0},{ 2, 0},{15, 0},
- //candidateFixUpIndex1DOrdered[i][1], i < 64 should not be used
-
- { 3,15},{ 3, 8},{ 8,15},{ 3,15},
- { 8,15},{ 3,15},{ 3,15},{ 8,15},
- { 8,15},{ 8,15},{ 6,15},{ 6,15},
- { 6,15},{ 5,15},{ 3,15},{ 3, 8},
- { 3,15},{ 3, 8},{ 8,15},{ 3,15},
- { 3,15},{ 3, 8},{ 6,15},{ 8,10},
- { 3, 5},{ 8,15},{ 6, 8},{ 6,10},
- { 8,15},{ 5,15},{10,15},{ 8,15},
-
- { 8,15},{ 3,15},{ 3,15},{ 5,10},
- { 6,10},{ 8,10},{ 8, 9},{10,15},
- { 6,15},{ 3,15},{ 8,15},{ 5,15},
- { 3,15},{ 6,15},{ 6,15},{ 8,15}, //The spec doesn't mark the first fix-up index in this row, so 15 is applied to these entries, which appears correct
- { 3,15},{ 3,15},{ 5,15},{ 5,15},
- { 5,15},{ 8,15},{ 5,15},{10,15},
- { 5,15},{10,15},{ 8,15},{13,15},
- { 3,15},{12,15},{ 3,15},{ 3, 8}
- };
- CGU_Vec4ui quantize(CGU_Vec4ui color, CGU_UINT32 uPrec)
- {
- return (((color << 8) + color) * ((1 << uPrec) - 1) + 32768U) >> 16;
- }
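- // quantize rounds color * ((1 << uPrec) - 1) / 255 without a divide: (color << 8) + color multiplies by
- // 257, so the expression is (color * 257 * ((1 << uPrec) - 1) + 32768) >> 16, and 257 / 65536 approximates
- // 1 / 255 to within rounding. Illustrative check: color = 255, uPrec = 5 gives (65535 * 31 + 32768) >> 16 = 31.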
- CGU_Vec4ui unquantize(CGU_Vec4ui color, CGU_UINT32 uPrec)
- {
- #ifdef ASPM_GPU
- color = color << (8 - uPrec);
- return color | (color >> uPrec);
- #else
- CGU_Vec4ui res;
- color.x = color.x << (8 - uPrec);
- color.y = color.y << (8 - uPrec);
- color.z = color.z << (8 - uPrec);
- color.w = color.w << (8 - uPrec);
- res.x = color.x | (color.x >> uPrec);
- res.y = color.y | (color.y >> uPrec);
- res.z = color.z | (color.z >> uPrec);
- res.w = color.w | (color.w >> uPrec);
- return res;
- #endif
- }
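- // unquantize is the matching bit-replication expand. Illustrative check: uPrec = 5, q = 31 gives
- // (31 << 3) | (248 >> 5) = 248 | 7 = 255, so the quantized extremes map back to 0 and 255 exactly.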
- void swap(CMP_INOUT CGU_Vec4ui CMP_REFINOUT lhs, CMP_INOUT CGU_Vec4ui CMP_REFINOUT rhs)
- {
- CGU_Vec4ui tmp = lhs;
- lhs = rhs;
- rhs = tmp;
- }
- void swap(CMP_INOUT CGU_Vec3ui CMP_REFINOUT lhs, CMP_INOUT CGU_Vec3ui CMP_REFINOUT rhs)
- {
- CGU_Vec3ui tmp = lhs;
- lhs = rhs;
- rhs = tmp;
- }
- void swap(CMP_INOUT CGU_UINT32 CMP_REFINOUT lhs, CMP_INOUT CGU_UINT32 CMP_REFINOUT rhs)
- {
- CGU_UINT32 tmp = lhs;
- lhs = rhs;
- rhs = tmp;
- }
- CGU_UINT32 ComputeError(CMP_IN CGU_Vec4ui a, CMP_IN CGU_Vec4ui b)
- {
- return dot(a.rgb, b.rgb) + (g_alpha_weight * a.a * b.a);
- }
- void Ensure_A_Is_Larger(CMP_INOUT CGU_Vec4ui CMP_REFINOUT a, CMP_INOUT CGU_Vec4ui CMP_REFINOUT b)
- {
- if (a.x < b.x)
- swap(a.x, b.x);
- if (a.y < b.y)
- swap(a.y, b.y);
- if (a.z < b.z)
- swap(a.z, b.z);
- if (a.w < b.w)
- swap(a.w, b.w);
- }
- void compress_endpoints0(CMP_INOUT CGU_Vec4ui endPoint[2], CMP_INOUT CGU_Vec4ui quantized[2], CGU_Vec2ui P)
- {
- #ifdef ASPM_GPU
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb & 0xFFFFFFFE;
- quantized[j].rgb |= P[j];
- quantized[j].a = 0xFF;
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb;
- endPoint[j].a = 0xFF;
- quantized[j] <<= 3;
- }
- #else
- CGU_Vec4ui rgbb;
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- rgbb.r = endPoint[j].r;
- rgbb.g = endPoint[j].g;
- rgbb.b = endPoint[j].b;
- rgbb.a = endPoint[j].b;
- quantized[j].rgb = quantize(rgbb, 5).rgb;
- quantized[j].r &= 0xFFFFFFFE;
- quantized[j].g &= 0xFFFFFFFE;
- quantized[j].b &= 0xFFFFFFFE;
- quantized[j].r |= P[j];
- quantized[j].g |= P[j];
- quantized[j].b |= P[j];
- quantized[j].a = 0xFF;
- rgbb.r = quantized[j].r;
- rgbb.g = quantized[j].g;
- rgbb.b = quantized[j].b;
- rgbb.a = quantized[j].b;
- endPoint[j].rgb = unquantize(rgbb, 5).rgb;
- endPoint[j].a = 0xFF;
- quantized[j].r <<= 3;
- quantized[j].g <<= 3;
- quantized[j].b <<= 3;
- quantized[j].a <<= 3;
- }
- #endif
- }
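- // Mode 0 stores 4-bit RGB endpoints plus one P-bit per endpoint: quantize() first reduces each channel
- // to 5 bits, the low bit is then overwritten by the shared P[j], and the final << 3 re-aligns the 5-bit
- // codes so block_package0 below can mask the stored index bits (0xF0) and the P-bit (0x08) directly.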
- void compress_endpoints1(CMP_INOUT CGU_Vec4ui endPoint[2], CMP_OUT CGU_Vec4ui quantized[2], CGU_Vec2ui P)
- {
- #ifdef ASPM_GPU
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb & 0xFFFFFFFE;
- quantized[j].rgb |= P[j];
- quantized[j].a = 0xFF;
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb;
- endPoint[j].a = 0xFF;
- quantized[j] <<= 1;
- }
- #else
- CGU_Vec4ui rgbb;
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- rgbb.r = endPoint[j].r;
- rgbb.g = endPoint[j].g;
- rgbb.b = endPoint[j].b;
- rgbb.a = endPoint[j].b;
- quantized[j].rgb = quantize(rgbb, 7).rgb;
- quantized[j].r &= 0xFFFFFFFE;
- quantized[j].g &= 0xFFFFFFFE;
- quantized[j].b &= 0xFFFFFFFE;
- quantized[j].r |= P[j];
- quantized[j].g |= P[j];
- quantized[j].b |= P[j];
- quantized[j].a = 0xFF;
- rgbb.r = quantized[j].r;
- rgbb.g = quantized[j].g;
- rgbb.b = quantized[j].b;
- rgbb.a = quantized[j].b;
- endPoint[j].rgb = unquantize(rgbb, 7).rgb;
- endPoint[j].a = 0xFF;
- quantized[j].r = quantized[j].r << 1;
- quantized[j].g = quantized[j].g << 1;
- quantized[j].b = quantized[j].b << 1;
- quantized[j].a = quantized[j].a << 1;
- }
- #endif
- }
- void compress_endpoints2(CMP_INOUT CGU_Vec4ui endPoint[2], CMP_INOUT CGU_Vec4ui quantized[2])
- {
- #ifdef ASPM_GPU
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb;
- quantized[j].a = 0xFF;
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb;
- endPoint[j].a = 0xFF;
- quantized[j] <<= 3;
- }
- #else
- CGU_Vec4ui rgbb;
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- rgbb.r = endPoint[j].r;
- rgbb.g = endPoint[j].g;
- rgbb.b = endPoint[j].b;
- rgbb.a = endPoint[j].b;
- quantized[j].rgb = quantize(rgbb, 5).rgb;
- quantized[j].a = 0xFF;
- rgbb.r = quantized[j].r;
- rgbb.g = quantized[j].g;
- rgbb.b = quantized[j].b;
- rgbb.a = quantized[j].b;
- endPoint[j].rgb = unquantize(rgbb, 5).rgb;
- endPoint[j].a = 0xFF;
- quantized[j].r <<= 3;
- quantized[j].g <<= 3;
- quantized[j].b <<= 3;
- quantized[j].a <<= 3;
- }
- #endif
- }
- void compress_endpoints3(CMP_INOUT CGU_Vec4ui endPoint[2], CMP_INOUT CGU_Vec4ui quantized[2], CGU_Vec2ui P)
- {
- for (CGU_UINT32 j = 0; j < 2; j++)
- {
- quantized[j].r = endPoint[j].x & 0xFFFFFFFE;
- quantized[j].g = endPoint[j].y & 0xFFFFFFFE;
- quantized[j].b = endPoint[j].z & 0xFFFFFFFE;
- quantized[j].a = 0xFF;
- quantized[j].r |= P[j];
- quantized[j].g |= P[j];
- quantized[j].b |= P[j];
- endPoint[j].r = quantized[j].r;
- endPoint[j].g = quantized[j].g;
- endPoint[j].b = quantized[j].b;
- endPoint[j].a = 0xFF;
- }
- }
- void compress_endpoints4(CMP_INOUT CGU_Vec4ui endPoint[2], CMP_INOUT CGU_Vec4ui quantized[2])
- {
- #ifdef ASPM_HLSL
- [unroll] for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 5).rgb;
- quantized[j].a = quantize(endPoint[j].a, 6).r;
-
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 5).rgb;
- endPoint[j].a = unquantize(quantized[j].a, 6).r;
- quantized[j].rgb <<= 3;
- quantized[j].a <<= 2;
- }
- #else
- CGU_Vec4ui rgbb;
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- rgbb.r = endPoint[j].r;
- rgbb.g = endPoint[j].g;
- rgbb.b = endPoint[j].b;
- rgbb.a = endPoint[j].b;
- quantized[j].rgb = quantize(rgbb, 5).rgb;
- quantized[j].a = quantize(endPoint[j].a, 6).r;
- rgbb.r = quantized[j].r;
- rgbb.g = quantized[j].g;
- rgbb.b = quantized[j].b;
- rgbb.a = quantized[j].b;
- endPoint[j].rgb = unquantize(rgbb, 5).rgb;
- endPoint[j].a = unquantize(quantized[j].a, 6).r;
- quantized[j].r <<= 3;
- quantized[j].g <<= 3;
- quantized[j].b <<= 3;
- quantized[j].a <<= 2;
- }
- #endif
- }
- void compress_endpoints5(CMP_INOUT CGU_Vec4ui endPoint[2], CMP_INOUT CGU_Vec4ui quantized[2])
- {
- #ifdef ASPM_HLSL
- CMP_UNROLL for ( uint j = 0; j < 2; j ++ )
- {
- quantized[j].rgb = quantize(endPoint[j].rgbb, 7).rgb;
- quantized[j].a = endPoint[j].a;
- endPoint[j].rgb = unquantize(quantized[j].rgbb, 7).rgb;
- // endPoint[j].a: alpha is kept at full precision
- quantized[j].rgb <<= 1;
- }
- #else
- CGU_Vec4ui rgbb;
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- rgbb.r = endPoint[j].r;
- rgbb.g = endPoint[j].g;
- rgbb.b = endPoint[j].b;
- rgbb.a = endPoint[j].b;
- quantized[j].rgb = quantize(rgbb, 7).rgb;
- quantized[j].a = endPoint[j].a;
- rgbb.r = quantized[j].r;
- rgbb.g = quantized[j].g;
- rgbb.b = quantized[j].b;
- rgbb.a = quantized[j].b;
- endPoint[j].rgb = unquantize(rgbb, 7).rgb;
- quantized[j].r <<= 1;
- quantized[j].g <<= 1;
- quantized[j].b <<= 1;
- }
- #endif
- }
- void compress_endpoints6(CMP_INOUT CGU_Vec4ui endPoint[2], CMP_OUT CGU_Vec4ui quantized[2], CGU_Vec2ui P)
- {
- for (CGU_UINT32 j = 0; j < 2; j++)
- {
- quantized[j].x = endPoint[j].x & 0xFFFFFFFE;
- quantized[j].y = endPoint[j].y & 0xFFFFFFFE;
- quantized[j].z = endPoint[j].z & 0xFFFFFFFE;
- quantized[j].w = endPoint[j].w & 0xFFFFFFFE;
- quantized[j].x = quantized[j].x | P[j];
- quantized[j].y = quantized[j].y | P[j];
- quantized[j].z = quantized[j].z | P[j];
- quantized[j].w = quantized[j].w | P[j];
- endPoint[j] = quantized[j];
- }
- }
- void compress_endpoints7(CMP_INOUT CGU_Vec4ui endPoint[2], CMP_INOUT CGU_Vec4ui quantized[2], CGU_Vec2ui P)
- {
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- quantized[j] = quantize(endPoint[j], 6);
- quantized[j].x = (quantized[j].x & 0xFFFFFFFE) | P[j];
- quantized[j].y = (quantized[j].y & 0xFFFFFFFE) | P[j];
- quantized[j].z = (quantized[j].z & 0xFFFFFFFE) | P[j];
- quantized[j].w = (quantized[j].w & 0xFFFFFFFE) | P[j];
- endPoint[j] = unquantize(quantized[j], 6);
- }
- CMP_UNROLL for (CGU_UINT32 j = 0; j < 2; j++)
- {
- quantized[j].x = quantized[j].x << 2;
- quantized[j].y = quantized[j].y << 2;
- quantized[j].z = quantized[j].z << 2;
- quantized[j].w = quantized[j].w << 2;
- }
- }
- void block_package0(CMP_OUT CGU_Vec4ui CMP_REFINOUT block, CGU_UINT32 partition, CGU_UINT32 threadBase)
- {
- block.x = 0x01 | ((partition - 64) << 1) | ((get_end_point_l(0).r & 0xF0) << 1) | ((get_end_point_h(0).r & 0xF0) << 5) |
- ((get_end_point_l(1).r & 0xF0) << 9) | ((get_end_point_h(1).r & 0xF0) << 13) | ((get_end_point_l(2).r & 0xF0) << 17) |
- ((get_end_point_h(2).r & 0xF0) << 21) | ((get_end_point_l(0).g & 0xF0) << 25);
- block.y = ((get_end_point_l(0).g & 0xF0) >> 7) | ((get_end_point_h(0).g & 0xF0) >> 3) | ((get_end_point_l(1).g & 0xF0) << 1) |
- ((get_end_point_h(1).g & 0xF0) << 5) | ((get_end_point_l(2).g & 0xF0) << 9) | ((get_end_point_h(2).g & 0xF0) << 13) |
- ((get_end_point_l(0).b & 0xF0) << 17) | ((get_end_point_h(0).b & 0xF0) << 21) | ((get_end_point_l(1).b & 0xF0) << 25);
- block.z = ((get_end_point_l(1).b & 0xF0) >> 7) | ((get_end_point_h(1).b & 0xF0) >> 3) | ((get_end_point_l(2).b & 0xF0) << 1) |
- ((get_end_point_h(2).b & 0xF0) << 5) | ((get_end_point_l(0).r & 0x08) << 10) | ((get_end_point_h(0).r & 0x08) << 11) |
- ((get_end_point_l(1).r & 0x08) << 12) | ((get_end_point_h(1).r & 0x08) << 13) | ((get_end_point_l(2).r & 0x08) << 14) |
- ((get_end_point_h(2).r & 0x08) << 15) | (get_color_index(0) << 19);
- block.w = 0;
- CGU_UINT32 i = 1;
- for (; i <= cmp_min(candidateFixUpIndex1DOrdered[partition][0], 4); i++)
- {
- block.z |= get_color_index(i) << (i * 3 + 18);
- }
- if (candidateFixUpIndex1DOrdered[partition][0] < 4) //i = 4
- {
- block.z |= get_color_index(4) << 29;
- i += 1;
- }
- else //i = 5
- {
- block.w |= (get_color_index(4) & 0x04) >> 2;
- for (; i <= candidateFixUpIndex1DOrdered[partition][0]; i++)
- block.w |= get_color_index(i) << (i * 3 - 14);
- }
- for (; i <= candidateFixUpIndex1DOrdered[partition][1]; i++)
- {
- block.w |= get_color_index(i) << (i * 3 - 15);
- }
- for (; i < 16; i++)
- {
- block.w |= get_color_index(i) << (i * 3 - 16);
- }
- }
- void block_package1(CMP_OUT CGU_Vec4ui CMP_REFINOUT block, CGU_UINT32 partition, CGU_UINT32 threadBase)
- {
- block.x = 0x02 | (partition << 2) | ((get_end_point_l(0).r & 0xFC) << 6) | ((get_end_point_h(0).r & 0xFC) << 12) | ((get_end_point_l(1).r & 0xFC) << 18) |
- ((get_end_point_h(1).r & 0xFC) << 24);
- block.y = ((get_end_point_l(0).g & 0xFC) >> 2) | ((get_end_point_h(0).g & 0xFC) << 4) | ((get_end_point_l(1).g & 0xFC) << 10) |
- ((get_end_point_h(1).g & 0xFC) << 16) | ((get_end_point_l(0).b & 0xFC) << 22) | ((get_end_point_h(0).b & 0xFC) << 28);
- block.z = ((get_end_point_h(0).b & 0xFC) >> 4) | ((get_end_point_l(1).b & 0xFC) << 2) | ((get_end_point_h(1).b & 0xFC) << 8) |
- ((get_end_point_l(0).r & 0x02) << 15) | ((get_end_point_l(1).r & 0x02) << 16) | (get_color_index(0) << 18);
- if (candidateFixUpIndex1DOrdered[partition][0] == 15)
- {
- block.w = (get_color_index(15) << 30) | (get_color_index(14) << 27) | (get_color_index(13) << 24) | (get_color_index(12) << 21) |
- (get_color_index(11) << 18) | (get_color_index(10) << 15) | (get_color_index(9) << 12) | (get_color_index(8) << 9) |
- (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5);
- block.z |=
- (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
- }
- else if (candidateFixUpIndex1DOrdered[partition][0] == 2)
- {
- block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) |
- (get_color_index(11) << 17) | (get_color_index(10) << 14) | (get_color_index(9) << 11) | (get_color_index(8) << 8) |
- (get_color_index(7) << 5) | (get_color_index(6) << 2) | (get_color_index(5) >> 1);
- block.z |= (get_color_index(5) << 31) | (get_color_index(4) << 28) | (get_color_index(3) << 25) | (get_color_index(2) << 23) |
- (get_color_index(1) << 20) | (get_color_index(0) << 18);
- }
- else if (candidateFixUpIndex1DOrdered[partition][0] == 8)
- {
- block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) |
- (get_color_index(11) << 17) | (get_color_index(10) << 14) | (get_color_index(9) << 11) | (get_color_index(8) << 9) |
- (get_color_index(7) << 6) | (get_color_index(6) << 3) | get_color_index(5);
- block.z |=
- (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
- }
- else //candidateFixUpIndex1DOrdered[partition][0] == 6
- {
- block.w = (get_color_index(15) << 29) | (get_color_index(14) << 26) | (get_color_index(13) << 23) | (get_color_index(12) << 20) |
- (get_color_index(11) << 17) | (get_color_index(10) << 14) | (get_color_index(9) << 11) | (get_color_index(8) << 8) |
- (get_color_index(7) << 5) | (get_color_index(6) << 3) | get_color_index(5);
- block.z |=
- (get_color_index(4) << 29) | (get_color_index(3) << 26) | (get_color_index(2) << 23) | (get_color_index(1) << 20) | (get_color_index(0) << 18);
- }
- }
- void block_package2(CMP_OUT CGU_Vec4ui CMP_REFINOUT block, CGU_UINT32 partition, CGU_UINT32 threadBase)
- {
- block.x = 0x04 | ((partition - 64) << 3) | ((get_end_point_l(0).r & 0xF8) << 6) | ((get_end_point_h(0).r & 0xF8) << 11) |
- ((get_end_point_l(1).r & 0xF8) << 16) | ((get_end_point_h(1).r & 0xF8) << 21) | ((get_end_point_l(2).r & 0xF8) << 26);
- block.y = ((get_end_point_l(2).r & 0xF8) >> 6) | ((get_end_point_h(2).r & 0xF8) >> 1) | ((get_end_point_l(0).g & 0xF8) << 4) |
- ((get_end_point_h(0).g & 0xF8) << 9) | ((get_end_point_l(1).g & 0xF8) << 14) | ((get_end_point_h(1).g & 0xF8) << 19) |
- ((get_end_point_l(2).g & 0xF8) << 24);
- block.z = ((get_end_point_h(2).g & 0xF8) >> 3) | ((get_end_point_l(0).b & 0xF8) << 2) | ((get_end_point_h(0).b & 0xF8) << 7) |
- ((get_end_point_l(1).b & 0xF8) << 12) | ((get_end_point_h(1).b & 0xF8) << 17) | ((get_end_point_l(2).b & 0xF8) << 22) |
- ((get_end_point_h(2).b & 0xF8) << 27);
- block.w = ((get_end_point_h(2).b & 0xF8) >> 5) | (get_color_index(0) << 3);
- CGU_UINT32 i = 1;
- for (; i <= candidateFixUpIndex1DOrdered[partition][0]; i++)
- {
- block.w |= get_color_index(i) << (i * 2 + 2);
- }
- for (; i <= candidateFixUpIndex1DOrdered[partition][1]; i++)
- {
- block.w |= get_color_index(i) << (i * 2 + 1);
- }
- for (; i < 16; i++)
- {
- block.w |= get_color_index(i) << (i * 2);
- }
- }
- void block_package3(CMP_OUT CGU_Vec4ui CMP_REFINOUT block, CGU_UINT32 partition, CGU_UINT32 threadBase)
- {
- block.x = 0x08 | (partition << 4) | ((get_end_point_l(0).r & 0xFE) << 9) | ((get_end_point_h(0).r & 0xFE) << 16) | ((get_end_point_l(1).r & 0xFE) << 23) |
- ((get_end_point_h(1).r & 0xFE) << 30);
- block.y = ((get_end_point_h(1).r & 0xFE) >> 2) | ((get_end_point_l(0).g & 0xFE) << 5) | ((get_end_point_h(0).g & 0xFE) << 12) |
- ((get_end_point_l(1).g & 0xFE) << 19) | ((get_end_point_h(1).g & 0xFE) << 26);
- block.z = ((get_end_point_h(1).g & 0xFE) >> 6) | ((get_end_point_l(0).b & 0xFE) << 1) | ((get_end_point_h(0).b & 0xFE) << 8) |
- ((get_end_point_l(1).b & 0xFE) << 15) | ((get_end_point_h(1).b & 0xFE) << 22) | ((get_end_point_l(0).r & 0x01) << 30) |
- ((get_end_point_h(0).r & 0x01) << 31);
- block.w = ((get_end_point_l(1).r & 0x01) << 0) | ((get_end_point_h(1).r & 0x01) << 1) | (get_color_index(0) << 2);
- CGU_UINT32 i = 1;
- for (; i <= candidateFixUpIndex1DOrdered[partition][0]; i++)
- {
- block.w |= get_color_index(i) << (i * 2 + 1);
- }
- for (; i < 16; i++)
- {
- block.w |= get_color_index(i) << (i * 2);
- }
- }
- void block_package4(CMP_OUT CGU_Vec4ui CMP_REFINOUT block, CGU_UINT32 rotation, CGU_UINT32 index_selector, CGU_UINT32 threadBase)
- {
- block.x = 0x10 | ((rotation & 3) << 5) | ((index_selector & 1) << 7) | ((get_end_point_l(0).r & 0xF8) << 5) | ((get_end_point_h(0).r & 0xF8) << 10) |
- ((get_end_point_l(0).g & 0xF8) << 15) | ((get_end_point_h(0).g & 0xF8) << 20) | ((get_end_point_l(0).b & 0xF8) << 25);
- block.y = ((get_end_point_l(0).b & 0xF8) >> 7) | ((get_end_point_h(0).b & 0xF8) >> 2) | ((get_end_point_l(0).a & 0xFC) << 4) |
- ((get_end_point_h(0).a & 0xFC) << 10) | ((get_color_index(0) & 1) << 18) | (get_color_index(1) << 19) | (get_color_index(2) << 21) |
- (get_color_index(3) << 23) | (get_color_index(4) << 25) | (get_color_index(5) << 27) | (get_color_index(6) << 29) | (get_color_index(7) << 31);
- block.z = (get_color_index(7) >> 1) | (get_color_index(8) << 1) | (get_color_index(9) << 3) | (get_color_index(10) << 5) | (get_color_index(11) << 7) |
- (get_color_index(12) << 9) | (get_color_index(13) << 11) | (get_color_index(14) << 13) | (get_color_index(15) << 15) |
- ((get_alpha_index(0) & 3) << 17) | (get_alpha_index(1) << 19) | (get_alpha_index(2) << 22) | (get_alpha_index(3) << 25) |
- (get_alpha_index(4) << 28) | (get_alpha_index(5) << 31);
- block.w = (get_alpha_index(5) >> 1) | (get_alpha_index(6) << 2) | (get_alpha_index(7) << 5) | (get_alpha_index(8) << 8) | (get_alpha_index(9) << 11) |
- (get_alpha_index(10) << 14) | (get_alpha_index(11) << 17) | (get_alpha_index(12) << 20) | (get_alpha_index(13) << 23) |
- (get_alpha_index(14) << 26) | (get_alpha_index(15) << 29);
- }
- void block_package5(CMP_OUT CGU_Vec4ui CMP_REFINOUT block, CGU_UINT32 rotation, CGU_UINT32 threadBase)
- {
- block.x = 0x20 | (rotation << 6) | ((get_end_point_l(0).r & 0xFE) << 7) | ((get_end_point_h(0).r & 0xFE) << 14) | ((get_end_point_l(0).g & 0xFE) << 21) |
- ((get_end_point_h(0).g & 0xFE) << 28);
- block.y = ((get_end_point_h(0).g & 0xFE) >> 4) | ((get_end_point_l(0).b & 0xFE) << 3) | ((get_end_point_h(0).b & 0xFE) << 10) |
- (get_end_point_l(0).a << 18) | (get_end_point_h(0).a << 26);
- block.z = (get_end_point_h(0).a >> 6) | (get_color_index(0) << 2) | (get_color_index(1) << 3) | (get_color_index(2) << 5) | (get_color_index(3) << 7) |
- (get_color_index(4) << 9) | (get_color_index(5) << 11) | (get_color_index(6) << 13) | (get_color_index(7) << 15) | (get_color_index(8) << 17) |
- (get_color_index(9) << 19) | (get_color_index(10) << 21) | (get_color_index(11) << 23) | (get_color_index(12) << 25) |
- (get_color_index(13) << 27) | (get_color_index(14) << 29) | (get_color_index(15) << 31);
- block.w = (get_color_index(15) >> 1) | (get_alpha_index(0) << 1) | (get_alpha_index(1) << 2) | (get_alpha_index(2) << 4) | (get_alpha_index(3) << 6) |
- (get_alpha_index(4) << 8) | (get_alpha_index(5) << 10) | (get_alpha_index(6) << 12) | (get_alpha_index(7) << 14) | (get_alpha_index(8) << 16) |
- (get_alpha_index(9) << 18) | (get_alpha_index(10) << 20) | (get_alpha_index(11) << 22) | (get_alpha_index(12) << 24) |
- (get_alpha_index(13) << 26) | (get_alpha_index(14) << 28) | (get_alpha_index(15) << 30);
- }
- void block_package6(CMP_OUT CGU_Vec4ui CMP_REFINOUT block, CGU_UINT32 threadBase)
- {
- block.x = 0x40 | ((get_end_point_l(0).r & 0xFE) << 6) | ((get_end_point_h(0).r & 0xFE) << 13) | ((get_end_point_l(0).g & 0xFE) << 20) |
- ((get_end_point_h(0).g & 0xFE) << 27);
- block.y = ((get_end_point_h(0).g & 0xFE) >> 5) | ((get_end_point_l(0).b & 0xFE) << 2) | ((get_end_point_h(0).b & 0xFE) << 9) |
- ((get_end_point_l(0).a & 0xFE) << 16) | ((get_end_point_h(0).a & 0xFE) << 23) | (get_end_point_l(0).r & 0x01) << 31;
- block.z = (get_end_point_h(0).r & 0x01) | (get_color_index(0) << 1) | (get_color_index(1) << 4) | (get_color_index(2) << 8) | (get_color_index(3) << 12) |
- (get_color_index(4) << 16) | (get_color_index(5) << 20) | (get_color_index(6) << 24) | (get_color_index(7) << 28);
- block.w = (get_color_index(8) << 0) | (get_color_index(9) << 4) | (get_color_index(10) << 8) | (get_color_index(11) << 12) | (get_color_index(12) << 16) |
- (get_color_index(13) << 20) | (get_color_index(14) << 24) | (get_color_index(15) << 28);
- }
- void block_package7(CMP_OUT CGU_Vec4ui CMP_REFINOUT block, CGU_UINT32 partition, CGU_UINT32 threadBase)
- {
- block.x = 0x80 | (partition << 8) | ((get_end_point_l(0).r & 0xF8) << 11) | ((get_end_point_h(0).r & 0xF8) << 16) | ((get_end_point_l(1).r & 0xF8) << 21) |
- ((get_end_point_h(1).r & 0xF8) << 26);
- block.y = ((get_end_point_h(1).r & 0xF8) >> 6) | ((get_end_point_l(0).g & 0xF8) >> 1) | ((get_end_point_h(0).g & 0xF8) << 4) |
- ((get_end_point_l(1).g & 0xF8) << 9) | ((get_end_point_h(1).g & 0xF8) << 14) | ((get_end_point_l(0).b & 0xF8) << 19) |
- ((get_end_point_h(0).b & 0xF8) << 24);
- block.z = ((get_end_point_l(1).b & 0xF8) >> 3) | ((get_end_point_h(1).b & 0xF8) << 2) | ((get_end_point_l(0).a & 0xF8) << 7) |
- ((get_end_point_h(0).a & 0xF8) << 12) | ((get_end_point_l(1).a & 0xF8) << 17) | ((get_end_point_h(1).a & 0xF8) << 22) |
- ((get_end_point_l(0).r & 0x04) << 28) | ((get_end_point_h(0).r & 0x04) << 29);
- block.w = ((get_end_point_l(1).r & 0x04) >> 2) | ((get_end_point_h(1).r & 0x04) >> 1) | (get_color_index(0) << 2);
- CGU_UINT32 i = 1;
- for (; i <= candidateFixUpIndex1DOrdered[partition][0]; i++)
- {
- block.w |= get_color_index(i) << (i * 2 + 1);
- }
- for (; i < 16; i++)
- {
- block.w |= get_color_index(i) << (i * 2);
- }
- }
- void GroupSync()
- {
- #ifdef ASPM_GPU
- GroupMemoryBarrierWithGroupSync();
- #endif
- }
- void set_pixel_rotation(CMP_INOUT CGU_Vec4ui CMP_REFINOUT pixel, CGU_UINT32 rotation)
- {
- #ifdef ASPM_GPU
- if (1 == rotation)
- {
- pixel.ra = pixel.ar;
- }
- else if (2 == rotation)
- {
- pixel.ga = pixel.ag;
- }
- else if (3 == rotation)
- {
- pixel.ba = pixel.ab;
- }
- #else
- CGU_UINT32 r, g, b, a;
- r = pixel.r;
- g = pixel.g;
- b = pixel.b;
- a = pixel.a;
- if (1 == rotation)
- {
- pixel.r = a;
- pixel.a = r;
- }
- else if (2 == rotation)
- {
- pixel.g = a;
- pixel.a = g;
- }
- else if (3 == rotation)
- {
- pixel.b = a;
- pixel.a = b;
- }
- #endif
- }
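- // Channel rotation (BC7 modes 4 and 5) swaps one color channel with alpha before encoding, letting the
- // separately coded alpha index set carry whichever channel benefits most; decoders apply the same swap
- // again after decompression to restore the original layout.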
- CGU_BOOL cmp_ImageHasAlpha(CGU_UINT32 threadBase)
- {
- #if defined(ENABLED_MODE6) || defined(ENABLE_CMP_MODE6)
- CGU_UINT32 alpha;
- for (CGU_INT ii = 0; ii < 16; ii++)
- {
- alpha = shared_temp[threadBase + ii].pixel.a;
- if ((alpha < 255))
- return true;
- }
- #endif
- return false;
- }
- #ifdef ENABLE_CMP_API
- CGU_UINT32 GetRamp2(CGU_UINT32 e0, CGU_UINT32 e1, CGU_UINT32 index, CGU_UINT32 indexprecision)
- {
- if (indexprecision == 2)
- return (CGU_UINT32)(((64 - aWeight[2][index]) * e0 + aWeight[2][index] * e1 + 32) >> 6);
- else if (indexprecision == 3)
- return (CGU_UINT32)(((64 - aWeight[1][index]) * e0 + aWeight[1][index] * e1 + 32) >> 6);
- else // indexprecision == 4
- return (CGU_UINT32)(((64 - aWeight[0][index]) * e0 + aWeight[0][index] * e1 + 32) >> 6);
- }
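- // Worked example (illustrative): e0 = 0, e1 = 255, index = 1, indexprecision = 2 uses weight
- // aWeight[2][1] = 21 and returns ((64 - 21) * 0 + 21 * 255 + 32) >> 6 = 5387 >> 6 = 84, matching the
- // float path in cmp_GetRamp.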
- //====================================== MODE 6 ==========================================
- void cmp_encode_apply_swap(CMP_INOUT CGU_Vec4ui epo_code_out[2], CMP_INOUT CGU_UINT32 block_index[2], CMP_IN CGU_INT bits)
- {
- CGU_UINT32 levels = 1 << bits;
- if ((block_index[0] & 15) >= levels / 2)
- {
- // swap end points
- CGU_Vec4ui t = epo_code_out[0];
- epo_code_out[0] = epo_code_out[1];
- epo_code_out[1] = t;
- block_index[0] = (CGU_UINT32)(0x11111111 * (levels - 1)) - block_index[0];
- block_index[1] = (CGU_UINT32)(0x11111111 * (levels - 1)) - block_index[1];
- }
- }
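- // The 0x11111111 multiply works because block_index packs one 4-bit index per pixel, eight pixels per
- // CGU_UINT32: 0x11111111 * (levels - 1) replicates (levels - 1) into every nibble, so a single subtraction
- // inverts every index to (levels - 1) - q at once (no nibble borrows occur because each q <= levels - 1).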
- CGU_INT cmp_Write32Bit(CMP_INOUT CGU_UINT32 base[4], CMP_IN CGU_INT offset, CMP_IN CGU_INT bits, CMP_IN CGU_UINT32 bitVal)
- {
- base[offset / 32] |= ((CGU_UINT32)bitVal) << (offset % 32);
- if (offset % 32 + bits > 32)
- {
- if ((offset / 32 + 1) < 4)
- base[(offset / 32) + 1] |= cmp_shift_right_uint32(bitVal, 32 - offset % 32);
- }
- offset += bits;
- return offset;
- }
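- // Usage sketch (illustrative): writing a 6-bit value at bit offset 30 lands its low 2 bits in base[0]
- // and the remaining 4 bits in base[1]; cmp_Write32Bit handles the straddle and returns the advanced
- // offset, 36.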
- void cmp_encode_index2(CMP_INOUT CGU_UINT32 data[4], CMP_IN CGU_INT pPos, CMP_INOUT CGU_UINT32 color_index[2], CMP_IN CGU_INT bits, CMP_IN CGU_INT flips)
- {
- CGU_INT levels = 1 << bits;
- CGU_INT flips_shifted = flips;
- for (CGU_INT k1 = 0; k1 < 2; k1++)
- {
- CGU_UINT32 qbits_shifted = color_index[k1];
- for (CGU_INT k2 = 0; k2 < 8; k2++)
- {
- CGU_UINT32 q = qbits_shifted & 15;
- if ((flips_shifted & 1) > 0)
- q = (levels - 1) - q;
- if (k1 == 0 && k2 == 0)
- pPos = cmp_Write32Bit(data, pPos, bits - 1, q);
- else
- pPos = cmp_Write32Bit(data, pPos, bits, q);
- qbits_shifted >>= 4;
- flips_shifted >>= 1;
- }
- }
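- // color_index arrives as two CGU_UINT32s of packed 4-bit indices (pixels 0..7 and 8..15). Each index is
- // conditionally inverted through the per-pixel flips mask (set where a subset's endpoints were swapped),
- // and the anchor pixel (k1 == 0, k2 == 0, i.e. pixel 0) is written with bits - 1 bits, since BC7
- // guarantees its most significant index bit is zero.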
- }
- void cmp_eigen_vector(CMP_INOUT CGV_Vec4f CMP_REFINOUT eigen_vector,
- CMP_INOUT CGU_Vec4f CMP_REFINOUT image_mean,
- CMP_IN CGV_Vec4ui image_src[16],
- CMP_IN CGU_INT numEntries)
- {
- CGU_INT k;
- image_mean = 0.0f;
- eigen_vector = 0.0f;
- CGV_FLOAT vector_covOut[10];
- CGV_FLOAT covar[10] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
- CGV_Vec4f rgbasum = {0.0f, 0.0f, 0.0f, 0.0f};
- for (k = 0; k < numEntries; k++)
- {
- CGV_Vec4f rgba;
- rgba.x = image_src[k].x;
- rgba.y = image_src[k].y;
- rgba.z = image_src[k].z;
- rgba.w = image_src[k].w;
- rgbasum.x += rgba.x;
- rgbasum.y += rgba.y;
- rgbasum.z += rgba.z;
- rgbasum.w += rgba.w;
- covar[0] += rgba.x * rgba.x; //covar[0].x => covar[0]
- covar[1] += rgba.x * rgba.y; //covar[0].y => covar[1]
- covar[2] += rgba.x * rgba.z; //covar[0].z => covar[2]
- covar[3] += rgba.x * rgba.w; //covar[0].w => covar[3]
- covar[4] += rgba.y * rgba.y; //covar[1].y => covar[4]
- covar[5] += rgba.y * rgba.z; //covar[1].z => covar[5]
- covar[6] += rgba.y * rgba.w; //covar[1].w => covar[6]
- covar[7] += rgba.z * rgba.z; //covar[2].z => covar[7]
- covar[8] += rgba.z * rgba.w; //covar[2].w => covar[8]
- covar[9] += rgba.w * rgba.w; //covar[3].w => covar[9]
- }
- image_mean = rgbasum / (CGV_FLOAT)numEntries;
- vector_covOut[0] = covar[0] - (rgbasum.x * rgbasum.x / numEntries);
- vector_covOut[1] = covar[1] - (rgbasum.x * rgbasum.y / numEntries);
- vector_covOut[2] = covar[2] - (rgbasum.x * rgbasum.z / numEntries);
- vector_covOut[3] = covar[3] - (rgbasum.x * rgbasum.w / numEntries);
- vector_covOut[4] = covar[4] - (rgbasum.y * rgbasum.y / numEntries);
- vector_covOut[5] = covar[5] - (rgbasum.y * rgbasum.z / numEntries);
- vector_covOut[6] = covar[6] - (rgbasum.y * rgbasum.w / numEntries);
- vector_covOut[7] = covar[7] - (rgbasum.z * rgbasum.z / numEntries);
- vector_covOut[8] = covar[8] - (rgbasum.z * rgbasum.w / numEntries);
- vector_covOut[9] = covar[9] - (rgbasum.w * rgbasum.w / numEntries);
- CGV_FLOAT inv_var = 1.0f / (256 * 256); // multiply by the reciprocal (1/65536 = 1.52587890625e-05); a multiply is faster than a divide on GPU
- for (k = 0; k < 10; k++)
- {
- vector_covOut[k] = vector_covOut[k] * inv_var;
- }
- // Compute eigen_vector
- CGV_Vec4f vec = {1.0f, 1.0f, 1.0f, 1.0f};
- CGU_INT powerIterations = 6; // 4 is not enough for HQ; a quality setting could choose this in the range 2..n
- for (k = 0; k < powerIterations; k++)
- {
- eigen_vector.x = vector_covOut[0] * vec.x + vector_covOut[1] * vec.y + vector_covOut[2] * vec.z + vector_covOut[3] * vec.w;
- eigen_vector.y = vector_covOut[1] * vec.x + vector_covOut[4] * vec.y + vector_covOut[5] * vec.z + vector_covOut[6] * vec.w;
- eigen_vector.z = vector_covOut[2] * vec.x + vector_covOut[5] * vec.y + vector_covOut[7] * vec.z + vector_covOut[8] * vec.w;
- eigen_vector.w = vector_covOut[3] * vec.x + vector_covOut[6] * vec.y + vector_covOut[8] * vec.z + vector_covOut[9] * vec.w;
- // renormalize every other iteration
- if (k % 2 == 1)
- {
- CGV_FLOAT norm_sq = cmp_dot4f(eigen_vector, eigen_vector);
- CGV_FLOAT rnorm = cmp_Image_rsqrt(norm_sq);
- vec = eigen_vector * rnorm;
- }
- else
- vec = eigen_vector;
- }
- eigen_vector = vec;
- }
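- // Power iteration: repeatedly multiplying a seed vector by the symmetric 4x4
- // covariance matrix (stored as its 10 unique terms) converges toward the
- // dominant eigenvector, i.e. the principal axis of the block's colors.
- // Renormalizing only on every other pass halves the rsqrt work while keeping
- // the intermediate magnitudes bounded.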
- void cmp_endpoints2(CMP_INOUT CGU_Vec4ui end_points_out[2], CMP_IN CGV_Vec4f ext[2], CMP_IN CGV_Vec4f eigen_vector, CMP_IN CGV_Vec4f image_mean)
- {
- CGV_FLOAT levelHigh = 255; // mode 6: levels = 1 << 7 = 128; with the p-bit, odd endpoints reach (128 * 2) - 1 = 255
- CGV_FLOAT levelLow = 254; // mode 6: even (p-bit 0) endpoints reach (128 * 2) - 2 = 254
- CGV_Vec4f qep_b[2];
- CGV_FLOAT err0 = 0.0f;
- CGV_FLOAT err1 = 0.0f;
- CGV_Vec4f block_endpoints[2];
- block_endpoints[0] = ext[0] * eigen_vector + image_mean;
- block_endpoints[1] = ext[1] * eigen_vector + image_mean;
- for (CGU_INT subset = 0; subset < 2; subset++)
- { // this code affects quality
- qep_b[0].x = cmp_clampf((CGV_INT)((block_endpoints[subset].x / 255.0f * levelHigh) / 2.0f + 0.5f) * 2.0f, 0, levelLow);
- qep_b[0].y = cmp_clampf((CGV_INT)((block_endpoints[subset].y / 255.0f * levelHigh) / 2.0f + 0.5f) * 2.0f, 0, levelLow);
- qep_b[0].z = cmp_clampf((CGV_INT)((block_endpoints[subset].z / 255.0f * levelHigh) / 2.0f + 0.5f) * 2.0f, 0, levelLow);
- qep_b[0].w = cmp_clampf((CGV_INT)((block_endpoints[subset].w / 255.0f * levelHigh) / 2.0f + 0.5f) * 2.0f, 0, levelLow);
- qep_b[1].x = cmp_clampf((CGV_INT)((block_endpoints[subset].x / 255.0f * levelHigh - 1) / 2.0f + 0.5f) * 2 + 1, 1, levelHigh);
- qep_b[1].y = cmp_clampf((CGV_INT)((block_endpoints[subset].y / 255.0f * levelHigh - 1) / 2.0f + 0.5f) * 2 + 1, 1, levelHigh);
- qep_b[1].z = cmp_clampf((CGV_INT)((block_endpoints[subset].z / 255.0f * levelHigh - 1) / 2.0f + 0.5f) * 2 + 1, 1, levelHigh);
- qep_b[1].w = cmp_clampf((CGV_INT)((block_endpoints[subset].w / 255.0f * levelHigh - 1) / 2.0f + 0.5f) * 2 + 1, 1, levelHigh);
- err0 = cmp_dot4f(block_endpoints[subset] - qep_b[0], block_endpoints[subset] - qep_b[0]);
- err1 = cmp_dot4f(block_endpoints[subset] - qep_b[1], block_endpoints[subset] - qep_b[1]);
- if (subset == 0)
- {
- end_points_out[1].x = (err0 < err1) ? qep_b[0].x : qep_b[1].x;
- end_points_out[1].y = (err0 < err1) ? qep_b[0].y : qep_b[1].y;
- end_points_out[1].z = (err0 < err1) ? qep_b[0].z : qep_b[1].z;
- end_points_out[1].w = (err0 < err1) ? qep_b[0].w : qep_b[1].w;
- }
- else
- {
- end_points_out[0].x = ((err0 < err1) ? qep_b[0].x : qep_b[1].x);
- end_points_out[0].y = ((err0 < err1) ? qep_b[0].y : qep_b[1].y);
- end_points_out[0].z = ((err0 < err1) ? qep_b[0].z : qep_b[1].z);
- end_points_out[0].w = ((err0 < err1) ? qep_b[0].w : qep_b[1].w);
- }
- }
- }
- void cmp_block_endpoints(CMP_INOUT CGU_Vec4ui end_points_out[2],
- CMP_IN CGV_Vec4f eigen_vector,
- CMP_IN CGV_Vec4f image_mean,
- CMP_IN CGU_Vec4ui image_src[16],
- CMP_IN CGU_INT numEntries, //IN: range 0..15 (MAX_SUBSET_SIZE)
- CMP_IN CGU_INT partition_mask // 0xFFFF:FFFF
- )
- {
- CGV_Vec4f ext[2] = {{255.0f, 255.0f, 255.0f, 255.0f}, {0.0f, 0.0f, 0.0f, 0.0f}};
- // find min/max
- CGV_INT mask_shifted = partition_mask << 1;
- for (CGU_INT k3 = 0; k3 <= numEntries; k3++)
- {
- mask_shifted >>= 1;
- if ((mask_shifted & 1) == 0)
- continue;
- CGV_FLOAT dot = 0;
- CGV_Vec4f diff;
- diff.x = image_src[k3].x - image_mean.x;
- diff.y = image_src[k3].y - image_mean.y;
- diff.z = image_src[k3].z - image_mean.z;
- diff.w = image_src[k3].w - image_mean.w;
- dot += cmp_dot4f(eigen_vector, diff);
- ext[0].x = cmp_minf(ext[0].x, dot);
- ext[0].y = cmp_minf(ext[0].y, dot);
- ext[0].z = cmp_minf(ext[0].z, dot);
- ext[0].w = cmp_minf(ext[0].w, dot);
- ext[1].x = cmp_maxf(ext[1].x, dot);
- ext[1].y = cmp_maxf(ext[1].y, dot);
- ext[1].z = cmp_maxf(ext[1].z, dot);
- ext[1].w = cmp_maxf(ext[1].w, dot);
- }
- // create some distance if the endpoints collapse
- if (ext[1].x - ext[0].x < 1.0f)
- {
- ext[0] -= 0.5f;
- ext[1] += 0.5f;
- }
- cmp_endpoints2(end_points_out, ext, eigen_vector, image_mean);
- }
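- // ext[0]/ext[1] hold the min/max projections of the masked pixels onto the
- // principal axis; cmp_endpoints2 maps them back through eigen_vector and
- // image_mean and quantizes to mode 6 precision. Note the deliberate cross
- // assignment there (subset 0 writes end_points_out[1]): cmp_block_index maps
- // the highest projection to index 0, so the endpoint order must be inverted.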
- CGV_UINT8 clampIndex2(CGV_UINT8 v, CGV_UINT8 a, CGV_UINT8 b)
- {
- if (v < a)
- return a;
- else if (v > b)
- return b;
- return v;
- }
- void cmp_block_index(CMP_INOUT CGU_UINT32 index_out[16],
- CMP_IN CGV_Vec4f eigen_vector,
- CMP_IN CGV_Vec4f image_mean,
- CMP_IN CGU_Vec4ui image_src[16],
- CMP_IN CGU_UINT32 numEntries // Range 0..15 (MAX_SUBSET_SIZE)
- )
- {
- //=====================
- // Get Projected Index
- //=====================
- CGV_FLOAT image_projected[16];
- CGV_FLOAT image_v[16];
- CGV_FLOAT image_z[16];
- CGV_FLOAT projected_high; // Values are +ve about centered image projection
- CGV_FLOAT projected_low; // Values are -ve about centered image projection
- CGV_FLOAT image_s;
- //====================================================================
- // Center the image to new coordinate axis centered at the mean value
- //====================================================================
- CGV_Vec4f image_centered[16];
- CGV_Vec4f diff;
- for (CGU_UINT32 k1 = 0; k1 <= numEntries; k1++)
- {
- diff.x = image_src[k1].x - image_mean.x;
- diff.y = image_src[k1].y - image_mean.y;
- diff.z = image_src[k1].z - image_mean.z;
- diff.w = image_src[k1].w - image_mean.w;
- image_centered[k1] = diff * eigen_vector;
- image_projected[k1] = image_centered[k1].x + image_centered[k1].y + image_centered[k1].z + image_centered[k1].w;
- }
- projected_high = image_projected[0];
- projected_low = image_projected[0];
- for (CGU_UINT32 i1 = 1; i1 <= numEntries; i1++)
- {
- if (projected_high < image_projected[i1])
- projected_high = image_projected[i1];
- if (projected_low > image_projected[i1])
- projected_low = image_projected[i1];
- }
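- // img_diff = low - high is negative, so image_s below is negative as well; the
- // subtraction of projected_high * image_s then maps the highest projection to
- // index 0 and the lowest to index numEntries.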
- CGV_FLOAT img_diff = projected_low - projected_high;
- if (img_diff == 0.0f)
- return;
- image_s = numEntries / img_diff;
- // Get initial index projection
- for (CGU_UINT32 idx = 0; idx <= numEntries; idx++)
- {
- image_v[idx] = image_projected[idx] * image_s;
- image_z[idx] = floor(image_v[idx] + 0.5F - projected_high * image_s);
- index_out[idx] = (CGV_UINT32)image_z[idx];
- }
- // get minimum index
- CGU_UINT32 index_min = index_out[0];
- for (CGU_UINT32 i3 = 1; i3 <= numEntries; i3++)
- {
- if (index_out[i3] < index_min)
- index_min = index_out[i3];
- }
- // Reposition all indices relative to the minimum index (using the min index as 0)
- for (CGU_UINT32 i4 = 0; i4 <= numEntries; i4++)
- {
- index_out[i4] = clampIndex2(index_out[i4] - index_min, 0, 15);
- }
- }
- CGU_UINT32 cmp_calcblockerr(CGU_Vec4ui endPoint_in[2], CGU_Vec4ui image_src[16])
- {
- CGU_UINT32 error = 0;
- CGU_Vec4ui pixel = image_src[0];
- CGU_Vec4ui endPoint[2];
- CGU_Vec4i pixelDiff;
- endPoint[0] = endPoint_in[0];
- endPoint[1] = endPoint_in[1];
- pixelDiff.x = pixel.x - endPoint[0].x;
- pixelDiff.y = pixel.y - endPoint[0].y;
- pixelDiff.z = pixel.z - endPoint[0].z;
- pixelDiff.w = pixel.w - endPoint[0].w;
- CGU_Vec4i span;
- CGU_Vec2i span_norm_sqr;
- CGU_Vec2i dotProduct;
- span.x = endPoint[1].x - endPoint[0].x;
- span.y = endPoint[1].y - endPoint[0].y;
- span.z = endPoint[1].z - endPoint[0].z;
- span.w = endPoint[1].w - endPoint[0].w;
- span_norm_sqr = cmp_dotVec4i(span, span);
- dotProduct = cmp_dotVec4i(span, pixelDiff);
- if (span_norm_sqr.x > 0 && dotProduct.x >= 0 && CGU_UINT32(dotProduct.x * 63.49999) > CGU_UINT32(32 * span_norm_sqr.x))
- {
- span.x = -span.x;
- span.y = -span.y;
- span.z = -span.z;
- span.w = -span.w;
- swap(endPoint[0], endPoint[1]);
- }
- CGU_UINT32 color_index;
- CGU_Vec4ui pixel_r;
- for (CGU_UINT32 i = 0; i < 16; i++)
- {
- pixel = image_src[i];
- pixelDiff.x = pixel.x - endPoint[0].x;
- pixelDiff.y = pixel.y - endPoint[0].y;
- pixelDiff.z = pixel.z - endPoint[0].z;
- pixelDiff.w = pixel.w - endPoint[0].w;
- dotProduct.x = cmp_dotVec4i(span, pixelDiff);
- color_index = (span_norm_sqr.x <= 0 || dotProduct.x <= 0)
- ? 0
- : ((dotProduct.x < span_norm_sqr.x) ? aStep[0][CGU_UINT32(dotProduct.x * 63.49999 / span_norm_sqr.x)] : aStep[0][63]);
- pixel_r = (endPoint[0] * (64 - aWeight[0][color_index]) + endPoint[1] * aWeight[0][color_index] + 32u) >> 6;
- Ensure_A_Is_Larger(pixel_r, pixel);
- pixel_r -= pixel;
- error += ComputeError(pixel_r, pixel_r);
- }
- return error;
- }
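- // Approximates the mode 6 block error: orient the endpoint span so pixel 0 maps
- // to the lower half of the ramp, project each pixel onto the span to pick the
- // nearest 4-bit step, reconstruct via ((64 - w) * e0 + w * e1 + 32) >> 6, and
- // accumulate the squared difference against the source.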
- CGU_FLOAT cmp_GetIndexedEndPoints(CMP_INOUT CGU_Vec4ui epo_code_out[2],
- CMP_INOUT CGU_UINT32 index_out[16],
- CMP_IN CGU_Vec4ui image_src[16],
- CMP_IN CGU_INT numEntries,
- CMP_IN CGU_INT partition_mask)
- {
- CGV_Vec4f image_mean = {0.0f, 0.0f, 0.0f, 0.0f};
- CGV_Vec4f eigen_vector;
- for (CGU_INT i0 = 0; i0 < 16; i0++)
- index_out[i0] = 0;
- cmp_eigen_vector(eigen_vector, image_mean, image_src, numEntries);
- cmp_block_endpoints(epo_code_out, eigen_vector, image_mean, image_src, numEntries, partition_mask);
- cmp_block_index(index_out, eigen_vector, image_mean, image_src, numEntries);
- CGU_UINT32 besterr = cmp_calcblockerr(epo_code_out, image_src);
- return besterr;
- }
- void cmp_encode_mode6(CMP_INOUT CGU_UINT32 cmp_out[4], CMP_IN CGU_Vec4ui epo_code_out[2], CMP_IN CGU_UINT32 packed_color_index[2])
- {
- cmp_encode_apply_swap(epo_code_out, packed_color_index, 4);
- CGU_INT k;
- for (k = 0; k < 4; k++)
- cmp_out[k] = 0;
- CGU_INT pos = 0;
- // mode 6
- pos = cmp_Write32Bit(cmp_out, pos, 7, 64);
- // endpoints
- pos = cmp_Write32Bit(cmp_out, pos, 7, epo_code_out[0].x >> 1);
- pos = cmp_Write32Bit(cmp_out, pos, 7, epo_code_out[1].x >> 1);
- pos = cmp_Write32Bit(cmp_out, pos, 7, epo_code_out[0].y >> 1);
- pos = cmp_Write32Bit(cmp_out, pos, 7, epo_code_out[1].y >> 1);
- pos = cmp_Write32Bit(cmp_out, pos, 7, epo_code_out[0].z >> 1);
- pos = cmp_Write32Bit(cmp_out, pos, 7, epo_code_out[1].z >> 1);
- pos = cmp_Write32Bit(cmp_out, pos, 7, epo_code_out[0].w >> 1);
- pos = cmp_Write32Bit(cmp_out, pos, 7, epo_code_out[1].w >> 1);
- // p bits
- pos = cmp_Write32Bit(cmp_out, pos, 1, epo_code_out[0].x & 1);
- pos = cmp_Write32Bit(cmp_out, pos, 1, epo_code_out[1].x & 1);
- // quantized values
- cmp_encode_index2(cmp_out, pos, packed_color_index, 4, 0);
- }
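- // Mode 6 bit layout (128 bits total): 7 mode bits (the value 64 reads as six
- // zeros then a one), 8 x 7 = 56 endpoint bits, 2 p-bits, and 63 index bits
- // (the anchor index drops its implicit MSB): 7 + 56 + 2 + 63 = 128.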
- //====================================== MODES 01237 ==========================================
- CGU_UINT32 index_collapse2(CMP_INOUT CGU_UINT32 index[16], CGU_UINT32 numEntries)
- {
- CGU_UINT32 minIndex = index[0];
- CGU_UINT32 MaxIndex = index[0];
- for (CGU_UINT32 km = 1; km < numEntries; km++)
- {
- if (index[km] < minIndex)
- minIndex = index[km];
- if (index[km] > MaxIndex)
- MaxIndex = index[km];
- }
- if (MaxIndex == 0)
- return 0;
- CGU_UINT32 D = 1;
- for (CGU_UINT32 d = 2; d <= MaxIndex - minIndex; d++)
- {
- CGU_UINT32 ent = 0U;
- for (; ent < numEntries; ent++)
- {
- if (((index[ent] - minIndex) % d) != 0)
- break;
- }
- // d is a common divisor only if the inner loop ran to completion
- if (ent >= numEntries)
- {
- D = d;
- break;
- }
- }
- CGU_FLOAT invD = 1.0f / D;
- for (CGU_UINT32 ki = 0; ki < numEntries; ki++)
- {
- index[ki] = (CGU_UINT32)((index[ki] - minIndex) * invD);
- }
- // recompute the maximum over the rescaled indices
- MaxIndex = index[0];
- for (CGU_UINT32 k = 1; k < numEntries; k++)
- {
- if (index[k] > MaxIndex)
- MaxIndex = index[k];
- }
- return (MaxIndex);
- }
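- // index_collapse2 reduces the index set to canonical form: subtract the common
- // minimum and divide by a common divisor, e.g. {2, 4, 6} -> {0, 1, 2}. The
- // optimizer can then re-expand via index * slope + offset when searching for
- // better endpoint pairs.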
- INLINE void GetClusterMean2(CMP_INOUT CGV_Vec4f image_cluster_mean[16],
- CMP_IN CGU_Vec4ui image_src[16],
- CMP_IN CGU_UINT32 index_cluster[16],
- CMP_IN CGU_UINT32 numEntries, // < 16
- CMP_IN CGU_UINT32 channels3or4)
- { // IN: 3 = RGB or 4 = RGBA (4 = MAX_CHANNELS)
- // unused index values are undefined
- CGU_UINT32 i_cnt[16];
- CGU_UINT32 i_comp[16];
- CGU_UINT32 idx;
- for (CGU_UINT32 i0 = 0; i0 < numEntries; i0++)
- {
- idx = index_cluster[i0] & 0x0F;
- i_cnt[idx] = 0;
- image_cluster_mean[idx] = 0.0f;
- }
- CGU_UINT32 ic = 0;
- for (CGU_UINT32 i1 = 0; i1 < numEntries; i1++)
- {
- idx = index_cluster[i1] & 0x0F;
- if (i_cnt[idx] == 0)
- i_comp[ic++] = idx;
- i_cnt[idx]++;
- image_cluster_mean[idx].x += image_src[i1].x;
- image_cluster_mean[idx].y += image_src[i1].y;
- image_cluster_mean[idx].z += image_src[i1].z;
- image_cluster_mean[idx].w += image_src[i1].w;
- }
- for (CGU_UINT32 i = 0; i < ic; i++)
- {
- CGU_UINT32 icmp = i_comp[i];
- if (i_cnt[icmp] != 0)
- {
- image_cluster_mean[icmp].x = (CGV_FLOAT)floor((image_cluster_mean[icmp].x / (CGV_FLOAT)i_cnt[icmp]) + 0.5F);
- image_cluster_mean[icmp].y = (CGV_FLOAT)floor((image_cluster_mean[icmp].y / (CGV_FLOAT)i_cnt[icmp]) + 0.5F);
- image_cluster_mean[icmp].z = (CGV_FLOAT)floor((image_cluster_mean[icmp].z / (CGV_FLOAT)i_cnt[icmp]) + 0.5F);
- if (channels3or4 == 4)
- image_cluster_mean[icmp].w = (CGV_FLOAT)floor((image_cluster_mean[icmp].w / (CGV_FLOAT)i_cnt[icmp]) + 0.5F);
- else
- image_cluster_mean[icmp].w = 0.0f;
- }
- }
- }
- #ifndef ASPM_HLSL // CPU Version
- #define USE_OLDCODE
- INLINE CGU_UINT8 cmp_get_partition_subset2(CMP_IN CGU_INT part_id, CMP_IN CGU_INT maxSubsets, CMP_IN CGU_INT index)
- {
- if (maxSubsets == 2)
- {
- CGU_UINT32 mask_packed = subset_mask_table2[part_id];
- return ((mask_packed & (0x01 << index)) ? 1 : 0); // This can be moved to caller, just return mask!!
- }
- // 3 region subsets
- part_id += 64;
- CGU_UINT32 mask0 = subset_mask_table2[part_id] & 0xFFFF;
- CGU_UINT32 mask1 = subset_mask_table2[part_id] >> 16;
- CGU_UINT32 mask = 0x01 << index;
- return ((mask1 & mask) ? 2 : ((mask0 & mask) ? 1 : 0)); // This can be moved to caller, just return mask!!
- }
- void cmp_GetPartitionSubSet2_mode01237(CMP_INOUT CGV_Vec4ui image_subsets[3][16], // OUT: Subset pattern mapped with image src colors
- CMP_INOUT CGU_INT entryCount_out[3], // OUT: Number of entries per subset
- CMP_IN CGU_UINT8 partition, // Partition Shape 0..63
- CMP_IN CGV_Vec4ui image_src[16], // Image colors
- CMP_IN CGU_INT blockMode, // [0,1,2,3 or 7]
- CMP_IN CGU_UINT8 channels3or4)
- { // 3 = RGB or 4 = RGBA (4 = MAX_CHANNELS)
- CGU_UINT8 maxSubsets = 2;
- if (blockMode == 0 || blockMode == 2)
- maxSubsets = 3;
- entryCount_out[0] = 0;
- entryCount_out[1] = 0;
- entryCount_out[2] = 0;
- for (CGU_INT i = 0; i < 16; i++)
- {
- CGU_UINT8 subset = cmp_get_partition_subset2(partition, maxSubsets, i);
- image_subsets[subset][entryCount_out[subset]].x = image_src[i].x;
- image_subsets[subset][entryCount_out[subset]].y = image_src[i].y;
- image_subsets[subset][entryCount_out[subset]].z = image_src[i].z;
- // if we have only 3 channels then set the alpha subset to 0
- if (channels3or4 == 3)
- image_subsets[subset][entryCount_out[subset]].w = 0;
- else
- image_subsets[subset][entryCount_out[subset]].w = image_src[i].w;
- entryCount_out[subset]++;
- }
- }
- void cmp_GetImageCentered(CMP_INOUT CGV_Vec4f image_centered[16],
- CMP_INOUT CGV_Vec4f CMP_REFINOUT mean_out,
- CMP_IN CGV_Vec4ui image_src[16],
- CMP_IN CGU_INT numEntries,
- CMP_IN CGU_UINT8 channels3or4)
- {
- mean_out = 0.0f;
- CGU_INT k;
- for (k = 0; k < numEntries; k++)
- {
- mean_out.x = mean_out.x + image_src[k].x;
- mean_out.y = mean_out.y + image_src[k].y;
- mean_out.z = mean_out.z + image_src[k].z;
- if (channels3or4 == 4)
- mean_out.w = mean_out.w + image_src[k].w;
- }
- mean_out /= (CGV_FLOAT)numEntries;
- for (k = 0; k < numEntries; k++)
- {
- image_centered[k].x = image_src[k].x - mean_out.x;
- image_centered[k].y = image_src[k].y - mean_out.y;
- image_centered[k].z = image_src[k].z - mean_out.z;
- if (channels3or4 == 4)
- image_centered[k].w = image_src[k].w - mean_out.w;
- }
- }
- void cmp_GetCovarianceVector(CMP_INOUT CGV_FLOAT covariance_out[16],
- CMP_IN CGV_Vec4f image_centered[16],
- CMP_IN CGU_INT numEntries,
- CMP_IN CGU_UINT8 channels3or4)
- {
- CGU_UINT8 ch1;
- CGU_UINT8 ch2;
- CGU_INT k;
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- for (ch2 = 0; ch2 <= ch1; ch2++)
- {
- covariance_out[ch1 + ch2 * 4] = 0;
- for (k = 0; k < numEntries; k++)
- covariance_out[ch1 + ch2 * 4] += image_centered[k][ch1] * image_centered[k][ch2];
- }
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- for (ch2 = ch1 + 1; ch2 < channels3or4; ch2++)
- covariance_out[ch1 + ch2 * 4] = covariance_out[ch2 + ch1 * 4];
- }
- void cmp_GetEigenVector(CMP_INOUT CGV_Vec4f CMP_REFINOUT EigenVector_out, // Normalized Eigen Vector output
- CMP_IN CGV_FLOAT CovarianceVector[16], // Covariance Vector
- CMP_IN CGU_UINT8 channels3or4)
- {
- CGV_FLOAT vector_covIn[16];
- CGV_FLOAT vector_covOut[16];
- CGV_FLOAT vector_maxCovariance;
- CGU_UINT8 ch1;
- CGU_UINT8 ch2;
- CGU_UINT8 ch3;
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- for (ch2 = 0; ch2 < channels3or4; ch2++)
- {
- vector_covIn[ch1 + ch2 * 4] = CovarianceVector[ch1 + ch2 * 4];
- }
- vector_maxCovariance = 0;
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- {
- if (vector_covIn[ch1 + ch1 * 4] > vector_maxCovariance)
- vector_maxCovariance = vector_covIn[ch1 + ch1 * 4];
- }
- // Normalize Input Covariance Vector
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- for (ch2 = 0; ch2 < channels3or4; ch2++)
- {
- if (vector_maxCovariance > 0)
- vector_covIn[ch1 + ch2 * 4] = vector_covIn[ch1 + ch2 * 4] / vector_maxCovariance;
- }
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- {
- for (ch2 = 0; ch2 < channels3or4; ch2++)
- {
- CGV_FLOAT vector_temp_cov = 0;
- for (ch3 = 0; ch3 < channels3or4; ch3++)
- {
- vector_temp_cov = vector_temp_cov + vector_covIn[ch1 + ch3 * 4] * vector_covIn[ch3 + ch2 * 4];
- }
- vector_covOut[ch1 + ch2 * 4] = vector_temp_cov;
- }
- }
- vector_maxCovariance = 0;
- CGU_INT maxCovariance_channel = 0;
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- {
- if (vector_covOut[ch1 + ch1 * 4] > vector_maxCovariance)
- {
- maxCovariance_channel = ch1;
- vector_maxCovariance = vector_covOut[ch1 + ch1 * 4];
- }
- }
- CGV_FLOAT vector_t = 0;
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- {
- vector_t = vector_t + vector_covOut[maxCovariance_channel + ch1 * 4] * vector_covOut[maxCovariance_channel + ch1 * 4];
- EigenVector_out[ch1] = vector_covOut[maxCovariance_channel + ch1 * 4];
- }
- // Normalize the Eigen Vector
- vector_t = sqrt(vector_t);
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- {
- if (vector_t > 0)
- EigenVector_out[ch1] = EigenVector_out[ch1] / vector_t;
- }
- }
- void cmp_GetProjecedImage(CMP_INOUT CGV_FLOAT projection_out[16],
- CMP_IN CGV_Vec4f image_centered[16],
- CMP_IN CGU_INT numEntries,
- CMP_IN CGV_Vec4f EigenVector,
- CMP_IN CGU_UINT8 channels3or4)
- {
- // EigenVector must be normalized
- for (CGU_INT k = 0; k < numEntries; k++)
- {
- projection_out[k] = 0.0F;
- projection_out[k] = projection_out[k] + (image_centered[k].x * EigenVector.x);
- projection_out[k] = projection_out[k] + (image_centered[k].y * EigenVector.y);
- projection_out[k] = projection_out[k] + (image_centered[k].z * EigenVector.z);
- if (channels3or4 == 4)
- projection_out[k] = projection_out[k] + (image_centered[k].w * EigenVector.w);
- }
- }
- typedef struct
- {
- CGV_FLOAT image;
- CGU_UINT8 index;
- } CMP_di2;
- void cmp_GetProjectedIndex(CMP_INOUT CGU_UINT8 projected_index_out[16], //output: index, uncentered, in the range 0..clusters-1
- CMP_IN CGV_FLOAT image_projected[16], // image_block points, might be uncentered
- CMP_IN CGU_INT clusters, // clusters: number of points in the ramp (max 16)
- CMP_IN CGU_INT numEntries)
- {
- CMP_di2 what[16];
- CGV_FLOAT image_v[16];
- CGV_FLOAT image_z[16];
- CGV_FLOAT image_l;
- CGV_FLOAT image_mm;
- CGV_FLOAT image_r = 0.0F;
- CGV_FLOAT image_dm = 0.0F;
- CGV_FLOAT image_min;
- CGV_FLOAT image_max;
- CGV_FLOAT image_s;
- CGU_INT i;
- CGU_INT j;
- for (i = 0; i < 16; i++)
- projected_index_out[i] = 0;
- image_min = image_projected[0];
- image_max = image_projected[0];
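- // Note: the comparisons below leave image_min holding the *largest* projection
- // and image_max the smallest, so img_diff and image_s come out negative; the
- // index mapping is still consistent because image_z subtracts image_min * image_s.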
- for (i = 1; i < numEntries; i++)
- {
- if (image_min < image_projected[i])
- image_min = image_projected[i];
- if (image_max > image_projected[i])
- image_max = image_projected[i];
- }
- CGV_FLOAT img_diff = image_max - image_min;
- if (img_diff == 0.0f)
- return;
- if (cmp_isnan(img_diff))
- return;
- image_s = (clusters - 1) / img_diff;
- for (i = 0; i < numEntries; i++)
- {
- image_v[i] = image_projected[i] * image_s;
- image_z[i] = floor(image_v[i] + 0.5F - image_min * image_s);
- projected_index_out[i] = (CGU_UINT8)image_z[i];
- what[i].image = image_v[i] - image_z[i] - image_min * image_s;
- what[i].index = i;
- image_dm += what[i].image;
- image_r += what[i].image * what[i].image;
- }
- if (numEntries * image_r - image_dm * image_dm >= (CGV_FLOAT)(numEntries - 1) / 8)
- {
- image_dm /= numEntries;
- for (i = 0; i < numEntries; i++)
- what[i].image -= image_dm;
- CGU_UINT8 tmp_index;
- CGV_FLOAT tmp_image;
- for (i = 1; i < numEntries; i++)
- {
- for (j = i; j > 0; j--)
- {
- if (what[j - 1].image > what[j].image)
- {
- tmp_index = what[j].index;
- tmp_image = what[j].image;
- what[j].index = what[j - 1].index;
- what[j].image = what[j - 1].image;
- what[j - 1].index = tmp_index;
- what[j - 1].image = tmp_image;
- }
- }
- }
- // we are now inside the fundamental simplex;
- // move the coordinate system origin to its center
- // (the loop guarantees numEntries >= 1, so the division below is never by zero)
- for (i = 0; i < numEntries; i++)
- {
- what[i].image = what[i].image - (CGV_FLOAT)(((2.0f * i + 1) - numEntries) / (2.0f * numEntries));
- }
- image_mm = 0.0F;
- image_l = 0.0F;
- j = -1;
- for (i = 0; i < numEntries; i++)
- {
- image_l += what[i].image;
- if (image_l < image_mm)
- {
- image_mm = image_l;
- j = i;
- }
- }
- j = j + 1;
- // wrap j into range without the varying integer modulo j % numEntries
- while (j > numEntries)
- j = j - numEntries;
- for (i = j; i < numEntries; i++)
- {
- CGU_UINT8 idx = what[i].index;
- CGU_UINT8 pidx = projected_index_out[idx] + 1; //gather_index(projected_index_out,idx)+1;
- projected_index_out[idx] = pidx; // scatter_index(projected_index_out,idx,pidx);
- }
- }
- // get minimum index
- CGU_UINT8 index_min = projected_index_out[0];
- for (i = 1; i < numEntries; i++)
- {
- if (projected_index_out[i] < index_min)
- index_min = projected_index_out[i];
- }
- // reposition all index by min index (using min index as 0)
- for (i = 0; i < numEntries; i++)
- {
- projected_index_out[i] = cmp_clampi(projected_index_out[i] - index_min, 0, 15);
- }
- }
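- // Two-stage quantization: the first pass rounds each projection to the nearest
- // of the 'clusters' ramp steps; when the fractional residuals are large enough,
- // the insertion sort plus prefix-sum walk finds the split point of the
- // fundamental simplex and bumps the indices of the entries past it by one,
- // a rounding intended to reduce the total squared error.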
- CGV_FLOAT cmp_err_Total(CMP_IN CGV_Vec4ui image_src1[16], CMP_IN CGV_Vec4f image_src2[16], CMP_IN CGU_INT numEntries, CMP_IN CGU_UINT8 channels3or4)
- {
- CGV_FLOAT err_t = 0.0F;
- for (CGU_INT k = 0; k < numEntries; k++)
- {
- err_t = err_t + cmp_squaref(image_src1[k].x - image_src2[k].x);
- err_t = err_t + cmp_squaref(image_src1[k].y - image_src2[k].y);
- err_t = err_t + cmp_squaref(image_src1[k].z - image_src2[k].z);
- if (channels3or4 == 4)
- err_t = err_t + cmp_squaref(image_src1[k].w - image_src2[k].w);
- }
- return err_t;
- }
- CGV_FLOAT cmp_GetQuantizeIndex_old(CMP_INOUT CGU_UINT8 index_out[16],
- CMP_IN CGV_Vec4ui image_src[16],
- CMP_IN CGU_INT numEntries,
- CMP_IN CGU_INT numClusters,
- CMP_IN CGU_UINT8 channels3or4)
- {
- CGV_FLOAT covariance_vector[16];
- CGV_Vec4f image_centered[16];
- CGV_FLOAT image_projected[16];
- CGV_Vec4f image_mean = 0.0f;
- CGV_Vec4f eigen_vector = 0.0f;
- // Init vars
- for (CGU_INT ik = 0; ik < 16; ik++)
- {
- covariance_vector[ik] = 0.0f;
- image_centered[ik] = 0.0f;
- image_projected[ik] = 0.0f;
- }
- cmp_GetImageCentered(image_centered, image_mean, image_src, numEntries, channels3or4);
- cmp_GetCovarianceVector(covariance_vector, image_centered, numEntries, channels3or4);
- //-----------------------------------------------------
- // check if all covariances are the same
- // if so then set all index to same value 0 and return
- // use EPSILON to set the limit for all same limit
- //-----------------------------------------------------
- CGV_FLOAT image_covt = 0.0F;
- image_covt = covariance_vector[0];
- image_covt = image_covt + covariance_vector[5];
- image_covt = image_covt + covariance_vector[10];
- if (channels3or4 == 4)
- image_covt = image_covt + covariance_vector[15];
- if (image_covt < 0.00390625f)
- {
- for (CGU_INT i = 0; i < 16; i++)
- index_out[i] = 0;
- return 0.0f;
- }
- cmp_GetEigenVector(eigen_vector, covariance_vector, channels3or4);
- cmp_GetProjecedImage(image_projected, image_centered, numEntries, eigen_vector, channels3or4);
- cmp_GetProjectedIndex(index_out, image_projected, numClusters, numEntries);
- //==========================================
- // Refine
- //==========================================
- CGV_FLOAT image_q = 0.0F;
- eigen_vector = 0.0f;
- for (CGU_INT k = 0; k < numEntries; k++)
- {
- eigen_vector.x = eigen_vector.x + image_centered[k].x * index_out[k];
- eigen_vector.y = eigen_vector.y + image_centered[k].y * index_out[k];
- eigen_vector.z = eigen_vector.z + image_centered[k].z * index_out[k];
- if (channels3or4 == 4)
- eigen_vector.w = eigen_vector.w + image_centered[k].w * index_out[k];
- }
- image_q = image_q + eigen_vector.x * eigen_vector.x;
- image_q = image_q + eigen_vector.y * eigen_vector.y;
- image_q = image_q + eigen_vector.z * eigen_vector.z;
- if (channels3or4 == 4)
- image_q = image_q + eigen_vector.w * eigen_vector.w;
- image_q = sqrt(image_q);
- // direction needs to be normalized
- if (image_q != 0.0F)
- eigen_vector = eigen_vector / image_q;
- // Get new projected data
- cmp_GetProjecedImage(image_projected, image_centered, numEntries, eigen_vector, channels3or4);
- cmp_GetProjectedIndex(index_out, image_projected, numClusters, numEntries);
- // Calc Error
- CGV_FLOAT image_t = 0.0F;
- CGV_FLOAT index_average = 0.0F;
- for (CGU_INT ik = 0; ik < numEntries; ik++)
- {
- index_average = index_average + index_out[ik];
- image_t = image_t + index_out[ik] * index_out[ik];
- }
- index_average = index_average / (CGV_FLOAT)numEntries;
- image_t = image_t - index_average * index_average * (CGV_FLOAT)numEntries;
- if (image_t != 0.0F)
- image_t = 1.0F / image_t;
- eigen_vector = 0.0f;
- for (CGU_INT nk = 0; nk < numEntries; nk++)
- {
- eigen_vector.x = eigen_vector.x + image_centered[nk].x * index_out[nk];
- eigen_vector.y = eigen_vector.y + image_centered[nk].y * index_out[nk];
- eigen_vector.z = eigen_vector.z + image_centered[nk].z * index_out[nk];
- if (channels3or4 == 4)
- eigen_vector.w = eigen_vector.w + image_centered[nk].w * index_out[nk];
- }
- CGV_Vec4f image_decomp[SOURCE_BLOCK_SIZE];
- for (CGU_UINT32 ii = 0; ii < SOURCE_BLOCK_SIZE; ii++)
- image_decomp[ii] = 0.0f;
- for (CGU_INT i = 0; i < numEntries; i++)
- {
- image_decomp[i].x = image_mean.x + eigen_vector.x * image_t * (index_out[i] - index_average);
- image_decomp[i].y = image_mean.y + eigen_vector.y * image_t * (index_out[i] - index_average);
- image_decomp[i].z = image_mean.z + eigen_vector.z * image_t * (index_out[i] - index_average);
- if (channels3or4 == 4)
- image_decomp[i].w = image_mean.w + eigen_vector.w * image_t * (index_out[i] - index_average);
- }
- CGV_FLOAT err_1 = cmp_err_Total(image_src, image_decomp, numEntries, channels3or4);
- return err_1;
- }
- typedef struct
- {
- CGV_FLOAT image;
- CGU_UINT8 index;
- } CMP_du2;
- void cmp_sortPartitionProjection(CMP_IN CGV_FLOAT projection[64], CMP_INOUT CGU_UINT8 order[64],
- CMP_IN CGU_UINT8 numPartitions) // max 64
- {
- CMP_du2 what[64];
- CGU_UINT8 Parti;
- CGU_UINT8 Partj;
- for (Parti = 0; Parti < numPartitions; Parti++)
- {
- what[Parti].index = Parti;
- what[Parti].image = projection[Parti];
- }
- CGU_UINT8 index;
- CGV_FLOAT data;
- for (Parti = 1; Parti < numPartitions; Parti++)
- {
- for (Partj = Parti; Partj > 0; Partj--)
- {
- if (what[Partj - 1].image > what[Partj].image)
- {
- index = what[Partj].index;
- data = what[Partj].image;
- what[Partj].index = what[Partj - 1].index;
- what[Partj].image = what[Partj - 1].image;
- what[Partj - 1].index = index;
- what[Partj - 1].image = data;
- }
- }
- }
- for (Parti = 0; Parti < numPartitions; Parti++)
- order[Parti] = what[Parti].index;
- }
- CGU_BOOL cmp_get_ideal_cluster(CMP_INOUT CGV_Vec4f image_cluster[2],
- CMP_IN CGU_UINT32 index_cluster[16],
- CMP_IN CGU_INT Mi_,
- CMP_IN CGV_Vec4ui image_src[16],
- CMP_IN CGU_INT numEntries,
- CMP_IN CGU_UINT8 channels3or4)
- {
- // get ideal cluster centers
- CGV_Vec4f image_cluster_mean[16];
- for (CGU_INT ii = 0; ii < numEntries; ii++)
- {
- image_cluster_mean[ii] = 0.0f;
- }
- GetClusterMean2(image_cluster_mean, image_src, index_cluster, numEntries, channels3or4); // unrounded
- CGV_FLOAT image_matrix0[2] = {0, 0}; // matrix /inverse matrix
- CGV_FLOAT image_matrix1[2] = {0, 0}; // matrix /inverse matrix
- CGV_Vec4f image_rp[2]; // right part for RMS fit problem
- image_rp[0] = 0.0f;
- image_rp[1] = 0.0f;
- // weight with cnt if running on compacted index
- for (CGU_INT k = 0; k < numEntries; k++)
- {
- image_matrix0[0] += (Mi_ - index_cluster[k]) * (Mi_ - index_cluster[k]);
- image_matrix0[1] += index_cluster[k] * (Mi_ - index_cluster[k]); // im is symmetric
- image_matrix1[1] += index_cluster[k] * index_cluster[k];
- image_rp[0] += image_cluster_mean[index_cluster[k]] * (Mi_ - index_cluster[k]);
- image_rp[1] += image_cluster_mean[index_cluster[k]] * index_cluster[k];
- }
- CGV_FLOAT matrix_dd = image_matrix0[0] * image_matrix1[1] - image_matrix0[1] * image_matrix0[1];
- // assert(matrix_dd != 0);
- // matrix_dd == 0 means index_cidx[k] and (Mi_ - index_cidx[k]) are collinear,
- // which implies only one active index; that case is handled separately
- if (matrix_dd == 0)
- {
- image_cluster[0] = 0.0f;
- image_cluster[1] = 0.0f;
- return FALSE;
- }
- image_matrix1[0] = image_matrix0[0];
- image_matrix0[0] = image_matrix1[1] / matrix_dd;
- image_matrix1[1] = image_matrix1[0] / matrix_dd;
- image_matrix1[0] = image_matrix0[1] = -image_matrix0[1] / matrix_dd;
- CGV_FLOAT Mif = (CGV_FLOAT)Mi_;
- // values can exceed 255 here, clamp made no diff in quality!
- image_cluster[0] = (((image_rp[0] * image_matrix0[0]) + (image_rp[1] * image_matrix0[1])) * Mif);
- image_cluster[1] = (((image_rp[0] * image_matrix1[0]) + (image_rp[1] * image_matrix1[1])) * Mif);
- return TRUE;
- }
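- // Least-squares endpoint fit: modelling each cluster mean as
- // ((Mi_ - i) * A + i * B) / Mi_, the loop accumulates the 2x2 normal equations
- // (image_matrix*) and right-hand sides (image_rp); inverting the symmetric
- // matrix and scaling by Mi_ yields the ideal, unquantized endpoints A and B.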
- CGV_FLOAT cmp_quant_solid_color(CMP_INOUT CGU_UINT32 index_out[16],
- CMP_INOUT CGV_Vec4ui epo_code_out[2],
- CMP_IN CGV_Vec4ui image_src[16],
- CMP_IN CGU_INT numEntries,
- CMP_IN CGU_UINT8 Mi_,
- CMP_IN CGU_UINT8 bits[4],
- CMP_IN CGU_INT type,
- CMP_IN CGU_UINT8 channels3or4,
- CMP_IN CGU_INT blockMode)
- {
- #ifndef ASPM_GPU
- #if defined(USE_NEW_SP_ERR_IDX)
- CGU_INT clogBC7 = 0;
- CGU_INT iv = Mi_ + 1;
- while (iv >>= 1)
- clogBC7++;
- old_init_BC7ramps(); // first time call inits global
- #endif
- #endif
- CGU_INT index_bits = g_modesettings[blockMode].indexBits;
- CGV_Vec4ui epo_0[2];
- epo_0[0] = 0u;
- epo_0[1] = 0u;
- CGU_UINT8 image_log = 0;
- CGU_UINT8 image_idx = 0;
- CGU_BOOL use_par = FALSE;
- if (type != 0)
- use_par = TRUE;
- CGV_FLOAT error_1 = CMP_FLOAT_MAX;
- //CGU_UINT8 ch;
- CGU_UINT8 ch1;
- //CGU_INT k;
- CGU_INT i;
- for (CGU_INT pn = 0; pn < cmp_npv_nd[channels3or4 - 3][type] && (error_1 != 0.0F); pn++)
- {
- CGU_Vec4ui o1[2] = {{0u, 0u, 0u, 0u}, {2u, 2u, 2u, 2u}};
- CGU_Vec4ui o2[2] = {{0u, 0u, 0u, 0u}, {2u, 2u, 2u, 2u}};
- if (use_par == TRUE)
- {
- if (cmp_par_vectors_nd[channels3or4 - 3][type][pn][0][0])
- o1[0][0] = 1;
- else
- o1[1][0] = 1;
- if (cmp_par_vectors_nd[channels3or4 - 3][type][pn][1][0])
- o2[0][0] = 1;
- else
- o2[1][0] = 1;
- if (cmp_par_vectors_nd[channels3or4 - 3][type][pn][0][1])
- o1[0][1] = 1;
- else
- o1[1][1] = 1;
- if (cmp_par_vectors_nd[channels3or4 - 3][type][pn][1][1])
- o2[0][1] = 1;
- else
- o2[1][1] = 1;
- if (cmp_par_vectors_nd[channels3or4 - 3][type][pn][0][2])
- o1[0][2] = 1;
- else
- o1[1][2] = 1;
- if (cmp_par_vectors_nd[channels3or4 - 3][type][pn][1][2])
- o2[0][2] = 1;
- else
- o2[1][2] = 1;
- if (cmp_par_vectors_nd[channels3or4 - 3][type][pn][0][3])
- o1[0][3] = 1;
- else
- o1[1][3] = 1;
- if (cmp_par_vectors_nd[channels3or4 - 3][type][pn][1][3])
- o2[0][3] = 1;
- else
- o2[1][3] = 1;
- }
- CGU_INT image_tcr[MAX_CHANNELS];
- CGU_INT epo_dr_0[MAX_CHANNELS];
- CGV_FLOAT error_0 = CMP_FLOAT_MAX;
- for (CGU_UINT8 iclogBC7 = 0; iclogBC7 < (1 << index_bits) && (error_0 != 0); iclogBC7++)
- {
- CGV_FLOAT error_t = 0;
- CGU_INT t1o[MAX_CHANNELS], t2o[MAX_CHANNELS];
- for (ch1 = 0; ch1 < channels3or4; ch1++)
- {
- // D
- CGV_FLOAT error_ta = CMP_FLOAT_MAX;
- for (CGU_UINT8 t1 = o1[0][ch1]; t1 < o1[1][ch1]; t1++)
- {
- // C
- // This is needed for non-integer mean points of "collapsed" sets
- for (CGU_UINT8 t2 = o2[0][ch1]; t2 < o2[1][ch1]; t2++)
- {
- // B
- CGU_INT image_tf;
- CGU_INT image_tc;
- image_tf = (CGU_INT)floor(image_src[0][ch1]);
- image_tc = (CGU_INT)ceil(image_src[0][ch1]);
- #ifndef ASPM_GPU
- #ifdef USE_NEW_SP_ERR_IDX
- CGV_FLOAT err_tf = old_get_sperr(clogBC7, bits[ch1], image_tf, t1, t2, iclogBC7);
- CGV_FLOAT err_tc = old_get_sperr(clogBC7, bits[ch1], image_tc, t1, t2, iclogBC7);
- if (err_tf > err_tc)
- image_tcr[ch1] = image_tc;
- else if (err_tf < err_tc)
- image_tcr[ch1] = image_tf;
- else
- image_tcr[ch1] = (CGV_INT)floor(image_src[0][ch1] + 0.5F);
-
- //===============================
- // Refine this for better quality!
- //===============================
- CGV_FLOAT error_tr;
- error_tr = old_get_sperr(clogBC7, bits[ch1], image_tcr[ch1], t1, t2, iclogBC7);
- error_tr = (error_tr * error_tr) +
- 2 * error_tr * old_img_absf(image_tcr[ch1] - image_src[0][ch1]) +
- (image_tcr[ch1] - image_src[0][ch1]) * (image_tcr[ch1] - image_src[0][ch1]);
- if (error_tr < error_ta)
- {
- error_ta = error_tr;
- t1o[ch1] = t1;
- t2o[ch1] = t2;
- epo_dr_0[ch1] = cmp_clampi(image_tcr[ch1], 0, 255);
- }
- #endif
- #else
- image_tcr[ch1] = (CGU_INT)floor(image_src[0][ch1] + 0.5F);
- error_ta = 0;
- t1o[ch1] = t1;
- t2o[ch1] = t2;
- epo_dr_0[ch1] = cmp_clampi(image_tcr[ch1], 0, 255);
- #endif
-
- } // B
- } //C
- error_t += error_ta;
- } // D
- if (error_t <= error_0)
- {
- // We have a solid color: Use image src if on GPU
- image_log = iclogBC7;
- image_idx = image_log;
- #ifndef ASPM_GPU
- #ifdef USE_NEW_SP_ERR_IDX
- if (BC7EncodeRamps2.ramp_init)
- {
- for (CGU_UINT8 ch = 0; ch < channels3or4; ch++)
- {
- CGV_INT index = (CLT2(clogBC7) * 4 * 256 * 2 * 2 * 16 * 2) +
- (BTT2(bits[ch]) * 256 * 2 * 2 * 16 * 2) +
- (epo_dr_0[ch] * 2 * 2 * 16 * 2) +
- (t1o[ch] * 2 * 16 * 2) +
- (t2o[ch] * 16 * 2) +
- (iclogBC7 * 2);
- epo_0[0][ch] = BC7EncodeRamps2.sp_idx[index + 0] & 0xFF;
- epo_0[1][ch] = BC7EncodeRamps2.sp_idx[index + 1] & 0xFF;
- }
- }
- #endif
- #else
- CGU_UINT8 ch;
- CGU_UINT8 k;
- // This needs improving
- CGV_FLOAT MinC[4] = {255, 255, 255, 255};
- CGV_FLOAT MaxC[4] = {0, 0, 0, 0};
- // get min max colors
- for (ch = 0; ch < channels3or4; ch++)
- for (k = 0; k < numEntries; k++)
- {
- if (image_src[k][ch] < MinC[ch])
- MinC[ch] = image_src[k][ch];
- if (image_src[k][ch] > MaxC[ch])
- MaxC[ch] = image_src[k][ch];
- }
- epo_0[0][0] = (CGU_UINT8)MinC[0];
- epo_0[1][0] = (CGU_UINT8)MaxC[0];
- epo_0[0][1] = (CGU_UINT8)MinC[1];
- epo_0[1][1] = (CGU_UINT8)MaxC[1];
- epo_0[0][2] = (CGU_UINT8)MinC[2];
- epo_0[1][2] = (CGU_UINT8)MaxC[2];
- epo_0[0][3] = (CGU_UINT8)MinC[3];
- epo_0[1][3] = (CGU_UINT8)MaxC[3];
- #endif
- error_0 = error_t;
- }
- } // E
- if (error_0 < error_1)
- {
- image_idx = image_log;
- epo_code_out[0] = epo_0[0];
- epo_code_out[1] = epo_0[1];
- error_1 = error_0;
- }
- } //1
- // Get Image error
- CGV_Vec4f image_decomp[16];
- for (i = 0; i < numEntries; i++)
- {
- index_out[i] = image_idx;
- {
- // decode with the single chosen index (image_idx), not the loop counter
- image_decomp[i][0] = cmp_GetRamp(index_bits, bits[0], epo_code_out[0].x, epo_code_out[1].x, image_idx);
- image_decomp[i][1] = cmp_GetRamp(index_bits, bits[1], epo_code_out[0].y, epo_code_out[1].y, image_idx);
- image_decomp[i][2] = cmp_GetRamp(index_bits, bits[2], epo_code_out[0].z, epo_code_out[1].z, image_idx);
- if (channels3or4 == 4)
- image_decomp[i][3] = cmp_GetRamp(index_bits, bits[3], epo_code_out[0].w, epo_code_out[1].w, image_idx);
- }
- }
- // Do we need to do this rather then err_1 * numEntries
- CGV_FLOAT error_quant;
- error_quant = cmp_err_Total(image_src, image_decomp, numEntries, channels3or4);
- return error_quant;
- }
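- // Solid-color path: for every parity-vector candidate and every index value,
- // choose per channel the quantized endpoint pair that best reproduces the
- // single color (on CPU via old_get_sperr and the BC7EncodeRamps2.sp_idx table,
- // on GPU from the block min/max), then decode once to report the true
- // quantized error.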
- INLINE CGV_FLOAT old_sq_image(CGV_FLOAT v)
- {
- return v * v;
- }
- CGV_FLOAT cmp_shake3(CMP_INOUT CGU_Vec4ui epo_code_shake[2],
- CMP_IN CGV_Vec4f image_cluster[2],
- CMP_IN CGU_UINT32 index_cidx[16],
- CMP_IN CGV_Vec4ui image_src[16],
- CMP_IN CGU_INT index_bits,
- CMP_IN CGU_INT type,
- CMP_IN CGU_UINT8 max_bits[4],
- CMP_IN CGU_UINT8 use_par,
- CMP_IN CGU_INT numEntries, // max 16
- CMP_IN CGU_UINT8 channels3or4)
- {
- CGV_FLOAT best_err = CMP_FLOAT_MAX;
- CGV_FLOAT err_ed[16] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
- CGU_INT epo_code_par[2][2][2][MAX_CHANNELS];
- for (CGU_UINT8 ch = 0; ch < channels3or4; ch++)
- {
- CGU_UINT8 ppA = 0;
- CGU_UINT8 ppB = 0;
- CGU_UINT8 rr = (use_par ? 2 : 1);
- CGU_INT epo_code_epi[2][2]; // [first/second endpoint][begin/end of search range]
- for (ppA = 0; ppA < rr; ppA++)
- { // loop max =2
- for (ppB = 0; ppB < rr; ppB++)
- { //loop max =2
- // set default ranges
- epo_code_epi[0][0] = epo_code_epi[0][1] = cmp_ep_find_floor2(image_cluster[0][ch], max_bits[ch], use_par, ppA);
- epo_code_epi[1][0] = epo_code_epi[1][1] = cmp_ep_find_floor2(image_cluster[1][ch], max_bits[ch], use_par, ppB);
- // set begin range
- epo_code_epi[0][0] -= ((epo_code_epi[0][0] < 1 ? epo_code_epi[0][0] : 1)) & (~use_par);
- epo_code_epi[1][0] -= ((epo_code_epi[1][0] < 1 ? epo_code_epi[1][0] : 1)) & (~use_par);
- // set end range
- epo_code_epi[0][1] += ((1 << max_bits[ch]) - 1 - epo_code_epi[0][1] < 2 ? (1 << max_bits[ch]) - 1 - epo_code_epi[0][1] : 2) & (~use_par);
- epo_code_epi[1][1] += ((1 << max_bits[ch]) - 1 - epo_code_epi[1][1] < 2 ? (1 << max_bits[ch]) - 1 - epo_code_epi[1][1] : 2) & (~use_par);
- CGU_INT step = (1 << use_par);
- err_ed[(ppA * 8) + (ppB * 4) + ch] = CMP_FLOAT_MAX;
- for (CGU_INT epo_p1 = epo_code_epi[0][0]; epo_p1 <= epo_code_epi[0][1]; epo_p1 += step)
- {
- for (CGU_INT epo_p2 = epo_code_epi[1][0]; epo_p2 <= epo_code_epi[1][1]; epo_p2 += step)
- {
- CGV_FLOAT image_square_diff = 0.0F;
- CGU_INT _mc = numEntries;
- CGV_FLOAT image_ramp;
- while (_mc > 0)
- {
- image_ramp = cmp_GetRamp(index_bits, max_bits[ch], epo_p1, epo_p2, index_cidx[_mc - 1]);
- image_square_diff += cmp_squaref(image_ramp - image_src[(_mc - 1)][ch]);
- _mc--;
- }
- if (image_square_diff < err_ed[(ppA * 8) + (ppB * 4) + ch])
- {
- err_ed[(ppA * 8) + (ppB * 4) + ch] = image_square_diff;
- epo_code_par[ppA][ppB][0][ch] = epo_p1;
- epo_code_par[ppA][ppB][1][ch] = epo_p2;
- }
- }
- }
- } // ppB
- } // ppA
- } // ch
- //---------------------------------------------------------
- for (CGU_INT pn = 0; pn < cmp_npv_nd[channels3or4 - 3][type]; pn++)
- {
- CGV_FLOAT err_2 = 0.0F;
- CGU_INT d1;
- CGU_INT d2;
- for (CGU_UINT8 ch = 0; ch < channels3or4; ch++)
- {
- d1 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][0][ch];
- d2 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][1][ch];
- err_2 += err_ed[(d1 * 8) + (d2 * 4) + ch];
- }
- if (err_2 < best_err)
- {
- best_err = err_2;
- d1 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][0][0];
- d2 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][1][0];
- epo_code_shake[0][0] = epo_code_par[d1][d2][0][0];
- epo_code_shake[1][0] = epo_code_par[d1][d2][1][0];
- d1 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][0][1];
- d2 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][1][1];
- epo_code_shake[0][1] = epo_code_par[d1][d2][0][1];
- epo_code_shake[1][1] = epo_code_par[d1][d2][1][1];
- d1 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][0][2];
- d2 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][1][2];
- epo_code_shake[0][2] = epo_code_par[d1][d2][0][2];
- epo_code_shake[1][2] = epo_code_par[d1][d2][1][2];
- d1 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][0][3];
- d2 = cmp_par_vectors_nd[channels3or4 - 3][type][pn][1][3];
- epo_code_shake[0][3] = epo_code_par[d1][d2][0][3];
- epo_code_shake[1][3] = epo_code_par[d1][d2][1][3];
- }
- }
- return best_err;
- }
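- // Endpoint "shaking": for each channel the two quantized endpoints are searched
- // over a small neighborhood around the floor of the ideal cluster value (the
- // step size honors the parity bit), keeping the pair with the least squared
- // ramp error per parity combination; the final loop then selects the parity
- // vector with the lowest summed error across channels.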
- CGV_FLOAT cmp_requantized_index(CMP_INOUT CGU_UINT8 index_out[16],
- CMP_INOUT CGU_Vec4ui epo_code_best[2],
- CMP_IN CGU_INT index_bits,
- CMP_IN CGU_UINT8 max_bits[4],
- CMP_IN CGV_Vec4ui image_src[16],
- CMP_IN CGU_INT numEntries,
- CMP_IN CGU_UINT8 channels3or4)
- {
- CGU_UINT8 k;
- CGU_UINT8 ch;
- //=========================================
- // requantized image based on new epo_code
- //=========================================
- CGV_FLOAT image_requantize[SOURCE_BLOCK_SIZE][MAX_CHANNELS];
- CGV_FLOAT err_r = 0.0F;
- for (ch = 0; ch < channels3or4; ch++)
- {
- for (k = 0; k < SOURCE_BLOCK_SIZE; k++)
- {
- image_requantize[k][ch] = cmp_GetRamp(index_bits, max_bits[ch], epo_code_best[0][ch], epo_code_best[1][ch], k);
- }
- }
- //=========================================
- // Calc the error for the requantized image
- //=========================================
- for (k = 0; k < numEntries; k++)
- {
- CGV_FLOAT err_cmin = CMP_FLOAT_MAX;
- CGV_INT hold_index_j = 0;
- for (CGV_INT iclogBC7 = 0; iclogBC7 < (1 << index_bits); iclogBC7++)
- {
- CGV_FLOAT image_err = 0.0F;
- for (ch = 0; ch < channels3or4; ch++)
- {
- image_err += old_sq_image(image_requantize[iclogBC7][ch] - image_src[k][ch]);
- }
- if (image_err < err_cmin)
- {
- err_cmin = image_err;
- hold_index_j = iclogBC7;
- }
- }
- index_out[k] = (CGV_UINT8)hold_index_j;
- err_r += err_cmin;
- }
- return err_r;
- }
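- // Requantization: decode the full ramp once for the shaken endpoints, then give
- // every pixel the ramp entry with the smallest squared difference across
- // channels; the sum of those minima is the block error compared against err_best.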
- CGV_FLOAT cmp_optimize_IndexAndEndPoints(CMP_INOUT CGU_Vec4ui epo_code_out[2],
- CMP_INOUT CGU_UINT32 index_io[16],
- CMP_INOUT CGU_UINT32 index_packed_out[2],
- CMP_IN CGV_Vec4ui image_src[16],
- CMP_IN CGU_INT numEntries,
- CMP_IN CGU_UINT8 Mi_,
- CMP_IN CGU_UINT8 bits,
- CMP_IN CGU_UINT8 channels3or4,
- CMP_IN CGU_FLOAT errorThreshold,
- CMP_IN CGU_INT blockMode)
- {
- CGV_FLOAT err_best = CMP_FLOAT_MAX;
- CGU_INT type;
- CGU_UINT8 channels2 = 2 * channels3or4;
- type = bits % channels2;
- CGU_UINT8 use_par = (type != 0);
- CGU_UINT8 max_bits[4] = {0, 0, 0, 0};
- CGU_UINT8 ch;
- CGU_INT k;
- for (ch = 0; ch < channels3or4; ch++)
- max_bits[ch] = (bits + channels2 - 1) / channels2;
- CGU_INT index_bits = g_modesettings[blockMode].indexBits;
- CGU_INT clt_clogBC7 = index_bits - 2;
-
- if (clt_clogBC7 > 3)
- return CMP_FLOAT_MAX;
-
- Mi_ = Mi_ - 1;
- CGU_UINT32 index_tmp[16];
- CGU_INT maxTry = MAX_TRY_SHAKER;
- for (k = 0; k < numEntries; k++)
- index_tmp[k] = cmp_clampui8(index_io[k], 0, 15);
- epo_code_out[0] = 0u;
- epo_code_out[1] = 0u;
- CGV_FLOAT err_requant = 0.0F;
- CGU_UINT8 MaxIndex;
- MaxIndex = index_collapse2(index_tmp, numEntries);
- //===============================
- // we have a solid color 4x4 block
- //===============================
- if (MaxIndex == 0)
- {
- return cmp_quant_solid_color(index_io, epo_code_out, image_src, numEntries, Mi_, max_bits, type, channels3or4, blockMode);
- }
- for (CGU_INT ii = 0; ii < maxTry; ii++)
- {
- //===============================
- // We have ramp colors to process
- //===============================
- CGV_FLOAT err_cluster = CMP_FLOAT_MAX;
- CGV_FLOAT err_shake;
- CGU_UINT32 index_cluster[16];
- CGU_Vec4ui epo_code_best[2] = {{0, 0, 0, 0}, {0, 0, 0, 0}};
- for (CGU_UINT8 ii2 = 0; ii2 < numEntries; ii2++)
- index_cluster[ii2] = 0;
- CGU_UINT8 mi = Mi_;
- for (CGU_UINT8 index_slope = 1; (index_slope * MaxIndex) <= mi; index_slope++)
- {
- CGV_Vec4f image_cluster[2] = {{0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f, 0.0f}};
- for (CGU_UINT8 index_offset = 0; index_offset <= (mi - index_slope * MaxIndex); index_offset++)
- {
- //-------------------------------------
- // set a new index data to try
- //-------------------------------------
- for (k = 0; k < numEntries; k++)
- index_cluster[k] = index_tmp[k] * index_slope + index_offset;
- if (cmp_get_ideal_cluster(image_cluster, index_cluster, Mi_, image_src, numEntries, channels3or4))
- {
- CGU_Vec4ui epo_code_shake[2] = {{0, 0, 0, 0}, {0, 0, 0, 0}};
- err_shake = cmp_shake3( epo_code_shake,
- image_cluster,
- index_cluster,
- image_src,
- index_bits,
- type,
- max_bits,
- use_par,
- numEntries,
- channels3or4);
- if (err_shake < err_cluster)
- {
- err_cluster = err_shake;
- epo_code_best[0] = epo_code_shake[0];
- epo_code_best[1] = epo_code_shake[1];
- }
- }
- }
- }
- if ((err_cluster != CMP_FLOAT_MAX))
- {
- //=========================
- // test results for quality
- //=========================
- CGU_UINT8 index_best[16] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
- err_requant = cmp_requantized_index(index_best,
- epo_code_best,
- index_bits,
- max_bits,
- image_src,
- numEntries,
- channels3or4);
- if (err_requant < err_best)
- {
- for (k = 0; k < numEntries; k++)
- index_io[k] = index_tmp[k] = index_best[k];
- cmp_pack4bitindex32(index_packed_out, index_io);
- epo_code_out[0] = epo_code_best[0];
- epo_code_out[1] = epo_code_best[1];
- err_best = err_requant;
- }
- }
- // Early out if we have our target err
- if (err_best <= errorThreshold)
- break;
- MaxIndex = index_collapse2(index_tmp, numEntries);
- if (MaxIndex == 0)
- break;
- }
- return err_best;
- }
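- // Refinement loop: collapse the indices to canonical form, re-derive ideal
- // endpoints for each slope/offset re-expansion, shake them to the best
- // quantized pair, requantize the block, and repeat (up to MAX_TRY_SHAKER
- // passes) until the error stops improving or falls below errorThreshold.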
- CGU_UINT8 cmp_Write8Bit2(CMP_INOUT CGU_UINT8 base[16], CMP_IN CGU_INT offset, CMP_IN CGU_INT bits, CMP_IN CGU_UINT8 bitVal)
- {
- base[offset / 8] |= bitVal << (offset % 8);
- if (offset % 8 + bits > 8)
- {
- base[offset / 8 + 1] |= shift_right_uint82(bitVal, 8 - offset % 8);
- }
- return (offset += bits);
- }
- INLINE CGU_UINT8 shift_right_uint8V2(CMP_IN CGU_UINT8 v, CMP_IN CGU_UINT8 bits)
- {
- return v >> bits; // (perf warning expected)
- }
- void cmp_Write8BitV2(CMP_INOUT CGU_UINT8 base[16], CMP_IN CGU_INT offset, CMP_IN CGU_INT bits, CMP_IN CGU_UINT8 bitVal)
- {
- base[offset / 8] |= bitVal << (offset % 8);
- if (offset % 8 + bits > 8)
- {
- base[offset / 8 + 1] |= shift_right_uint8V2(bitVal, 8 - offset % 8);
- }
- }
- void cmp_Encode_mode01237(CMP_IN CGU_INT blockMode,
- CMP_IN CGU_UINT8 bestPartition,
- CMP_IN CGU_UINT32 packedEndpoints[6],
- CMP_IN CGU_UINT8 index16[16],
- CMP_INOUT CGU_UINT8 cmp_out[16])
- {
- CGU_UINT8 blockindex[SOURCE_BLOCK_SIZE];
- CGU_UINT32 indexBitsV = g_modesettings[blockMode].indexBits;
- CGU_UINT32 k;
- CGU_UINT32 ch;
- for (k = 0; k < COMPRESSED_BLOCK_SIZE; k++)
- cmp_out[k] = 0;
- // mode 0 = 1, mode 1 = 01, mode 2 = 001, mode 3 = 0001, ...
- CGU_INT bitPosition = blockMode;
- bitPosition = cmp_Write8Bit2(cmp_out, bitPosition, 1, 1);
- // Write partition bits
- bitPosition = cmp_Write8Bit2(cmp_out, bitPosition, g_modesettings[blockMode].partitionBits, bestPartition);
- // Sort out the index set and tag whether we need to flip the
- // endpoints to get the correct state in the implicit index bits
- // The implicitly encoded MSB of the fixup index must be 0
- CGU_UINT32 fixup[3] = {0, 0, 0};
- cmp_get_fixuptable(fixup, (g_modesettings[blockMode].maxSubSets == 2 ? bestPartition : bestPartition + 64));
- // Extract indices and mark subsets that need to have their colours flipped to get the
- // right state for the implicit MSB of the fixup index
- CGU_INT flipColours[3] = {0, 0, 0};
- for (k = 0; k < SOURCE_BLOCK_SIZE; k++)
- {
- blockindex[k] = index16[k];
- for (CGU_UINT8 j = 0; j < g_modesettings[blockMode].maxSubSets; j++)
- {
- if (k == fixup[j])
- {
- if (blockindex[k] & (1 << (indexBitsV - 1)))
- {
- flipColours[j] = 1;
- }
- }
- }
- }
- // Now we must flip the endpoints where necessary so that the implicitly encoded
- // index bits have the correct state
- for (k = 0; k < g_modesettings[blockMode].maxSubSets; k++)
- {
- if (flipColours[k] == 1)
- {
- CGU_UINT32 temp = packedEndpoints[k * 2 + 0];
- packedEndpoints[k * 2 + 0] = packedEndpoints[k * 2 + 1];
- packedEndpoints[k * 2 + 1] = temp;
- }
- }
- // ...next flip the indices where necessary
- for (k = 0; k < SOURCE_BLOCK_SIZE; k++)
- {
- CGU_UINT8 partsub = cmp_get_partition_subset2(bestPartition, g_modesettings[blockMode].maxSubSets, k);
- if (flipColours[partsub] == 1)
- {
- blockindex[k] = ((1 << indexBitsV) - 1) - blockindex[k];
- }
- }
- // Endpoints are stored in the following order RRRR GGGG BBBB (AAAA) (PPPP)
- // i.e. components are packed together
- CGU_Vec4ui unpackedColours[MAX_SUBSETS * 2];
- CGU_UINT8 parityBits[MAX_SUBSETS][2];
- // Init
- for (k = 0; k < MAX_SUBSETS * 2; k++)
- unpackedColours[k] = 0;
- // Unpack the colour values for the subsets
- for (k = 0; k < g_modesettings[blockMode].maxSubSets; k++)
- {
- CGU_UINT32 packedColours[2] = {packedEndpoints[k * 2 + 0], packedEndpoints[k * 2 + 1]};
- if (blockMode == 0 || blockMode == 3 || blockMode == 7)
- { // TWO_PBIT
- parityBits[k][0] = packedColours[0] & 1;
- parityBits[k][1] = packedColours[1] & 1;
- packedColours[0] >>= 1;
- packedColours[1] >>= 1;
- }
- else if (blockMode == 1)
- { // ONE_PBIT
- parityBits[k][0] = packedColours[1] & 1;
- parityBits[k][1] = packedColours[1] & 1;
- packedColours[0] >>= 1;
- packedColours[1] >>= 1;
- }
- else if (blockMode == 2)
- {
- parityBits[k][0] = 0;
- parityBits[k][1] = 0;
- }
- for (ch = 0; ch < g_modesettings[blockMode].channels3or4; ch++)
- {
- unpackedColours[k * 2][ch] = packedColours[0] & ((1 << g_modesettings[blockMode].componentBits) - 1);
- unpackedColours[k * 2 + 1][ch] = packedColours[1] & ((1 << g_modesettings[blockMode].componentBits) - 1);
- packedColours[0] >>= g_modesettings[blockMode].componentBits;
- packedColours[1] >>= g_modesettings[blockMode].componentBits;
- }
- }
- // Loop over component
- for (ch = 0; ch < g_modesettings[blockMode].channels3or4; ch++)
- {
- // loop over subsets
- for (k = 0; k < g_modesettings[blockMode].maxSubSets; k++)
- {
- bitPosition = cmp_Write8Bit2(cmp_out, bitPosition, g_modesettings[blockMode].componentBits, unpackedColours[k * 2][ch] & 0xFF);
- bitPosition = cmp_Write8Bit2(cmp_out, bitPosition, g_modesettings[blockMode].componentBits, unpackedColours[k * 2 + 1][ch] & 0xFF);
- }
- }
- // write parity bits
- if (blockMode != 2)
- {
- for (k = 0; k < g_modesettings[blockMode].maxSubSets; k++)
- {
- if (blockMode == 1)
- { // ONE_PBIT
- bitPosition = cmp_Write8Bit2(cmp_out, bitPosition, 1, parityBits[k][0] & 0x01);
- }
- else
- { // TWO_PBIT
- bitPosition = cmp_Write8Bit2(cmp_out, bitPosition, 1, parityBits[k][0] & 0x01);
- bitPosition = cmp_Write8Bit2(cmp_out, bitPosition, 1, parityBits[k][1] & 0x01);
- }
- }
- }
- // Encode the index bits
- CGU_INT bitPositionV = bitPosition;
- for (k = 0; k < 16; k++)
- {
- CGU_UINT8 partsub = cmp_get_partition_subset2(bestPartition, g_modesettings[blockMode].maxSubSets, k);
- // If this is a fixup index then drop the MSB which is implicitly 0
- if (k == fixup[partsub])
- {
- cmp_Write8BitV2(cmp_out, bitPositionV, g_modesettings[blockMode].indexBits - 1, blockindex[k] & 0x07F);
- bitPositionV += g_modesettings[blockMode].indexBits - 1;
- }
- else
- {
- cmp_Write8BitV2(cmp_out, bitPositionV, g_modesettings[blockMode].indexBits, blockindex[k]);
- bitPositionV += g_modesettings[blockMode].indexBits;
- }
- }
- }
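- // Bitstream order for modes 0/1/2/3/7: unary mode prefix, partition id,
- // endpoint components grouped per channel (all R pairs, then G, B, and A for
- // mode 7), parity bits, then the per-texel indices with each subset's fixup
- // index written one bit short (its MSB is implicitly 0).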
- CGV_FLOAT cmp_process_mode(CMP_INOUT CGU_UINT32 best_cmp_out[5], CMP_IN CGU_Vec4ui image_src[16], CMP_IN CGU_INT block_mode)
- {
- #ifdef USE_OLDCODE
- CGV_FLOAT best_err = 1e30f;
- CGU_Vec4ui epo_code[6];
- CGU_Vec4ui bestEndpoints[6];
- CGU_UINT8 bestindex[3][16];
- CGU_INT bestEntryCount[3];
- CGU_UINT8 bestindex16[16];
- CGU_UINT32 packedEndpoints[6] = {0, 0, 0, 0, 0, 0};
- CGU_UINT32 k;
- CGU_UINT32 ch;
- CGU_UINT32 subset;
- // Check for a solid color for a fast encode
- CGV_Vec4ui mean_out = 0u;
- for (k = 0; k < 16; k++)
- {
- mean_out = mean_out + image_src[k];
- bestindex16[k] = 0;
- }
- mean_out = mean_out / 16;
- // Image has alpha: placeholder branch, intentionally left empty in this version
- if (mean_out.w < 255)
- {
- }
- CGU_UINT8 storedBestindex[64][3][16];
- CGV_FLOAT storedError[64];
- CGU_UINT8 sortedPartition[64];
- CGV_FLOAT quality = 1.0f;
- CGV_FLOAT opaque_err = 0.0f;
- CGV_Vec4ui image_subsets[3][16];
- CGU_INT subset_entryCount[MAX_SUBSETS] = {0, 0, 0};
- CGU_UINT8 bestPartition = 0;
- for (CGU_UINT8 mode_blockPartition = 0; mode_blockPartition < 64; mode_blockPartition++)
- {
- cmp_GetPartitionSubSet2_mode01237(
- image_subsets, subset_entryCount, mode_blockPartition, image_src, block_mode, g_modesettings[block_mode].channels3or4);
- CGV_Vec4ui subset_image_src[16];
- CGU_UINT8 index_out1[16];
- CGV_FLOAT err_quant = 0.0F;
- // Store the quantize error for this partition to be sorted and processed later
- for (subset = 0; subset < g_modesettings[block_mode].maxSubSets; subset++)
- {
- CGU_INT numEntries = subset_entryCount[subset];
- for (CGU_UINT8 ii = 0; ii < 16; ii++)
- subset_image_src[ii] = image_subsets[subset][ii];
- err_quant += cmp_GetQuantizeIndex_old(
- index_out1, subset_image_src, numEntries, g_modesettings[block_mode].clusters, g_modesettings[block_mode].channels3or4);
- for (CGU_UINT8 idx = 0; idx < numEntries; idx++)
- storedBestindex[mode_blockPartition][subset][idx] = index_out1[idx];
- }
- storedError[mode_blockPartition] = err_quant;
- }
- // Sort the results
- cmp_sortPartitionProjection(storedError, sortedPartition, 64); // 64 partitions
- CGU_UINT8 numShakeAttempts = cmp_max8(1, cmp_min8((CGU_UINT8)floor(8 * quality + 0.5), 64)); // 64 partitions
- CGV_FLOAT err_best = CMP_FLOAT_MAX;
- // Now do the endpoint shaking
- for (CGU_UINT8 nSA = 0; nSA < numShakeAttempts; nSA++)
- {
- CGV_FLOAT err_optimized = 0.0F;
- CGU_UINT8 sortedBlockPartition;
- sortedBlockPartition = sortedPartition[nSA];
- //********************************************
- // Get the partition shape for the given mode
- //********************************************
- cmp_GetPartitionSubSet2_mode01237(
- image_subsets, subset_entryCount, sortedBlockPartition, image_src, block_mode, g_modesettings[block_mode].channels3or4);
- //*****************************
- // Process the partition shape
- //*****************************
- for (subset = 0; subset < g_modesettings[block_mode].maxSubSets; subset++)
- {
- CGU_INT numEntries = subset_entryCount[subset];
- CGU_UINT32 index_io[16];
- CGV_Vec4ui src_image_block[16];
- CGU_Vec4ui tmp_epo_code[2] = {{0, 0, 0, 0}, {0, 0, 0, 0}};
- for (k = 0; k < 16; k++)
- src_image_block[k] = image_subsets[subset][k];
- for (k = 0; k < 16; k++)
- index_io[k] = storedBestindex[sortedBlockPartition][subset][k];
- CGU_UINT32 index_packed_out[2] = {0, 0};
- err_optimized += cmp_optimize_IndexAndEndPoints(tmp_epo_code,
- index_io,
- index_packed_out,
- src_image_block,
- numEntries,
- g_modesettings[block_mode].clusters,
- g_modesettings[block_mode].bits,
- g_modesettings[block_mode].channels3or4,
- 0.01f,
- block_mode);
- for (k = 0; k < 16; k++)
- storedBestindex[sortedBlockPartition][subset][k] = index_io[k];
- epo_code[subset * 2] = tmp_epo_code[0];
- epo_code[subset * 2 + 1] = tmp_epo_code[1];
- shared_temp[subset * 2].endPoint_low = tmp_epo_code[0];
- shared_temp[subset * 2 + 1].endPoint_high = tmp_epo_code[1];
- }
- //****************************************
- // Check if result is better than the last
- //****************************************
- if (err_optimized < err_best)
- {
- bestPartition = sortedBlockPartition;
- CGU_INT bestIndexCount = 0;
- for (subset = 0; subset < g_modesettings[block_mode].maxSubSets; subset++)
- {
- CGU_UINT32 numEntries = subset_entryCount[subset];
- bestEntryCount[subset] = numEntries;
- if (numEntries)
- {
- bestEndpoints[subset * 2] = epo_code[subset * 2];
- bestEndpoints[subset * 2 + 1] = epo_code[subset * 2 + 1];
- shared_temp[subset * 2].endPoint_low = bestEndpoints[subset * 2];
- shared_temp[subset * 2 + 1].endPoint_high = bestEndpoints[subset * 2 + 1];
- for (k = 0; k < numEntries; k++)
- {
- bestindex[subset][k] = storedBestindex[sortedBlockPartition][subset][k];
- bestindex16[bestIndexCount++] = storedBestindex[sortedBlockPartition][subset][k];
- shared_temp[k].colorindex = storedBestindex[sortedBlockPartition][subset][k];
- }
- }
- }
- err_best = err_optimized;
- // Early out if we found we can compress with error below the quality threshold
- if (err_best <= 0.01f) // Threshold error
- {
- break;
- }
- }
- }
- if (block_mode != 7)
- err_best += opaque_err;
- if (err_best > best_err)
- return best_err;
- //**************************
- // Save the encoded block
- //**************************
- best_err = err_best;
- // Now we have all the data needed to encode the block
- // We need to pack the endpoints prior to encoding
- for (subset = 0; subset < g_modesettings[block_mode].maxSubSets; subset++)
- {
- packedEndpoints[subset * 2] = 0;
- packedEndpoints[subset * 2 + 1] = 0;
- if (bestEntryCount[subset])
- {
- CGU_UINT32 rightAlignment = 0;
- // Sort out parity bits
- if (block_mode != 2)
- {
- // Sort out BCC parity bits
- packedEndpoints[subset * 2] = bestEndpoints[subset * 2][0] & 1;
- packedEndpoints[subset * 2 + 1] = bestEndpoints[subset * 2 + 1][0] & 1;
- for (ch = 0; ch < g_modesettings[block_mode].channels3or4; ch++)
- {
- bestEndpoints[subset * 2][ch] >>= 1;
- bestEndpoints[subset * 2 + 1][ch] >>= 1;
- }
- rightAlignment++;
- }
- // Fixup endpoints
- for (ch = 0; ch < g_modesettings[block_mode].channels3or4; ch++)
- {
- packedEndpoints[subset * 2] |= bestEndpoints[subset * 2][ch] << rightAlignment;
- packedEndpoints[subset * 2 + 1] |= bestEndpoints[subset * 2 + 1][ch] << rightAlignment;
- rightAlignment += g_modesettings[block_mode].componentBits;
- }
- }
- }
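- // Packed layout sketch (hypothetical values, assuming componentBits = 6 as in mode 1):
- // after the parity bit is split off and the channels are shifted right by one,
- //   packed = P | (R << 1) | (G << 7) | (B << 13)
- // Mode 2 has no parity bit, so rightAlignment starts at 0 instead of 1.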
- CGU_UINT8 idxCount[3] = {0, 0, 0};
- for (k = 0; k < SOURCE_BLOCK_SIZE; k++)
- {
- CGU_UINT8 partsub = cmp_get_partition_subset2(bestPartition, g_modesettings[block_mode].maxSubSets, k);
- CGU_UINT8 idxC = idxCount[partsub];
- bestindex16[k] = bestindex[partsub][idxC];
- idxCount[partsub] = idxC + 1;
- shared_temp[k].colorindex = bestindex16[k];
- }
- CGU_UINT8 cmp_out[COMPRESSED_BLOCK_SIZE];
- cmp_Encode_mode01237(block_mode, bestPartition, packedEndpoints, bestindex16, cmp_out);
-
- // Widen to 32 bits before shifting so byte 3 cannot overflow a signed int
- best_cmp_out[0] = (CGU_UINT32)cmp_out[0] + ((CGU_UINT32)cmp_out[1] << 8) + ((CGU_UINT32)cmp_out[2] << 16) + ((CGU_UINT32)cmp_out[3] << 24);
- best_cmp_out[1] = (CGU_UINT32)cmp_out[4] + ((CGU_UINT32)cmp_out[5] << 8) + ((CGU_UINT32)cmp_out[6] << 16) + ((CGU_UINT32)cmp_out[7] << 24);
- best_cmp_out[2] = (CGU_UINT32)cmp_out[8] + ((CGU_UINT32)cmp_out[9] << 8) + ((CGU_UINT32)cmp_out[10] << 16) + ((CGU_UINT32)cmp_out[11] << 24);
- best_cmp_out[3] = (CGU_UINT32)cmp_out[12] + ((CGU_UINT32)cmp_out[13] << 8) + ((CGU_UINT32)cmp_out[14] << 16) + ((CGU_UINT32)cmp_out[15] << 24);
- //CGU_Vec4ui block = {0, 0, 0, 0};
- //block_package1(block, bestPartition, 0);
- //best_cmp_out[0] = block[0];
- //best_cmp_out[1] = block[1];
- //best_cmp_out[2] = block[2];
- //best_cmp_out[3] = block[3];
- //
- //printSharedTemp();
- return best_err;
- #else
- CGU_UINT8 bestPartition = 0;
- // Find the best partition
- CGU_UINT32 pbit = 0;
- CGU_UINT32 error;
- CGU_UINT32 bestErr = MAX_UINT;
- CGU_UINT32 bestpbit = 0;
- for (CGU_UINT8 mode_blockPartition = 0; mode_blockPartition < 64; mode_blockPartition++)
- {
- error = cmp_GetPartitionError(pbit, mode_blockPartition, image_src);
- if (error < bestErr)
- {
- bestErr = error;
- bestpbit = pbit;
- bestPartition = mode_blockPartition;
- }
- }
- // Get the index for the partition
- for (CGU_INT threadInBlock = 15; threadInBlock >= 0; threadInBlock--)
- {
- ProcessBlock(1, bestPartition, 0, bestpbit, 0, threadInBlock, threadInBlock, 0);
- }
- // print results for debug
- printSharedTemp();
- //=======================
- // Encode final block
- //========================
- {
- // CGU_Vec4ui blockGreen = {0xffe00040, 0xfffe0007, 0x00000001, 0x00000000};
- // CGU_Vec4ui blockBlue = {0x00000040, 0xfffffff8, 0x00000001, 0x00000000};
- // CGU_Vec4ui block00 = {0xf0617fc0, 0xfffe0c3f, 0xff00fe11, 0xff01ef00};
- CGU_Vec4ui blockRed = {0x001fffc0, 0xfffe0000, 0x00000001, 0x00000000};
- CGU_Vec4ui block = {0, 0, 0, 0};
- CGU_UINT32 input_mode = 1;
- switch (input_mode)
- {
- case 1:
- block_package1(block, bestPartition, 0);
- break;
- case 3:
- block_package3(block, bestPartition, 0);
- break;
- case 7:
- block_package7(block, bestPartition, 0);
- break;
- default: // error unsupported mode used!
- block = blockRed;
- break;
- }
- best_cmp_out[0] = block[0];
- best_cmp_out[1] = block[1];
- best_cmp_out[2] = block[2];
- best_cmp_out[3] = block[3];
- }
- return 0.0f;
- #endif
- }
- #endif // Not ASPM_HLSL
- //======================================= MODES 45 =============================================
- #ifndef ASPM_HLSL
- #if defined(ENABLE_CMP_MODE4) || defined(ENABLE_CMP_MODE5)
- // Compression Results
- struct cmp_mode_parameters2
- {
- CGV_INT color_qendpoint[8];
- CGV_INT alpha_qendpoint[8];
- CGV_UINT8 color_index[16];
- CGV_UINT8 alpha_index[16];
- CGV_UINT32 idxMode;
- CGV_UINT32 rotated_channel;
- };
- CMP_STATIC CMP_CONSTANT CGU_UINT8 componentRotations2[4][4] = {
- { COMP_ALPHA, COMP_RED, COMP_GREEN, COMP_BLUE },
- { COMP_RED, COMP_ALPHA, COMP_GREEN, COMP_BLUE },
- { COMP_GREEN, COMP_RED, COMP_ALPHA, COMP_BLUE },
- { COMP_BLUE, COMP_RED, COMP_GREEN, COMP_ALPHA }
- };
- INLINE CGV_UINT8 old_shift_right_uint(CGV_UINT8 v, CGU_UINT8 bits)
- {
- return v >> bits; // (perf warning expected)
- }
- void old_Write8Bit(CGV_UINT8 base[], CGU_INT* uniform offset, CGU_INT bits, CGV_UINT8 bitVal)
- {
- base[*offset / 8] |= bitVal << (*offset % 8);
- if (*offset % 8 + bits > 8)
- {
- base[*offset / 8 + 1] |= old_shift_right_uint(bitVal, 8 - *offset % 8);
- }
- *offset += bits;
- }
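- // Example (hypothetical values): writing bitVal = 0b101 with bits = 3 at *offset = 6
- // puts the low 2 bits of bitVal into bits 6..7 of base[0], spills the remaining
- // high bit into bit 0 of base[1], and advances *offset to 9.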
- INLINE void old_swap_index(CGV_UINT8 u[], CGV_UINT8 v[], CGU_INT n)
- {
- for (CGU_INT i = 0; i < n; i++)
- {
- CGV_UINT8 t = u[i];
- u[i] = v[i];
- v[i] = t;
- }
- }
- INLINE void old_swap_epo(CGV_INT u[], CGV_INT v[], CGV_INT n)
- {
- for (CGU_INT i = 0; i < n; i++)
- {
- CGV_INT t = u[i];
- u[i] = v[i];
- v[i] = t;
- }
- }
- INLINE void old_encode_swap(CGV_INT endpoint[], CGU_INT channels, CGV_UINT8 block_index[MAX_SUBSET_SIZE], CGU_INT bits)
- {
- CGU_INT levels = 1 << bits;
- if (block_index[0] >= levels / 2)
- {
- old_swap_epo(&endpoint[0], &endpoint[channels], channels);
- for (CGU_INT k = 0; k < SOURCE_BLOCK_SIZE; k++)
- #ifdef ASPM_GPU
- block_index[k] = (levels - 1) - block_index[k];
- #else
- block_index[k] = CGV_UINT8(levels - 1) - block_index[k];
- #endif
- }
- }
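- // Rationale: BC7 omits the MSB of the anchor (fix-up) index, so index 0 must land
- // in the lower half of the index range. When it does not, the endpoints are swapped
- // and every index k is remapped to (levels - 1) - k, leaving decoded colors unchanged.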
- void old_encode_index(CGV_UINT8 data[16], CGU_INT* uniform pPos, CGV_UINT8 block_index[MAX_SUBSET_SIZE], CGU_INT bits)
- {
- old_Write8Bit(data, pPos, bits - 1, block_index[0]);
- for (CGU_INT j = 1; j < SOURCE_BLOCK_SIZE; j++)
- {
- CGV_UINT8 qbits = block_index[j] & 0xFF;
- old_Write8Bit(data, pPos, bits, qbits);
- }
- }
- void cmp_Encode_mode4(CMP_INOUT CGV_UINT8 cmp_out[COMPRESSED_BLOCK_SIZE], cmp_mode_parameters2 params)
- {
- CGU_INT bitPosition = 4; // Position the pointer at the LSB
- for (CGU_INT k = 0; k < COMPRESSED_BLOCK_SIZE; k++)
- cmp_out[k] = 0;
- // mode 4 (5 bits) 00001
- old_Write8Bit(cmp_out, &bitPosition, 1, 1);
- // rotation 2 bits
- old_Write8Bit(cmp_out, &bitPosition, 2, CMP_STATIC_CAST(CGV_UINT8, params.rotated_channel));
- // idxMode 1 bit
- old_Write8Bit(cmp_out, &bitPosition, 1, CMP_STATIC_CAST(CGV_UINT8, params.idxMode));
- CGU_INT idxBits[2] = {2, 3};
- if (params.idxMode)
- {
- idxBits[0] = 3;
- idxBits[1] = 2;
- // Indicate if we need to fixup the index
- old_swap_index(params.color_index, params.alpha_index, 16);
- old_encode_swap(params.alpha_qendpoint, 4, params.color_index, 2);
- old_encode_swap(params.color_qendpoint, 4, params.alpha_index, 3);
- }
- else
- {
- old_encode_swap(params.color_qendpoint, 4, params.color_index, 2);
- old_encode_swap(params.alpha_qendpoint, 4, params.alpha_index, 3);
- }
- // color endpoints 5 bits each
- // R0 : R1
- // G0 : G1
- // B0 : B1
- for (CGU_INT component = 0; component < 3; component++)
- {
- old_Write8Bit(cmp_out, &bitPosition, 5, CMP_STATIC_CAST(CGV_UINT8, params.color_qendpoint[component]));
- old_Write8Bit(cmp_out, &bitPosition, 5, CMP_STATIC_CAST(CGV_UINT8, params.color_qendpoint[4 + component]));
- }
- // alpha endpoints (6 bits each)
- // A0 : A1
- old_Write8Bit(cmp_out, &bitPosition, 6, CMP_STATIC_CAST(CGV_UINT8, params.alpha_qendpoint[0]));
- old_Write8Bit(cmp_out, &bitPosition, 6, CMP_STATIC_CAST(CGV_UINT8, params.alpha_qendpoint[4]));
- // index 2 bits each (31 bits total)
- old_encode_index(cmp_out, &bitPosition, params.color_index, 2);
- // index 3 bits each (47 bits total)
- old_encode_index(cmp_out, &bitPosition, params.alpha_index, 3);
- }
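- // Bit-budget check for the mode 4 encoding above:
- //   5 mode + 2 rotation + 1 idxMode + 6*5 color endpoint bits + 2*6 alpha endpoint bits
- //   + 31 bits of 2-bit indices + 47 bits of 3-bit indices = 128 bits.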
- void cmp_Encode_mode5(CMP_INOUT CGV_UINT8 cmp_out[COMPRESSED_BLOCK_SIZE], cmp_mode_parameters2 params)
- {
- for (CGU_INT k = 0; k < COMPRESSED_BLOCK_SIZE; k++)
- cmp_out[k] = 0;
- // mode 5 bits = 000001
- CGU_INT bitPosition = 5; // Position the pointer at the LSB
- old_Write8Bit(cmp_out, &bitPosition, 1, 1);
- // Write 2 bit rotation
- old_Write8Bit(cmp_out, &bitPosition, 2, CMP_STATIC_CAST(CGV_UINT8, params.rotated_channel));
- old_encode_swap(params.color_qendpoint, 4, params.color_index, 2);
- old_encode_swap(params.alpha_qendpoint, 4, params.alpha_index, 2);
- // color endpoints (7 bits each)
- // R0 : R1
- // G0 : G1
- // B0 : B1
- for (CGU_INT component = 0; component < 3; component++)
- {
- old_Write8Bit(cmp_out, &bitPosition, 7, CMP_STATIC_CAST(CGV_UINT8, params.color_qendpoint[component]));
- old_Write8Bit(cmp_out, &bitPosition, 7, CMP_STATIC_CAST(CGV_UINT8, params.color_qendpoint[4 + component]));
- }
- // alpha endpoints (8 bits each)
- // A0 : A1
- old_Write8Bit(cmp_out, &bitPosition, 8, CMP_STATIC_CAST(CGV_UINT8, params.alpha_qendpoint[0]));
- old_Write8Bit(cmp_out, &bitPosition, 8, CMP_STATIC_CAST(CGV_UINT8, params.alpha_qendpoint[4]));
- // color index 2 bits each (31 bits total)
- // alpha index 2 bits each (31 bits total)
- old_encode_index(cmp_out, &bitPosition, params.color_index, 2);
- old_encode_index(cmp_out, &bitPosition, params.alpha_index, 2);
- }
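- // Bit-budget check for the mode 5 encoding above:
- //   6 mode + 2 rotation + 6*7 color endpoint bits + 2*8 alpha endpoint bits
- //   + 31 bits of 2-bit color indices + 31 bits of 2-bit alpha indices = 128 bits.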
- void Compress_mode45(CMP_INOUT CGU_UINT32 cmp_out[4], CGU_INT blockMode, CGU_Vec4ui image_src[SOURCE_BLOCK_SIZE])
- {
- cmp_mode_parameters2 best_candidate;
- CGU_UINT32 channels3or4 = 4;
- CGU_UINT8 numClusters0[2];
- CGU_UINT8 numClusters1[2];
- CGU_INT modeBits[2];
- CGU_INT max_idxMode;
- if (blockMode == 4)
- {
- max_idxMode = 2;
- modeBits[0] = 30; // bits = 2 * (Red 5 + Grn 5 + Blu 5)
- modeBits[1] = 36; // bits = 2 * (Alpha 6+6+6)
- numClusters0[0] = 4;
- numClusters0[1] = 8;
- numClusters1[0] = 8;
- numClusters1[1] = 4;
- }
- else
- {
- max_idxMode = 1;
- modeBits[0] = 42; // bits = 2 * (Red 7 + Grn 7 + Blu 7)
- modeBits[1] = 48; // bits = 2 * (Alpha 8+8+8) = 48
- numClusters0[0] = 4;
- numClusters0[1] = 4;
- numClusters1[0] = 4;
- numClusters1[1] = 4;
- }
- CGU_Vec4ui src_color_Block[SOURCE_BLOCK_SIZE];
- CGU_Vec4ui src_alpha_Block[SOURCE_BLOCK_SIZE];
- CGV_FLOAT best_err = CMP_FLOAT_MAX;
- // Go through each possible rotation and selection of index bits
- for (CGU_UINT8 rotated_channel = 0; rotated_channel < channels3or4; rotated_channel++)
- {
- // Build the rotated color block and the replicated alpha block
- for (CGU_INT k = 0; k < SOURCE_BLOCK_SIZE; k++)
- {
- for (CGU_INT p = 0; p < 3; p++)
- {
- src_color_Block[k][p] = image_src[k][componentRotations2[rotated_channel][p+1]];
- src_alpha_Block[k][p] = image_src[k][componentRotations2[rotated_channel][0]];
- }
- src_color_Block[k][3] = image_src[k][3];
- src_alpha_Block[k][3] = image_src[k][componentRotations2[3][3]];
- }
- CGV_FLOAT err_quantizer;
- CGV_FLOAT err_bestQuantizer = CMP_FLOAT_MAX;
- for (CGU_INT idxMode = 0; idxMode < max_idxMode; idxMode++)
- {
- err_quantizer = cmp_GetQuantizeIndex_old(best_candidate.color_index, src_color_Block, SOURCE_BLOCK_SIZE, numClusters0[idxMode], 3);
- err_quantizer += cmp_GetQuantizeIndex_old(best_candidate.alpha_index, src_alpha_Block, SOURCE_BLOCK_SIZE, numClusters1[idxMode], 3) / 3.0F;
- // If quality is high then run the full shaking for this config and
- // store the result if it beats the best overall error
- // Otherwise only run the shaking if the error is better than the best
- // quantizer error
- if (err_quantizer <= err_bestQuantizer)
- {
- err_bestQuantizer = err_quantizer;
- // Shake size gives the size of the shake cube
- CGV_FLOAT err_overallError;
- CGU_Vec4ui color_qendpoint2[2] = {{0, 0, 0, 0}, {0, 0, 0, 0}};
- CGV_Vec4ui src_image_block[16];
- CGU_Vec4ui alpha_qendpoint2[2] = {{0, 0, 0, 0}, {0, 0, 0, 0}};
- CGU_UINT32 alpha_index[16];
- CGU_UINT32 color_index[16];
- for (int k = 0; k < 16; k++) {
- alpha_index[k] = best_candidate.alpha_index[k];
- color_index[k] = best_candidate.color_index[k];
- }
- CGU_UINT32 color_index_packed_out[2] = {0, 0};
- CGU_UINT32 alpha_index_packed_out[2] = {0, 0};
- err_overallError = cmp_optimize_IndexAndEndPoints(color_qendpoint2,
- color_index,
- color_index_packed_out,
- src_color_Block,
- 16,
- numClusters0[idxMode],
- modeBits[0],
- 3,
- 0.01f,
- blockMode);
- // Alpha scalar block
- err_overallError += cmp_optimize_IndexAndEndPoints(alpha_qendpoint2,
- alpha_index,
- alpha_index_packed_out,
- src_alpha_Block,
- 16,
- numClusters1[idxMode],
- modeBits[1],
- 3,
- 0.01f,
- blockMode) / 3;
- // If we beat the previous best then encode the block
- if (err_overallError < best_err)
- {
- best_err = err_overallError;
- best_candidate.idxMode = idxMode;
- best_candidate.rotated_channel = rotated_channel;
- best_candidate.alpha_qendpoint[0] = alpha_qendpoint2[0].x;
- best_candidate.alpha_qendpoint[1] = alpha_qendpoint2[0].y;
- best_candidate.alpha_qendpoint[2] = alpha_qendpoint2[0].z;
- best_candidate.alpha_qendpoint[3] = alpha_qendpoint2[0].w;
- best_candidate.alpha_qendpoint[4] = alpha_qendpoint2[1].x;
- best_candidate.alpha_qendpoint[5] = alpha_qendpoint2[1].y;
- best_candidate.alpha_qendpoint[6] = alpha_qendpoint2[1].z;
- best_candidate.alpha_qendpoint[7] = alpha_qendpoint2[1].w;
- best_candidate.color_qendpoint[0] = color_qendpoint2[0].x;
- best_candidate.color_qendpoint[1] = color_qendpoint2[0].y;
- best_candidate.color_qendpoint[2] = color_qendpoint2[0].z;
- best_candidate.color_qendpoint[3] = color_qendpoint2[0].w;
- best_candidate.color_qendpoint[4] = color_qendpoint2[1].x;
- best_candidate.color_qendpoint[5] = color_qendpoint2[1].y;
- best_candidate.color_qendpoint[6] = color_qendpoint2[1].z;
- best_candidate.color_qendpoint[7] = color_qendpoint2[1].w;
- for (int k = 0; k < 16; k++) {
- best_candidate.color_index[k] = color_index[k];
- best_candidate.alpha_index[k] = alpha_index[k];
- }
- CGV_UINT8 cmp_out16[COMPRESSED_BLOCK_SIZE];
- if (blockMode == 4)
- cmp_Encode_mode4(cmp_out16, best_candidate);
- else
- cmp_Encode_mode5(cmp_out16, best_candidate);
- // Widen to 32 bits before shifting so byte 3 cannot overflow a signed int
- cmp_out[0] = (CGU_UINT32)cmp_out16[0] + ((CGU_UINT32)cmp_out16[1] << 8) + ((CGU_UINT32)cmp_out16[2] << 16) + ((CGU_UINT32)cmp_out16[3] << 24);
- cmp_out[1] = (CGU_UINT32)cmp_out16[4] + ((CGU_UINT32)cmp_out16[5] << 8) + ((CGU_UINT32)cmp_out16[6] << 16) + ((CGU_UINT32)cmp_out16[7] << 24);
- cmp_out[2] = (CGU_UINT32)cmp_out16[8] + ((CGU_UINT32)cmp_out16[9] << 8) + ((CGU_UINT32)cmp_out16[10] << 16) + ((CGU_UINT32)cmp_out16[11] << 24);
- cmp_out[3] = (CGU_UINT32)cmp_out16[12] + ((CGU_UINT32)cmp_out16[13] << 8) + ((CGU_UINT32)cmp_out16[14] << 16) + ((CGU_UINT32)cmp_out16[15] << 24);
- }
- }
- } // idxMode
- } // rotated_channel
- }
- #endif
- #endif
- #ifdef ENABLE_CMP_REFINE_MODE6_API
- CGU_BOOL get_ideal_cluster2(CMP_INOUT CGV_Vec4f image_cluster[2],
- CMP_IN CGU_UINT32 index_cluster[16],
- CMP_IN CGU_INT Mi_,
- CMP_IN CGU_Vec4ui image_src[16],
- CMP_IN CGU_UINT32 numEntries,
- CMP_IN CGU_UINT32 channels3or4)
- {
- // get ideal cluster centers
- CGV_Vec4f image_cluster_mean[16];
- for (CGU_UINT32 ii = 0; ii < 16; ii++)
- {
- image_cluster_mean[ii] = 0.0f;
- }
- GetClusterMean2(image_cluster_mean, image_src, index_cluster, numEntries, channels3or4); // unrounded
- CGV_FLOAT image_matrix0[2] = {0, 0}; // matrix /inverse matrix
- CGV_FLOAT image_matrix1[2] = {0, 0}; // matrix /inverse matrix
- CGV_Vec4f image_rp[2]; // right part for RMS fit problem
- image_rp[0] = 0.0f;
- image_rp[1] = 0.0f;
- // weight with cnt if running on a compacted index
- for (CGU_UINT32 k = 0; k < numEntries; k++)
- {
- image_matrix0[0] += (Mi_ - index_cluster[k]) * (Mi_ - index_cluster[k]);
- image_matrix0[1] += index_cluster[k] * (Mi_ - index_cluster[k]); // im is symmetric
- image_matrix1[1] += index_cluster[k] * index_cluster[k];
- image_rp[0] += image_cluster_mean[index_cluster[k]] * (CGU_FLOAT)(Mi_ - index_cluster[k]);
- image_rp[1] += image_cluster_mean[index_cluster[k]] * (CGU_FLOAT)index_cluster[k];
- }
- CGV_FLOAT matrix_dd = image_matrix0[0] * image_matrix1[1] - image_matrix0[1] * image_matrix0[1];
- // assert(matrix_dd !=0);
- // matrix_dd == 0 means that index_cluster[k] and (Mi_ - index_cluster[k]) are collinear,
- // which implies only one active index; that case is handled separately
- if (matrix_dd == 0)
- {
- image_cluster[0] = 0.0f;
- image_cluster[1] = 0.0f;
- return FALSE;
- }
- image_matrix1[0] = image_matrix0[0];
- image_matrix0[0] = image_matrix1[1] / matrix_dd;
- image_matrix1[1] = image_matrix1[0] / matrix_dd;
- image_matrix1[0] = image_matrix0[1] = -image_matrix0[1] / matrix_dd;
- CGV_FLOAT Mif = (CGV_FLOAT)Mi_;
- // values can exceed 255 here, clamp made no diff in quality!
- image_cluster[0] = (((image_rp[0] * image_matrix0[0]) + (image_rp[1] * image_matrix0[1])) * Mif);
- image_cluster[1] = (((image_rp[0] * image_matrix1[0]) + (image_rp[1] * image_matrix1[1])) * Mif);
- return TRUE;
- }
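- // Math sketch for get_ideal_cluster2: a pixel assigned index k is modeled as
- //   p(k) = (A * (Mi_ - k) + B * k) / Mi_
- // for endpoints A and B. Minimizing sum_k |mean(k) - p(k)|^2 gives the 2x2 normal
- // equations accumulated in image_matrix0/1 and image_rp above; matrix_dd is the
- // determinant, which is zero exactly when only one index value is active.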
- CGV_FLOAT shake2(CMP_INOUT CGU_Vec4ui epo_code_shake[2],
- CMP_IN CGV_Vec4f image_cluster[2],
- CMP_IN CGU_UINT32 index_cluster[16],
- CMP_IN CGU_Vec4ui image_src[16],
- CMP_IN CGU_UINT32 index_bits,
- CMP_IN CGU_UINT32 mtype,
- CMP_IN CGU_UINT32 max_bits[4],
- CMP_IN CGU_UINT32 use_par,
- CMP_IN CGU_UINT32 numEntries, // max 16
- CMP_IN CGU_UINT32 channels3or4)
- {
- CMP_UNUSED(mtype);
- CGV_FLOAT best_err = CMP_FLOAT_MAX;
- #define SHAKESIZE1 1
- #define SHAKESIZE2 2
- // shake single or - cartesian
- // shake odd/odd and even/even or - same parity
- // shake odd/odd odd/even , even/odd and even/even - bcc
- CGV_FLOAT err_ed[2][2][4];
- CGU_UINT32 epo_code_par[2][2][2][4];
- for (CGU_UINT32 ch = 0; ch < channels3or4; ch++)
- {
- CGU_UINT32 ppA = 0;
- CGU_UINT32 ppB = 0;
- CGU_UINT32 rr = (use_par ? 2 : 1);
- CGU_UINT32 epo_code_epi0[2]; // [0] = begin of range, [1] = end of range
- CGU_UINT32 epo_code_epi1[2]; // [0] = begin of range, [1] = end of range
- for (ppA = 0; ppA < rr; ppA++)
- { // loop max = 2
- for (ppB = 0; ppB < rr; ppB++)
- { // loop max = 2
- // set default ranges
- switch (ch)
- {
- case 0:
- epo_code_epi0[0] = epo_code_epi0[1] = cmp_ep_find_floor2(image_cluster[0].x, max_bits[0], use_par, ppA);
- epo_code_epi1[0] = epo_code_epi1[1] = cmp_ep_find_floor2(image_cluster[1].x, max_bits[0], use_par, ppB);
- break;
- case 1:
- epo_code_epi0[0] = epo_code_epi0[1] = cmp_ep_find_floor2(image_cluster[0].y, max_bits[1], use_par, ppA);
- epo_code_epi1[0] = epo_code_epi1[1] = cmp_ep_find_floor2(image_cluster[1].y, max_bits[1], use_par, ppB);
- break;
- case 2:
- epo_code_epi0[0] = epo_code_epi0[1] = cmp_ep_find_floor2(image_cluster[0].z, max_bits[2], use_par, ppA);
- epo_code_epi1[0] = epo_code_epi1[1] = cmp_ep_find_floor2(image_cluster[1].z, max_bits[2], use_par, ppB);
- break;
- case 3:
- if (channels3or4 == 4)
- {
- epo_code_epi0[0] = epo_code_epi0[1] = cmp_ep_find_floor2(image_cluster[0].w, max_bits[3], use_par, ppA);
- epo_code_epi1[0] = epo_code_epi1[1] = cmp_ep_find_floor2(image_cluster[1].w, max_bits[3], use_par, ppB);
- }
- break;
- }
- // set begin range
- epo_code_epi0[0] -= ((epo_code_epi0[0] < SHAKESIZE1 ? epo_code_epi0[0] : SHAKESIZE1)) & (~use_par);
- epo_code_epi1[0] -= ((epo_code_epi1[0] < SHAKESIZE1 ? epo_code_epi1[0] : SHAKESIZE1)) & (~use_par);
- // set end range
- epo_code_epi0[1] +=
- ((1 << max_bits[ch]) - 1 - epo_code_epi0[1] < SHAKESIZE2 ? (1 << max_bits[ch]) - 1 - epo_code_epi0[1] : SHAKESIZE2) & (~use_par);
- epo_code_epi1[1] +=
- ((1 << max_bits[ch]) - 1 - epo_code_epi1[1] < SHAKESIZE2 ? (1 << max_bits[ch]) - 1 - epo_code_epi1[1] : SHAKESIZE2) & (~use_par);
- CGU_UINT32 step = (1 << use_par);
- err_ed[ppA][ppB][ch] = CMP_FLOAT_MAX;
- for (CGU_UINT32 epo_p0 = epo_code_epi0[0]; epo_p0 <= epo_code_epi0[1]; epo_p0 += step)
- {
- for (CGU_UINT32 epo_p1 = epo_code_epi1[0]; epo_p1 <= epo_code_epi1[1]; epo_p1 += step)
- {
- CGV_FLOAT image_square_diff = 0.0F;
- CGV_FLOAT image_ramp;
- for (CGU_UINT32 _mc = 1; _mc < numEntries; _mc++)
- {
- image_ramp = GetRamp2(epo_p0, epo_p1, index_cluster[_mc], index_bits);
- switch (ch)
- {
- case 0:
- image_square_diff += cmp_squaref(image_ramp - image_src[_mc].x);
- break;
- case 1:
- image_square_diff += cmp_squaref(image_ramp - image_src[_mc].y);
- break;
- case 2:
- image_square_diff += cmp_squaref(image_ramp - image_src[_mc].z);
- break;
- case 3:
- if (channels3or4 == 4)
- image_square_diff += cmp_squaref(image_ramp - image_src[_mc].w);
- break;
- }
- }
- if (image_square_diff < err_ed[ppA][ppB][ch])
- {
- err_ed[ppA][ppB][ch] = image_square_diff;
- epo_code_par[ppA][ppB][0][ch] = epo_p0;
- epo_code_par[ppA][ppB][1][ch] = epo_p1;
- }
- }
- }
- } // pp1
- } // pp0
- } // ch
- //---------------------------------------------------------
- // CMP_CONSTANT CGU_UINT8 npv_nd[2][8] = {
- // {1, 2, 4, 8, 16, 32, 0, 0}, // 3 channel
- // {1, 2, 4, 0, 0, 0, 0, 0} // 4 channel type index 0..7
- // };
- // for (CGU_INT pn = 0; pn < npv_nd[channels3or4 - 3][type]; pn++)
- CGU_UINT32 bits = 4; // for mode 6 it's 4
- for (CGU_UINT32 pn = 0; pn < bits; pn++)
- {
- CGV_FLOAT err_2 = 0.0F;
- CGU_UINT32 d1 = 0;
- CGU_UINT32 d2 = 0;
- for (CGU_UINT32 ch = 0; ch < channels3or4; ch++)
- {
- d1 = par_vectors42_nd[pn][0][ch];
- d2 = par_vectors42_nd[pn][1][ch];
- err_2 += err_ed[d1][d2][ch];
- }
- if (err_2 < best_err)
- {
- best_err = err_2;
- d1 = par_vectors42_nd[pn][0][0];
- d2 = par_vectors42_nd[pn][1][0];
- epo_code_shake[0].x = epo_code_par[d1][d2][0][0];
- epo_code_shake[1].x = epo_code_par[d1][d2][1][0];
- d1 = par_vectors42_nd[pn][0][1];
- d2 = par_vectors42_nd[pn][1][1];
- epo_code_shake[0].y = epo_code_par[d1][d2][0][1];
- epo_code_shake[1].y = epo_code_par[d1][d2][1][1];
- d1 = par_vectors42_nd[pn][0][2];
- d2 = par_vectors42_nd[pn][1][2];
- epo_code_shake[0].z = epo_code_par[d1][d2][0][2];
- epo_code_shake[1].z = epo_code_par[d1][d2][1][2];
- if (channels3or4 == 4)
- {
- d1 = par_vectors42_nd[pn][0][3];
- d2 = par_vectors42_nd[pn][1][3];
- epo_code_shake[0].w = epo_code_par[d1][d2][0][3];
- epo_code_shake[1].w = epo_code_par[d1][d2][1][3];
- }
- }
- }
- return best_err;
- }
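- // Note on the parity search above: err_ed[ppA][ppB][ch] holds the best per-channel
- // error for each low/high endpoint parity combination, and the final pn loop uses
- // par_vectors42_nd to pick one parity vector that is consistent across channels,
- // since mode 6 shares a single p bit per endpoint.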
- CGV_FLOAT requantized_image_err2(CMP_INOUT CGU_UINT32 index_best[16],
- CMP_IN CGU_Vec4ui epo_code_best[2],
- CMP_IN CGU_UINT32 index_bits,
- CMP_IN CGU_UINT32 max_bits[4],
- CMP_IN CGU_Vec4ui image_src[16],
- CMP_IN CGU_UINT32 numEntries, // max 16
- CMP_IN CGU_UINT32 channels3or4)
- { // IN: 3 = RGB or 4 = RGBA (4 = MAX_CHANNELS)
- CMP_UNUSED(channels3or4);
- CMP_UNUSED(max_bits);
- //=========================================
- // requantized image based on new epo_code
- //=========================================
- CGV_Vec4f image_requantize[16];
- CGV_FLOAT err_requant = 0.0F;
- for (CGU_UINT32 k = 0; k < numEntries; k++)
- {
- image_requantize[k].x = GetRamp2(epo_code_best[0].x, epo_code_best[1].x, k, index_bits);
- image_requantize[k].y = GetRamp2(epo_code_best[0].y, epo_code_best[1].y, k, index_bits);
- image_requantize[k].z = GetRamp2(epo_code_best[0].z, epo_code_best[1].z, k, index_bits);
- image_requantize[k].w = GetRamp2(epo_code_best[0].w, epo_code_best[1].w, k, index_bits);
- }
- //=========================================
- // Calc the error for the requantized image
- //=========================================
- CGV_FLOAT err_cmin;
- CGU_UINT32 best_indx;
- CGV_FLOAT image_err;
- CGV_Vec4f imageDiff;
- for (CGU_UINT32 k1 = 0; k1 < numEntries; k1++)
- {
- // start with error as sum of 4 channels with Max pixel
- // value 256 squared plus 1 for err min check = (256 * 256 * 4) + 1;
- err_cmin = 262145.0f;
- best_indx = 0;
- for (CGU_UINT8 k2 = 0; k2 < numEntries; k2++)
- {
- image_err = 0.0F;
- imageDiff.x = image_requantize[k2].x - image_src[k1].x;
- imageDiff.y = image_requantize[k2].y - image_src[k1].y;
- imageDiff.z = image_requantize[k2].z - image_src[k1].z;
- imageDiff.w = image_requantize[k2].w - image_src[k1].w;
- image_err = cmp_dot4f(imageDiff, imageDiff);
- if (image_err < err_cmin)
- {
- err_cmin = image_err;
- best_indx = k2;
- }
- }
- index_best[k1] = best_indx;
- err_requant += err_cmin;
- }
- return err_requant;
- }
- CGV_FLOAT cmp_mode6_optimize_IndexAndEndPoints(CMP_INOUT CGU_Vec4ui epo_code_out[2], //
- CMP_INOUT CGU_UINT32 index_io[16], // Make sure input index is 0..15 range
- CMP_IN CGU_Vec4ui image_src[16],
- CMP_IN CGU_UINT32 numEntries, // max 16
- CMP_IN CGU_UINT32 Mi_, // last cluster; should be no larger than 16
- CMP_IN CGU_UINT32 bits, // total for all components
- CMP_IN CGU_UINT32 channels3or4, // IN: 3 = RGB or 4 = RGBA (4 = MAX_CHANNELS)
- CMP_IN CGU_FLOAT errorThreshold)
- {
- CMP_UNUSED(bits);
- CGV_FLOAT err_best = CMP_FLOAT_MAX;
- CGU_UINT32 type = 2; // = bits % (2 * channels3or4) for Mode 6 with 58 bits and 4 channels type is 2
- CGU_UINT32 use_par = 1; // as type == 2 use par is 1 = (type != 0);
- CGU_UINT32 max_bits[4] = {8, 8, 8, 8}; // Mode 6 max bits is 8 = (bits + channels2 - 1) / channels2;
- CGU_UINT32 index_bits = 4; // mode 6 uses 4-bit indices
- // CGU_INT iv;
- // iv = Mi_;
- // while (iv >>= 1)
- // index_bits++;
- Mi_ = Mi_ - 1;
- CGU_UINT32 index_tmp[16];
- CGU_UINT32 maxTry = MAX_TRY_SHAKER; // should be set by quality
- CGV_FLOAT err_requant = 0.0F;
- // Init best index to input index
- for (CGU_UINT32 k = 0; k < numEntries; k++)
- index_tmp[k] = index_io[k];
- CGU_UINT32 MaxIndex;
- MaxIndex = index_collapse2(index_tmp, numEntries);
- // We have a solid color 4x4 block; no need for optimization
- if (MaxIndex == 0)
- return 0.0f;
- for (CGU_UINT32 ii = 0; ii < maxTry; ii++)
- {
- //===============================
- // We have ramp colors to process
- //===============================
- CGV_FLOAT err_cluster = CMP_FLOAT_MAX;
- CGV_FLOAT err_shake;
- CGU_UINT32 index_cluster[16];
- CGU_Vec4ui epo_code_best[2] = {{0, 0, 0, 0}, {0, 0, 0, 0}};
- for (CGU_UINT32 ii2 = 0; ii2 < numEntries; ii2++)
- index_cluster[ii2] = 0;
- CGU_UINT32 mi = Mi_;
- for (CGU_UINT32 index_slope = 1; (index_slope * MaxIndex) <= mi; index_slope++)
- {
- CGV_Vec4f image_cluster[2] = {{0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f, 0.0f}};
- for (CGU_UINT32 index_offset = 0; index_offset <= (mi - index_slope * MaxIndex); index_offset++)
- {
- //-------------------------------------
- // set a new index data to try
- //-------------------------------------
- for (CGU_UINT32 k = 0; k < numEntries; k++)
- index_cluster[k] = index_tmp[k] * index_slope + index_offset;
- if (get_ideal_cluster2(image_cluster, index_cluster, Mi_, image_src, numEntries, channels3or4))
- {
- CGU_Vec4ui epo_code_shake[2] = {{0, 0, 0, 0}, {0, 0, 0, 0}};
- err_shake = shake2( epo_code_shake, // return new epo
- image_cluster,
- index_cluster,
- image_src,
- index_bits,
- type,
- max_bits,
- use_par,
- numEntries, // max 16
- channels3or4);
- if (err_shake < err_cluster)
- {
- err_cluster = err_shake;
- epo_code_best[0] = epo_code_shake[0];
- epo_code_best[1] = epo_code_shake[1];
- }
- }
- }
- }
- if ((err_cluster != CMP_FLOAT_MAX))
- {
- //=========================
- // test results for quality
- //=========================
- CGU_UINT32 index_best[16] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
- err_requant = requantized_image_err2( index_best, // new index results
- epo_code_best, // prior result input
- index_bits,
- max_bits,
- image_src,
- numEntries,
- channels3or4);
- if (err_requant < err_best)
- {
- //better = 1;
- for (CGU_UINT32 k = 0; k < numEntries; k++)
- index_io[k] = index_tmp[k] = index_best[k];
- //cmp_pack4bitindex(index_packed_out, index_io);
- epo_code_out[0] = epo_code_best[0];
- epo_code_out[1] = epo_code_best[1];
- err_best = err_requant;
- }
- }
- // Early out if we have our target err
- if (err_best <= errorThreshold)
- break;
- MaxIndex = index_collapse2(index_tmp, numEntries);
- if (MaxIndex == 0)
- break;
- }
- // Did not find anything better over max tries
- return err_best;
- }
- #endif
- #endif // ENABLE_CMP_API : CPU & GPU Code block
- //=================================================================================
- // GPU API Interfaces
- // mode 4 5 6 all have 1 subset per block, and fix-up index is always index 0
- //=================================================================================
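- // Thread-mapping sketch (assuming THREAD_GROUP_SIZE = 64): TryMode456CS uses 16
- // threads per 4x4 block, so each thread group processes 64 / 16 = 4 blocks;
- // threadBase is the first thread of a block and threadInBlock selects the texel
- // at (threadInBlock % 4, threadInBlock / 4) within it.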
- CMP_NUMTHREADS(THREAD_GROUP_SIZE, 1, 1) void TryMode456CS(CGU_UINT32 GI CMP_SVGROUPINDEX, CGU_Vec3ui groupID CMP_SVGROUPID)
- {
- CMP_CONSTANT CGU_UINT32 MAX_USED_THREAD = 16;
- CGU_UINT32 BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- CGU_UINT32 blockInGroup = GI / MAX_USED_THREAD;
- CGU_UINT32 blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- CGU_UINT32 threadBase = blockInGroup * MAX_USED_THREAD;
- CGU_UINT32 threadInBlock = GI - threadBase;
- CGU_UINT32 block_y = blockID / g_num_block_x;
- CGU_UINT32 block_x = blockID - block_y * g_num_block_x;
- CGU_UINT32 base_x = block_x * BLOCK_SIZE_X;
- CGU_UINT32 base_y = block_y * BLOCK_SIZE_Y;
- #if (defined(ENABLE_MODE4) || defined(ENABLE_MODE5) || defined(ENABLE_MODE6)|| defined(ENABLE_CMP_MODE6))
- if (threadInBlock < 16)
- {
- CGU_Vec4f px = g_Input.Load(CGU_Vec3ui(base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0)) * 255.0f;
- px = clamp(px, 0.0f, 255.0f);
- shared_temp[GI].pixel.r = (CGU_UINT32)px.r;
- shared_temp[GI].pixel.g = (CGU_UINT32)px.g;
- shared_temp[GI].pixel.b = (CGU_UINT32)px.b;
- shared_temp[GI].pixel.a = (CGU_UINT32)px.a;
- shared_temp[GI].endPoint_low = shared_temp[GI].pixel;
- shared_temp[GI].endPoint_high = shared_temp[GI].pixel;
- }
- GroupSync();
- if (threadInBlock < 8)
- {
- shared_temp[GI].endPoint_low = cmp_min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low);
- shared_temp[GI].endPoint_high = cmp_max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high);
- }
- GroupSync();
- if (threadInBlock < 4)
- {
- shared_temp[GI].endPoint_low = cmp_min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low);
- shared_temp[GI].endPoint_high = cmp_max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high);
- }
- GroupSync();
- if (threadInBlock < 2)
- {
- shared_temp[GI].endPoint_low = cmp_min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low);
- shared_temp[GI].endPoint_high = cmp_max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high);
- }
- GroupSync();
- if (threadInBlock < 1)
- {
- shared_temp[GI].endPoint_low = cmp_min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low);
- shared_temp[GI].endPoint_high = cmp_max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high);
- }
- GroupSync();
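- // The four steps above are a parallel min/max reduction: threads 0..7 fold in
- // lanes 8..15, threads 0..3 fold in 4..7, and so on, so after log2(16) = 4 passes
- // shared_temp[threadBase] holds the block's bounding-box endpoints.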
- CGU_Vec4ui endPoint[2];
- endPoint[0] = shared_temp[threadBase].endPoint_low;
- endPoint[1] = shared_temp[threadBase].endPoint_high;
- CGU_UINT32 error = 0xFFFFFFFF;
- CGU_UINT32 mode = 0;
- CGU_UINT32 index_selector = 0;
- CGU_UINT32 rotation = 0;
- CGU_Vec2ui indexPrec;
- if (threadInBlock < 8) // all threads of threadInBlock < 8 will be working on trying out mode 4, since only mode 4 has index selector bit
- {
- if (0 == (threadInBlock & 1)) // thread 0, 2, 4, 6
- {
- //2 represents 2bit index precision; 1 represents 3bit index precision
- index_selector = 0;
- indexPrec = CGU_Vec2ui( 2, 1 );
- }
- else // thread 1, 3, 5, 7
- {
- //2 represents 2bit index precision; 1 represents 3bit index precision
- index_selector = 1;
- indexPrec = CGU_Vec2ui( 1, 2 );
- }
- }
- else
- {
- //2 represents 2bit index precision
- indexPrec = CGU_Vec2ui( 2, 2 );
- }
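- // Mode 4 carries one 2-bit and one 3-bit index set; index_selector decides which
- // set holds color and which holds alpha. The indexPrec codes (2 = 2-bit, 1 = 3-bit)
- // select the matching rows of the aStep/aWeight tables used below.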
- CGU_Vec4ui pixel_r;
- CGU_UINT32 color_index;
- CGU_UINT32 alpha_index;
- CGU_Vec4i span;
- CGU_Vec2i span_norm_sqr;
- CGU_Vec2i dotProduct;
- #if defined(ENABLE_MODE4) || defined(ENABLE_MODE5)
- if (threadInBlock < 12) // Try mode 4 5 in threads 0..11
- {
- CGU_Vec4ui ep_quantized[2];
- // mode 4 5 have component rotation
- if ((threadInBlock < 2) || (8 == threadInBlock)) // rotation = 0 in thread 0, 1
- {
- rotation = 0;
- }
- else if ((threadInBlock < 4) || (9 == threadInBlock)) // rotation = 1 in thread 2, 3
- {
- rotation = 1;
- set_pixel_rotation(endPoint[0],rotation);
- set_pixel_rotation(endPoint[1],rotation);
- }
- else if ((threadInBlock < 6) || (10 == threadInBlock)) // rotation = 2 in thread 4, 5
- {
- rotation = 2;
- set_pixel_rotation(endPoint[0],rotation);
- set_pixel_rotation(endPoint[1],rotation);
- }
- else if ((threadInBlock < 8) || (11 == threadInBlock)) // rotation = 3 in thread 6, 7
- {
- rotation = 3;
- set_pixel_rotation(endPoint[0],rotation);
- set_pixel_rotation(endPoint[1],rotation);
- }
- if (threadInBlock < 8) // try mode 4 in threads 0..7
- {
- // mode 4 thread distribution
- // Thread 0 1 2 3 4 5 6 7
- // Rotation 0 0 1 1 2 2 3 3
- // Index selector 0 1 0 1 0 1 0 1
- mode = 4;
- compress_endpoints4( endPoint,ep_quantized );
- }
- else // try mode 5 in threads 8..11
- {
- // mode 5 thread distribution
- // Thread 8 9 10 11
- // Rotation 0 1 2 3
- mode = 5;
- compress_endpoints5( endPoint,ep_quantized );
- }
- CGU_Vec4ui pixel = shared_temp[threadBase + 0].pixel;
- set_pixel_rotation(pixel,rotation);
- span = cmp_castimp(endPoint[1] - endPoint[0]);
- span_norm_sqr = CGU_Vec2i( dot( span.rgb, span.rgb ), span.a * span.a );
- // should be the same as above
- CGU_Vec3ui diff0 = pixel.rgb - endPoint[0].rgb;
- CGU_Vec3ui diff1 = pixel.rgb - endPoint[1].rgb;
- dotProduct = CGU_Vec2i( dot( diff0, diff0), dot( diff1, diff1) );
- if ( dotProduct.x > dotProduct.y )
- {
- span.rgb.x = -span.rgb.x;
- span.rgb.y = -span.rgb.y;
- span.rgb.z = -span.rgb.z;
- swap(endPoint[0].rgb, endPoint[1].rgb);
- }
- CGU_UINT32 diffa0 = pixel.a - endPoint[0].a;
- CGU_UINT32 diffa1 = pixel.a - endPoint[1].a;
- dotProduct = CGU_Vec2i( dot( diffa0, diffa0 ), dot( diffa1,diffa1 ) );
- if ( dotProduct.x > dotProduct.y )
- {
- span.a = -span.a;
- swap(endPoint[0].a, endPoint[1].a);
- }
- error = 0;
- for ( CGU_UINT32 i = 0; i < 16; i ++ )
- {
- pixel = shared_temp[threadBase + i].pixel;
- set_pixel_rotation(pixel,rotation);
- diff0 = pixel.rgb - endPoint[0].rgb;
- dotProduct.x = dot( span.rgb, diff0 );
- color_index = ( span_norm_sqr.x <= 0 /*endPoint[0] == endPoint[1]*/ || dotProduct.x <= 0 /*pixel == endPoint[0]*/ ) ? 0
- : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[indexPrec.x][ CGU_UINT32( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] );
- diffa0 = pixel.a - endPoint[0].a;
- dotProduct.y = dot( span.a, diffa0 );
- alpha_index = ( span_norm_sqr.y <= 0 || dotProduct.y <= 0 ) ? 0
- : ( ( dotProduct.y < span_norm_sqr.y ) ? aStep[indexPrec.y][ CGU_UINT32( dotProduct.y * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] );
- pixel_r.rgb = ( endPoint[0].rgb * ( 64 - aWeight[indexPrec.x][color_index] ) + endPoint[1].rgb * aWeight[indexPrec.x][color_index] + 32U );
- pixel_r.rgb.x = pixel_r.rgb.x >> 6;
- pixel_r.rgb.y = pixel_r.rgb.y >> 6;
- pixel_r.rgb.z = pixel_r.rgb.z >> 6;
- pixel_r.a = ( endPoint[0].a * ( 64 - aWeight[indexPrec.y][alpha_index] ) + endPoint[1].a * aWeight[indexPrec.y][alpha_index] + 32 ) >> 6;
- Ensure_A_Is_Larger( pixel_r, pixel );
- pixel_r -= pixel;
- set_pixel_rotation(pixel_r,rotation);
- error += ComputeError(pixel_r, pixel_r);
- }
- }
- else
- #endif
- #ifdef ENABLE_MODE6
- if (threadInBlock < 16)// Try mode 6 in threads 12..15, since in mode 4 5 6, only mode 6 has p bit
- {
- CGU_UINT32 p = threadInBlock - 12;
- CGU_Vec4ui ep_quantized[2];
- compress_endpoints6( endPoint,ep_quantized, CGU_Vec2ui(p & 1 , (p >> 1)& 1 ) );
- CGU_Vec4ui pixel = shared_temp[threadBase + 0].pixel;
- span = cmp_castimp( endPoint[1] - endPoint[0] );
- span_norm_sqr = dot( span, span );
- CGU_Vec4ui diff4 = pixel - endPoint[0];
- dotProduct = dot( span, diff4 );
- if ( span_norm_sqr.x > 0 && dotProduct.x >= 0 && CGU_UINT32( dotProduct.x * 63.49999 ) > CGU_UINT32( 32 * span_norm_sqr.x ) )
- {
- span = -span;
- swap(endPoint[0], endPoint[1]);
- }
-
- error = 0;
- for ( CGU_UINT32 i = 0; i < 16; i ++ )
- {
- pixel = shared_temp[threadBase + i].pixel;
- diff4 = pixel - endPoint[0];
- dotProduct.x = dot( span, diff4 );
- color_index = ( span_norm_sqr.x <= 0 || dotProduct.x <= 0 ) ? 0
- : ( ( dotProduct.x < span_norm_sqr.x ) ? aStep[0][ CGU_UINT32( dotProduct.x * 63.49999 / span_norm_sqr.x ) ] : aStep[0][63] );
-
- pixel_r = ( endPoint[0] * ( 64 - aWeight[0][color_index] ) +
- endPoint[1] * aWeight[0][color_index] + 32U ) >> 6;
-
- Ensure_A_Is_Larger( pixel_r, pixel );
- pixel_r -= pixel;
- error += ComputeError(pixel_r, pixel_r);
- }
- mode = 6;
- rotation = p; // Borrow rotation for p
- }
- #endif
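- // Index quantization note (both mode paths above): dotProduct / span_norm_sqr is
- // the pixel's fractional position along the endpoint span; scaling by 63.49999
- // maps it to 0..63 with near round-to-nearest, and aStep[...] converts that 6-bit
- // position to the closest index at the mode's index precision.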
- shared_temp[GI].error = error;
- shared_temp[GI].mode = mode;
- shared_temp[GI].index_selector = index_selector;
- shared_temp[GI].rotation = rotation;
- GroupSync();
- if (threadInBlock < 8)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 8].error )
- {
- shared_temp[GI].error = shared_temp[GI + 8].error;
- shared_temp[GI].mode = shared_temp[GI + 8].mode;
- shared_temp[GI].index_selector = shared_temp[GI + 8].index_selector;
- shared_temp[GI].rotation = shared_temp[GI + 8].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 4)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 4].error )
- {
- shared_temp[GI].error = shared_temp[GI + 4].error;
- shared_temp[GI].mode = shared_temp[GI + 4].mode;
- shared_temp[GI].index_selector = shared_temp[GI + 4].index_selector;
- shared_temp[GI].rotation = shared_temp[GI + 4].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 2)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 2].error )
- {
- shared_temp[GI].error = shared_temp[GI + 2].error;
- shared_temp[GI].mode = shared_temp[GI + 2].mode;
- shared_temp[GI].index_selector = shared_temp[GI + 2].index_selector;
- shared_temp[GI].rotation = shared_temp[GI + 2].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 1)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 1].error )
- {
- shared_temp[GI].error = shared_temp[GI + 1].error;
- shared_temp[GI].mode = shared_temp[GI + 1].mode;
- shared_temp[GI].index_selector = shared_temp[GI + 1].index_selector;
- shared_temp[GI].rotation = shared_temp[GI + 1].rotation;
- }
- // Save the fast mode settings for modes 4 & 5 (check if q = 0 for mode 6)
- g_OutBuff1[blockID].error = shared_temp[GI].error;
- g_OutBuff1[blockID].mode = shared_temp[GI].mode & 0x07;
- g_OutBuff1[blockID].rotation = shared_temp[GI].rotation;
- g_OutBuff1[blockID].index_selector = shared_temp[GI].index_selector;
- g_OutBuff1[blockID].partition = 0;
- g_OutBuff1[blockID].data2 = 0;
- // Enable cmp test
- #ifdef ENABLE_CMP_MODE6
- if ((g_quality > 0.05f)
- #ifdef ENABLE_MODE6
- && (shared_temp[GI].mode == 6)
- #endif
- )
- {
- CGU_Vec4ui image_src[16];
- for (int i = 0; i < 16; i++)
- {
- image_src[i].x = shared_temp[threadBase + i].pixel.x;
- image_src[i].y = shared_temp[threadBase + i].pixel.y;
- image_src[i].z = shared_temp[threadBase + i].pixel.z;
- image_src[i].w = shared_temp[threadBase + i].pixel.w;
- }
- CGU_Vec4ui epo_code_out[2] = {{0, 0, 0, 0}, {0, 0, 0, 0}};
- CGU_UINT32 index_packed_out[2] = {0, 0};
- CGU_UINT32 cmp_out6[4] = {0, 0, 0, 0};
- CGU_UINT32 best_index_out[16];
- CGU_UINT32 besterr = cmp_GetIndexedEndPoints(epo_code_out,
- best_index_out,
- image_src,
- 15, // numEntries 0..15 (Note this function is changed from using 16)
- 0xffffffff);
- // Error calc needs updating to be consistent across all paths
- //if (besterr > shared_temp[GI].error)
- {
- cmp_pack4bitindex32(index_packed_out, best_index_out);
- #ifdef ENABLE_CMP_REFINE_MODE6_API
- if (g_quality > 0.5f)
- {
- // Refine for better quality using the prior best_index_out as initial input
- besterr = cmp_mode6_optimize_IndexAndEndPoints(epo_code_out,
- best_index_out,
- image_src,
- 16, // numEntries
- g_modesettings[6].clusters, // 16,
- g_modesettings[6].bits, // 58,
- g_modesettings[6].channels3or4, // 4,
- 0.1f);
- cmp_pack4bitindex32(index_packed_out, best_index_out);
- }
- #endif
- cmp_encode_mode6(cmp_out6, epo_code_out, index_packed_out);
- // Addin CMP results
- g_OutBuff1[blockID].error = besterr;
- g_OutBuff1[blockID].mode = 6 | 0x10;
- g_OutBuff1[blockID].data2.x = cmp_out6[0];
- g_OutBuff1[blockID].data2.y = cmp_out6[1];
- g_OutBuff1[blockID].data2.z = cmp_out6[2];
- g_OutBuff1[blockID].data2.w = cmp_out6[3];
- } // if better than fast mode
- }
- #endif
- }
- #else
- // Init
- if (threadInBlock < 1) {
- g_OutBuff1[blockID].error = MAX_UINT;
- g_OutBuff1[blockID].mode = 0;
- g_OutBuff1[blockID].rotation = 0;
- g_OutBuff1[blockID].index_selector = 0;
- g_OutBuff1[blockID].partition = 0;
- g_OutBuff1[blockID].data2 = 0;
- }
- GroupSync();
- #endif
- }
- CMP_NUMTHREADS(THREAD_GROUP_SIZE, 1, 1) void TryMode137CS(CGU_UINT32 GI CMP_SVGROUPINDEX, CGU_Vec3ui groupID CMP_SVGROUPID) // mode 1 3 7 all have 2 subsets per block
- {
- const CGU_UINT32 MAX_USED_THREAD = 64;
- CGU_UINT32 BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- CGU_UINT32 blockInGroup = GI / MAX_USED_THREAD;
- CGU_UINT32 blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- CGU_UINT32 threadBase = blockInGroup * MAX_USED_THREAD;
- CGU_UINT32 threadInBlock = GI - threadBase;
- CGU_UINT32 block_y = blockID / g_num_block_x;
- CGU_UINT32 block_x = blockID - block_y * g_num_block_x;
- CGU_UINT32 base_x = block_x * BLOCK_SIZE_X;
- CGU_UINT32 base_y = block_y * BLOCK_SIZE_Y;
- if (threadInBlock < 16)
- {
- CGU_Vec4f px = g_Input.Load(CGU_Vec3ui(base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0)) * 255.0f;
- px = clamp(px, 0.0f, 255.0f);
- shared_temp[GI].pixel.r = (CGU_UINT32)px.r;
- shared_temp[GI].pixel.g = (CGU_UINT32)px.g;
- shared_temp[GI].pixel.b = (CGU_UINT32)px.b;
- shared_temp[GI].pixel.a = (CGU_UINT32)px.a;
- }
- GroupSync();
- shared_temp[GI].error = 0xFFFFFFFF;
- // Use this to test only one of modes 1,3, or 7
- // if (g_mode_id != 7) {
- // if (threadInBlock == 0)
- // g_OutBuff1[blockID].error = g_InBuff[blockID].error;
- // g_OutBuff1[blockID].mode = g_InBuff[blockID].mode;
- // g_OutBuff1[blockID].partition = g_InBuff[blockID].partition;
- // g_OutBuff1[blockID].index_selector = g_InBuff[blockID].index_selector;
- // g_OutBuff1[blockID].rotation = g_InBuff[blockID].rotation;
- // g_OutBuff1[blockID].data2 = g_InBuff[blockID].data2;
- // return;
- // }
- #if defined(ENABLE_MODE1) || defined(ENABLE_MODE3) || defined(ENABLE_MODE7)
- CGU_Vec4ui pixel_r;
- CGU_Vec4ui endPoint[2][2]; // endPoint[0..1 for subset id][0..1 for low and high in the subset]
- CGU_Vec4ui endPointBackup[2][2];
- CGU_UINT32 color_index;
- if (threadInBlock < 64)
- {
- CGU_UINT32 partition = threadInBlock;
- CGU_UINT32 i;
- endPoint[0][0] = MAX_UINT;
- endPoint[0][1] = MIN_UINT;
- endPoint[1][0] = MAX_UINT;
- endPoint[1][1] = MIN_UINT;
- CGU_UINT32 bits = blockPartitions[partition];
- for (i = 0; i < 16; i++)
- {
- CGU_Vec4ui pixel = shared_temp[threadBase + i].pixel;
- if (((bits >> i) & 0x01) == 1)
- {
- endPoint[1][0] = cmp_min(endPoint[1][0], pixel);
- endPoint[1][1] = cmp_max(endPoint[1][1], pixel);
- }
- else
- {
- endPoint[0][0] = cmp_min(endPoint[0][0], pixel);
- endPoint[0][1] = cmp_max(endPoint[0][1], pixel);
- }
- }
- endPointBackup[0][0] = endPoint[0][0];
- endPointBackup[0][1] = endPoint[0][1];
- endPointBackup[1][0] = endPoint[1][0];
- endPointBackup[1][1] = endPoint[1][1];
- CGU_UINT32 max_p = 2; // mode 1
- #if defined(ENABLE_MODE3) || defined(ENABLE_MODE7)
- if (g_mode_id != 1)
- {
- // in mode 3 7, there are two p bits per subset, one for each end point
- max_p = 4;
- }
- #endif
- CGU_UINT32 final_p[2] = {0, 0};
- CGU_UINT32 error[2] = {MAX_UINT, MAX_UINT};
- for (CGU_UINT32 p = 0; p < max_p; p++)
- {
- endPoint[0][0] = endPointBackup[0][0];
- endPoint[0][1] = endPointBackup[0][1];
- endPoint[1][0] = endPointBackup[1][0];
- endPoint[1][1] = endPointBackup[1][1];
- for (i = 0; i < 2; i++) // loop through 2 subsets
- {
- #if defined(ENABLE_MODE1)
- if (g_mode_id == 1)
- {
- CGU_Vec4ui quantized[2];
- compress_endpoints1(endPoint[i], quantized, p);
- }
- #endif
- #if defined(ENABLE_MODE3)
- if (g_mode_id == 3)
- {
- CGU_Vec4ui quantized[2];
- compress_endpoints3(endPoint[i], quantized, CGU_Vec2ui(p & 1, (p >> 1) & 1));
- }
- #endif
- #if defined(ENABLE_MODE7)
- if (g_mode_id == 7)
- {
- CGU_Vec4ui quantized[2];
- compress_endpoints7(endPoint[i], quantized, CGU_Vec2ui(p & 1, (p >> 1) & 1));
- }
- #endif
- }
- CGU_Vec4i span[2];
- span[0].x = endPoint[0][1].x - endPoint[0][0].x;
- span[0].y = endPoint[0][1].y - endPoint[0][0].y;
- span[0].z = endPoint[0][1].z - endPoint[0][0].z;
- span[0].w = endPoint[0][1].w - endPoint[0][0].w;
- span[1].x = endPoint[1][1].x - endPoint[1][0].x;
- span[1].y = endPoint[1][1].y - endPoint[1][0].y;
- span[1].z = endPoint[1][1].z - endPoint[1][0].z;
- span[1].w = endPoint[1][1].w - endPoint[1][0].w;
- #if defined(ENABLE_MODE3)
- if (g_mode_id != 7)
- {
- span[0].w = span[1].w = 0;
- }
- #endif
- CGU_INT span_norm_sqr[2];
- span_norm_sqr[0] = dot(span[0], span[0]);
- span_norm_sqr[1] = dot(span[1], span[1]);
- CGU_Vec4i diff;
- diff.x = shared_temp[threadBase + 0].pixel.x - endPoint[0][0].x;
- diff.y = shared_temp[threadBase + 0].pixel.y - endPoint[0][0].y;
- diff.z = shared_temp[threadBase + 0].pixel.z - endPoint[0][0].z;
- diff.w = shared_temp[threadBase + 0].pixel.w - endPoint[0][0].w;
- // TODO: again, this shouldn't be necessary here in error calculation
- CGU_INT dotProduct = dot(span[0],diff);
- if (span_norm_sqr[0] > 0 && dotProduct > 0 && CGU_UINT32(dotProduct * 63.49999) > CGU_UINT32(32 * span_norm_sqr[0]))
- {
- span[0].x = -span[0].x;
- span[0].y = -span[0].y;
- span[0].z = -span[0].z;
- span[0].w = -span[0].w;
- swap(endPoint[0][0], endPoint[0][1]);
- }
- diff.x = shared_temp[threadBase + candidateFixUpIndex1D[partition].x].pixel.x - endPoint[1][0].x;
- diff.y = shared_temp[threadBase + candidateFixUpIndex1D[partition].x].pixel.y - endPoint[1][0].y;
- diff.z = shared_temp[threadBase + candidateFixUpIndex1D[partition].x].pixel.z - endPoint[1][0].z;
- diff.w = shared_temp[threadBase + candidateFixUpIndex1D[partition].x].pixel.w - endPoint[1][0].w;
- dotProduct = dot(span[1], diff);
- if (span_norm_sqr[1] > 0 && dotProduct > 0 && CGU_UINT32(dotProduct * 63.49999) > CGU_UINT32(32 * span_norm_sqr[1]))
- {
- span[1].x = -span[1].x;
- span[1].y = -span[1].y;
- span[1].z = -span[1].z;
- span[1].w = -span[1].w;
- swap(endPoint[1][0], endPoint[1][1]);
- }
- CGU_UINT32 step_selector = 1; // mode 1 has 3 bit index
- #if defined(ENABLE_MODE3) || defined(ENABLE_MODE7)
- if (g_mode_id != 1)
- {
- step_selector = 2; // mode 3 7 have 2 bit index
- }
- #endif
- CGU_UINT32 p_error[2] = {0, 0};
- for (i = 0; i < 16; i++)
- {
- CGU_UINT32 subset_index = (bits >> i) & 0x01;
- if (subset_index == 1)
- {
- diff.x = shared_temp[threadBase + i].pixel.x - endPoint[1][0].x;
- diff.y = shared_temp[threadBase + i].pixel.y - endPoint[1][0].y;
- diff.z = shared_temp[threadBase + i].pixel.z - endPoint[1][0].z;
- diff.w = shared_temp[threadBase + i].pixel.w - endPoint[1][0].w;
- dotProduct = dot(span[1], diff);
- color_index = (span_norm_sqr[1] <= 0 || dotProduct <= 0)
- ? 0
- : ((dotProduct < span_norm_sqr[1]) ? aStep[step_selector][CGU_UINT32(dotProduct * 63.49999 / span_norm_sqr[1])]
- : aStep[step_selector][63]);
- }
- else
- {
- diff.x = shared_temp[threadBase + i].pixel.x - endPoint[0][0].x;
- diff.y = shared_temp[threadBase + i].pixel.y - endPoint[0][0].y;
- diff.z = shared_temp[threadBase + i].pixel.z - endPoint[0][0].z;
- diff.w = shared_temp[threadBase + i].pixel.w - endPoint[0][0].w;
- dotProduct = dot(span[0], diff);
- color_index = (span_norm_sqr[0] <= 0 || dotProduct <= 0)
- ? 0
- : ((dotProduct < span_norm_sqr[0]) ? aStep[step_selector][CGU_UINT32(dotProduct * 63.49999 / span_norm_sqr[0])]
- : aStep[step_selector][63]);
- }
- pixel_r = (endPoint[subset_index][0] * (64 - aWeight[step_selector][color_index]) +
- endPoint[subset_index][1] * aWeight[step_selector][color_index] + 32U) >>
- 6;
- if (g_mode_id != 7)
- {
- pixel_r.a = 255;
- }
- CGU_Vec4ui pixel = shared_temp[threadBase + i].pixel;
- Ensure_A_Is_Larger(pixel_r, pixel);
- pixel_r -= pixel;
- CGU_UINT32 pixel_error = ComputeError(pixel_r, pixel_r);
- if (subset_index == 1)
- p_error[1] += pixel_error;
- else
- p_error[0] += pixel_error;
- }
- for (i = 0; i < 2; i++)
- {
- if (p_error[i] < error[i])
- {
- error[i] = p_error[i];
- final_p[i] = p;
- }
- }
- }
- shared_temp[GI].error = error[0] + error[1];
- shared_temp[GI].mode = g_mode_id;
- shared_temp[GI].partition = partition;
- // mode 1 3 7 don't have rotation, we use rotation for p bits
- if (g_mode_id == 1)
- shared_temp[GI].rotation = (final_p[1] << 1) | final_p[0];
- else
- shared_temp[GI].rotation = (final_p[1] << 2) | final_p[0];
- }
- GroupSync();
- if (threadInBlock < 32)
- {
- if (shared_temp[GI].error > shared_temp[GI + 32].error)
- {
- shared_temp[GI].error = shared_temp[GI + 32].error;
- shared_temp[GI].mode = shared_temp[GI + 32].mode;
- shared_temp[GI].partition = shared_temp[GI + 32].partition;
- shared_temp[GI].rotation = shared_temp[GI + 32].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 16)
- {
- if (shared_temp[GI].error > shared_temp[GI + 16].error)
- {
- shared_temp[GI].error = shared_temp[GI + 16].error;
- shared_temp[GI].mode = shared_temp[GI + 16].mode;
- shared_temp[GI].partition = shared_temp[GI + 16].partition;
- shared_temp[GI].rotation = shared_temp[GI + 16].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 8)
- {
- if (shared_temp[GI].error > shared_temp[GI + 8].error)
- {
- shared_temp[GI].error = shared_temp[GI + 8].error;
- shared_temp[GI].mode = shared_temp[GI + 8].mode;
- shared_temp[GI].partition = shared_temp[GI + 8].partition;
- shared_temp[GI].rotation = shared_temp[GI + 8].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 4)
- {
- if (shared_temp[GI].error > shared_temp[GI + 4].error)
- {
- shared_temp[GI].error = shared_temp[GI + 4].error;
- shared_temp[GI].mode = shared_temp[GI + 4].mode;
- shared_temp[GI].partition = shared_temp[GI + 4].partition;
- shared_temp[GI].rotation = shared_temp[GI + 4].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 2)
- {
- if (shared_temp[GI].error > shared_temp[GI + 2].error)
- {
- shared_temp[GI].error = shared_temp[GI + 2].error;
- shared_temp[GI].mode = shared_temp[GI + 2].mode;
- shared_temp[GI].partition = shared_temp[GI + 2].partition;
- shared_temp[GI].rotation = shared_temp[GI + 2].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 1)
- {
- if (shared_temp[GI].error > shared_temp[GI + 1].error)
- {
- shared_temp[GI].error = shared_temp[GI + 1].error;
- shared_temp[GI].mode = shared_temp[GI + 1].mode;
- shared_temp[GI].partition = shared_temp[GI + 1].partition;
- shared_temp[GI].rotation = shared_temp[GI + 1].rotation;
- }
- if ((g_InBuff[blockID].error > shared_temp[GI].error)){
- g_OutBuff1[blockID].error = shared_temp[GI].error;
- g_OutBuff1[blockID].mode = shared_temp[GI].mode;
- g_OutBuff1[blockID].partition = shared_temp[GI].partition;
- g_OutBuff1[blockID].rotation = shared_temp[GI].rotation;
- g_OutBuff1[blockID].index_selector = 0;
- g_OutBuff1[blockID].data2 = 0;
- }
- else
- {
- g_OutBuff1[blockID].error = g_InBuff[blockID].error;
- g_OutBuff1[blockID].mode = g_InBuff[blockID].mode;
- g_OutBuff1[blockID].partition = g_InBuff[blockID].partition;
- g_OutBuff1[blockID].index_selector = g_InBuff[blockID].index_selector;
- g_OutBuff1[blockID].rotation = g_InBuff[blockID].rotation;
- g_OutBuff1[blockID].data2 = g_InBuff[blockID].data2;
- }
- }
- #else
- GroupSync();
- if (threadInBlock < 1)
- {
- // carry over prior results
- g_OutBuff1[blockID].error = g_InBuff[blockID].error;
- g_OutBuff1[blockID].mode = g_InBuff[blockID].mode;
- g_OutBuff1[blockID].partition = g_InBuff[blockID].partition;
- g_OutBuff1[blockID].index_selector = g_InBuff[blockID].index_selector;
- g_OutBuff1[blockID].rotation = g_InBuff[blockID].rotation;
- g_OutBuff1[blockID].data2 = g_InBuff[blockID].data2;
- }
- #endif
- }
- CMP_NUMTHREADS(THREAD_GROUP_SIZE, 1, 1) void TryMode02CS(CGU_UINT32 GI CMP_SVGROUPINDEX, CGU_Vec3ui groupID CMP_SVGROUPID) // mode 0 2 have 3 subsets per block
- {
- const CGU_UINT32 MAX_USED_THREAD = 64;
- CGU_UINT32 BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- CGU_UINT32 blockInGroup = GI / MAX_USED_THREAD;
- CGU_UINT32 blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- CGU_UINT32 threadBase = blockInGroup * MAX_USED_THREAD;
- CGU_UINT32 threadInBlock = GI - threadBase;
- CGU_UINT32 block_y = blockID / g_num_block_x;
- CGU_UINT32 block_x = blockID - block_y * g_num_block_x;
- CGU_UINT32 base_x = block_x * BLOCK_SIZE_X;
- CGU_UINT32 base_y = block_y * BLOCK_SIZE_Y;
- #if defined(ENABLE_MODE0) || defined(ENABLE_MODE2)
- if (threadInBlock < 16)
- {
- CGU_Vec4f px = g_Input.Load(CGU_Vec3ui(base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0)) * 255.0f;
- px = clamp(px, 0.0f, 255.0f);
- shared_temp[GI].pixel.r = (CGU_UINT32)px.r;
- shared_temp[GI].pixel.g = (CGU_UINT32)px.g;
- shared_temp[GI].pixel.b = (CGU_UINT32)px.b;
- shared_temp[GI].pixel.a = (CGU_UINT32)px.a;
- }
- GroupSync();
- shared_temp[GI].error = 0xFFFFFFFF;
- CGU_UINT32 num_partitions;
- if (0 == g_mode_id)
- {
- num_partitions = 16;
- }
- else
- {
- num_partitions = 64;
- }
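- // Mode 0 has a 4-bit partition field (16 shapes) while mode 2 has 6 bits (64 shapes);
- // both use the three-subset shape table, so partition ids are biased by +64 below
- // and blockPartitions2 is indexed with (partition - 64).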
- CGU_Vec4ui pixel_r;
- CGU_Vec4ui endPoint[3][2]; // endPoint[0..1 for subset id][0..1 for low and high in the subset]
- CGU_Vec4ui endPointBackup[3][2];
- CGU_UINT32 color_index[16];
- if (threadInBlock < num_partitions)
- {
- CGU_UINT32 partition = threadInBlock + 64;
- endPoint[0][0] = MAX_UINT;
- endPoint[0][1] = MIN_UINT;
- endPoint[1][0] = MAX_UINT;
- endPoint[1][1] = MIN_UINT;
- endPoint[2][0] = MAX_UINT;
- endPoint[2][1] = MIN_UINT;
- CGU_UINT32 bits2 = blockPartitions2[partition - 64];
- CGU_UINT32 i;
- for ( i = 0; i < 16; i ++ )
- {
- CGU_Vec4ui pixel = shared_temp[threadBase + i].pixel;
- CGU_UINT32 subset_index = ( bits2 >> ( i * 2 ) ) & 0x03;
- if ( subset_index == 2 )
- {
- endPoint[2][0] = cmp_min( endPoint[2][0], pixel );
- endPoint[2][1] = cmp_max( endPoint[2][1], pixel );
- }
- else if ( subset_index == 1 )
- {
- endPoint[1][0] = cmp_min( endPoint[1][0], pixel );
- endPoint[1][1] = cmp_max( endPoint[1][1], pixel );
- }
- else
- {
- endPoint[0][0] = cmp_min( endPoint[0][0], pixel );
- endPoint[0][1] = cmp_max( endPoint[0][1], pixel );
- }
- }
- endPointBackup[0][0] = endPoint[0][0];
- endPointBackup[0][1] = endPoint[0][1];
- endPointBackup[1][0] = endPoint[1][0];
- endPointBackup[1][1] = endPoint[1][1];
- endPointBackup[2][0] = endPoint[2][0];
- endPointBackup[2][1] = endPoint[2][1];
- CGU_UINT32 max_p;
- if (0 == g_mode_id)
- {
- max_p = 4;
- }
- else
- {
- max_p = 1;
- }
- CGU_UINT32 final_p[3] = { 0, 0, 0 };
- CGU_UINT32 error[3] = { MAX_UINT, MAX_UINT, MAX_UINT };
- CGU_Vec4ui ep_quantized[2];
- for ( CGU_UINT32 p = 0; p < max_p; p ++ )
- {
- endPoint[0][0] = endPointBackup[0][0];
- endPoint[0][1] = endPointBackup[0][1];
- endPoint[1][0] = endPointBackup[1][0];
- endPoint[1][1] = endPointBackup[1][1];
- endPoint[2][0] = endPointBackup[2][0];
- endPoint[2][1] = endPointBackup[2][1];
- for ( i = 0; i < 3; i ++ )
- {
- if (0 == g_mode_id)
- {
- compress_endpoints0( endPoint[i], ep_quantized, CGU_Vec2ui(p & 1, (p >> 1) & 1));
- }
- else
- {
- compress_endpoints2( endPoint[i],ep_quantized );
- }
- }
- CGU_UINT32 step_selector = 1 + (2 == g_mode_id);
- CGU_Vec4i span[3];
- span[0] = cmp_castimp(endPoint[0][1] - endPoint[0][0]);
- span[1] = cmp_castimp(endPoint[1][1] - endPoint[1][0]);
- span[2] = cmp_castimp(endPoint[2][1] - endPoint[2][0]);
- span[0].w = span[1].w = span[2].w = 0;
- CGU_INT span_norm_sqr[3];
- span_norm_sqr[0] = dot( span[0], span[0] );
- span_norm_sqr[1] = dot( span[1], span[1] );
- span_norm_sqr[2] = dot( span[2], span[2] );
- // TODO: again, this shouldn't be necessary here in error calculation
- CGU_UINT32 ci[3] = { 0, candidateFixUpIndex1D[partition].x, candidateFixUpIndex1D[partition].y };
- CGU_Vec4ui diff;
- for (i = 0; i < 3; i ++)
- {
- diff = shared_temp[threadBase + ci[i]].pixel - endPoint[i][0];
- CGU_INT dotProduct = dot( span[i], diff );
- if ( span_norm_sqr[i] > 0 && dotProduct > 0 && CGU_UINT32( dotProduct * 63.49999 ) > CGU_UINT32( 32 * span_norm_sqr[i] ) )
- {
- span[i] = -span[i];
- swap(endPoint[i][0], endPoint[i][1]);
- }
- }
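- // BC7 requires the index of each subset's fix-up (anchor) pixel to have a
- // clear MSB. If the anchor projects into the upper half of the span, the
- // endpoints are swapped (and the span negated) to restore that invariant.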
- CGU_UINT32 p_error[3] = { 0, 0, 0 };
- for ( i = 0; i < 16; i ++ )
- {
- CGU_UINT32 subset_index = ( bits2 >> ( i * 2 ) ) & 0x03;
- if ( subset_index == 2 )
- {
- diff = shared_temp[threadBase + i].pixel - endPoint[2][0];
- CGU_INT dotProduct = dot( span[2], diff );
- color_index[i] = ( span_norm_sqr[2] <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr[2] ) ? aStep[step_selector][ CGU_UINT32( dotProduct * 63.49999 / span_norm_sqr[2] ) ] : aStep[step_selector][63] );
- }
- else if ( subset_index == 1 )
- {
- diff = shared_temp[threadBase + i].pixel - endPoint[1][0];
- CGU_INT dotProduct = dot( span[1], diff );
- color_index[i] = ( span_norm_sqr[1] <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr[1] ) ? aStep[step_selector][ CGU_UINT32( dotProduct * 63.49999 / span_norm_sqr[1] ) ] : aStep[step_selector][63] );
- }
- else
- {
- diff = shared_temp[threadBase + i].pixel - endPoint[0][0];
- CGU_INT dotProduct = dot( span[0], diff );
- color_index[i] = ( span_norm_sqr[0] <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr[0] ) ? aStep[step_selector][ CGU_UINT32( dotProduct * 63.49999 / span_norm_sqr[0] ) ] : aStep[step_selector][63] );
- }
- pixel_r = ( endPoint[subset_index][0]*( 64 - aWeight[step_selector][color_index[i]] ) +
- endPoint[subset_index][1]* aWeight[step_selector][color_index[i]] + 32U ) >> 6;
- pixel_r.a = 255;
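- // Ensure_A_Is_Larger orders each channel pair so the unsigned subtraction
- // below cannot underflow; ComputeError then accumulates the squared error.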
- CGU_Vec4ui pixel = shared_temp[threadBase + i].pixel;
- Ensure_A_Is_Larger( pixel_r, pixel );
- pixel_r -= pixel;
- CGU_UINT32 pixel_error = ComputeError(pixel_r, pixel_r);
- if ( subset_index == 2 )
- p_error[2] += pixel_error;
- else if ( subset_index == 1 )
- p_error[1] += pixel_error;
- else
- p_error[0] += pixel_error;
- }
- for ( i = 0; i < 3; i++ )
- {
- if (p_error[i] < error[i])
- {
- error[i] = p_error[i];
- final_p[i] = p; // Borrow rotation for p
- }
- }
- }
- shared_temp[GI].error = error[0] + error[1] + error[2];
- shared_temp[GI].partition = partition;
- shared_temp[GI].rotation = (final_p[2] << 4) | (final_p[1] << 2) | final_p[0];
- }
- GroupSync();
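- // Parallel min-reduction over the candidate slots (error was pre-set to
- // 0xFFFFFFFF for unused threads): six halving steps (32, 16, ... 1) leave
- // the lowest-error candidate in the block's first slot.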
- if (threadInBlock < 32)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 32].error )
- {
- shared_temp[GI].error = shared_temp[GI + 32].error;
- shared_temp[GI].partition = shared_temp[GI + 32].partition;
- shared_temp[GI].rotation = shared_temp[GI + 32].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 16)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 16].error )
- {
- shared_temp[GI].error = shared_temp[GI + 16].error;
- shared_temp[GI].partition = shared_temp[GI + 16].partition;
- shared_temp[GI].rotation = shared_temp[GI + 16].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 8)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 8].error )
- {
- shared_temp[GI].error = shared_temp[GI + 8].error;
- shared_temp[GI].partition = shared_temp[GI + 8].partition;
- shared_temp[GI].rotation = shared_temp[GI + 8].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 4)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 4].error )
- {
- shared_temp[GI].error = shared_temp[GI + 4].error;
- shared_temp[GI].partition = shared_temp[GI + 4].partition;
- shared_temp[GI].rotation = shared_temp[GI + 4].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 2)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 2].error )
- {
- shared_temp[GI].error = shared_temp[GI + 2].error;
- shared_temp[GI].partition = shared_temp[GI + 2].partition;
- shared_temp[GI].rotation = shared_temp[GI + 2].rotation;
- }
- }
- GroupSync();
- if (threadInBlock < 1)
- {
- if ( shared_temp[GI].error > shared_temp[GI + 1].error )
- {
- shared_temp[GI].error = shared_temp[GI + 1].error;
- shared_temp[GI].partition = shared_temp[GI + 1].partition;
- shared_temp[GI].rotation = shared_temp[GI + 1].rotation;
- }
- if (g_InBuff[blockID].error > shared_temp[GI].error)
- {
- g_OutBuff1[blockID].error = shared_temp[GI].error;
- g_OutBuff1[blockID].mode = g_mode_id;
- g_OutBuff1[blockID].partition = shared_temp[GI].partition;
- g_OutBuff1[blockID].rotation = shared_temp[GI].rotation;
- g_OutBuff1[blockID].data2 = 0;
- }
- else
- {
- g_OutBuff1[blockID].error = g_InBuff[blockID].error;
- g_OutBuff1[blockID].mode = g_InBuff[blockID].mode;
- g_OutBuff1[blockID].partition = g_InBuff[blockID].partition;
- g_OutBuff1[blockID].index_selector = g_InBuff[blockID].index_selector;
- g_OutBuff1[blockID].rotation = g_InBuff[blockID].rotation;
- g_OutBuff1[blockID].data2 = g_InBuff[blockID].data2;
- }
- }
- #endif
- }
- CMP_NUMTHREADS(THREAD_GROUP_SIZE, 1, 1) void EncodeBlocks(CGU_UINT32 GI CMP_SVGROUPINDEX, CGU_Vec3ui groupID CMP_SVGROUPID)
- {
- CMP_CONSTANT CGU_UINT32 MAX_USED_THREAD = 16;
- CGU_UINT32 BLOCK_IN_GROUP = THREAD_GROUP_SIZE / MAX_USED_THREAD;
- CGU_UINT32 blockInGroup = GI / MAX_USED_THREAD;
- CGU_UINT32 blockID = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
- CGU_UINT32 threadBase = blockInGroup * MAX_USED_THREAD;
- CGU_UINT32 threadInBlock = GI - threadBase;
- CGU_UINT32 block_y = blockID / g_num_block_x;
- CGU_UINT32 block_x = blockID - block_y * g_num_block_x;
- CGU_UINT32 base_x = block_x * BLOCK_SIZE_X;
- CGU_UINT32 base_y = block_y * BLOCK_SIZE_Y;
- CGU_UINT32 use_cmp = g_InBuff[blockID].mode & 0x10;
- CGU_UINT32 best_mode = g_InBuff[blockID].mode & 0x07;
- CGU_UINT32 best_partition = g_InBuff[blockID].partition;
- CGU_UINT32 best_index_selector = g_InBuff[blockID].index_selector;
- CGU_UINT32 best_rotation = g_InBuff[blockID].rotation;
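- // The mode field is packed: bit 4 (0x10) flags that a pre-encoded CMP
- // result is stored in data2 (consumed by the mode 6 case below), and the
- // low 3 bits select the winning mode from the earlier estimation passes.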
- if (threadInBlock < 16)
- {
- CGU_Vec4f px = g_Input.Load(CGU_Vec3ui(base_x + threadInBlock % 4, base_y + threadInBlock / 4, 0)) * 255.0f;
- px = clamp(px, 0.0f, 255.0f);
- CGU_Vec4ui pixel;
- pixel.r = (CGU_UINT32)px.r;
- pixel.g = (CGU_UINT32)px.g;
- pixel.b = (CGU_UINT32)px.b;
- pixel.a = (CGU_UINT32)px.a;
- if ((4 == best_mode) || (5 == best_mode))
- set_pixel_rotation(pixel,best_rotation);
- shared_temp[GI].pixel = pixel;
- }
- GroupSync();
- CGU_UINT32 bits = blockPartitions[best_partition];
- CGU_UINT32 bits2 = blockPartitions2[best_partition - 64];
- CGU_Vec4ui ep[2];
- ep[0] = MAX_UINT;
- ep[1] = MIN_UINT;
- CGU_Vec4ui ep_quantized[2];
- CGU_Vec3ui diff3;
- CGU_Vec4ui diff4;
- CMP_UNROLL for (CGU_INT ii = 2; ii >= 0; -- ii)
- {
- if (threadInBlock < 16)
- {
- CGU_Vec4ui epTemp[2];
- epTemp[0] = MAX_UINT;
- epTemp[1] = MIN_UINT;
- CGU_Vec4ui pixel = shared_temp[GI].pixel;
- CGU_UINT32 subset_index = ( bits >> threadInBlock ) & 0x01;
- CGU_UINT32 subset_index2 = ( bits2 >> ( threadInBlock * 2 ) ) & 0x03;
- if (0 == ii)
- {
- if ((0 == best_mode) || (2 == best_mode))
- {
- if (0 == subset_index2)
- {
- epTemp[0] = epTemp[1] = pixel;
- }
- }
- else if ((1 == best_mode) || (3 == best_mode) || (7 == best_mode))
- {
- if (0 == subset_index)
- {
- epTemp[0] = epTemp[1] = pixel;
- }
- }
- else if ((4 == best_mode) || (5 == best_mode) || (6 == best_mode))
- {
- epTemp[0] = epTemp[1] = pixel;
- }
- }
- else if (1 == ii)
- {
- if ((0 == best_mode) || (2 == best_mode))
- {
- if (1 == subset_index2)
- {
- epTemp[0] = epTemp[1] = pixel;
- }
- }
- else if ((1 == best_mode) || (3 == best_mode) || (7 == best_mode))
- {
- if (1 == subset_index)
- {
- epTemp[0] = epTemp[1] = pixel;
- }
- }
- }
- else
- {
- if ((0 == best_mode) || (2 == best_mode))
- {
- if (2 == subset_index2)
- {
- epTemp[0] = epTemp[1] = pixel;
- }
- }
- }
- shared_temp[GI].endPoint_low = epTemp[0];
- shared_temp[GI].endPoint_high = epTemp[1];
- }
- GroupSync();
- if (threadInBlock < 8)
- {
- shared_temp[GI].endPoint_low = cmp_min(shared_temp[GI].endPoint_low, shared_temp[GI + 8].endPoint_low);
- shared_temp[GI].endPoint_high = cmp_max(shared_temp[GI].endPoint_high, shared_temp[GI + 8].endPoint_high);
- }
- GroupSync();
- if (threadInBlock < 4)
- {
- shared_temp[GI].endPoint_low = cmp_min(shared_temp[GI].endPoint_low, shared_temp[GI + 4].endPoint_low);
- shared_temp[GI].endPoint_high = cmp_max(shared_temp[GI].endPoint_high, shared_temp[GI + 4].endPoint_high);
- }
- GroupSync();
- if (threadInBlock < 2)
- {
- shared_temp[GI].endPoint_low = cmp_min(shared_temp[GI].endPoint_low, shared_temp[GI + 2].endPoint_low);
- shared_temp[GI].endPoint_high = cmp_max(shared_temp[GI].endPoint_high, shared_temp[GI + 2].endPoint_high);
- }
- GroupSync();
- if (threadInBlock < 1)
- {
- shared_temp[GI].endPoint_low = cmp_min(shared_temp[GI].endPoint_low, shared_temp[GI + 1].endPoint_low);
- shared_temp[GI].endPoint_high = cmp_max(shared_temp[GI].endPoint_high, shared_temp[GI + 1].endPoint_high);
- }
- GroupSync();
- if (ii == (int)threadInBlock)
- {
- ep[0] = shared_temp[threadBase].endPoint_low;
- ep[1] = shared_temp[threadBase].endPoint_high;
- }
- }
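- // The loop above runs once per subset (ii = 2..0). Each pass reduces the
- // min/max bounds over that subset's pixels, and thread ii latches the
- // result, so threads 0..2 end up holding the endpoints of subsets 0..2.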
- if (threadInBlock < 3)
- {
- CGU_Vec2ui P;
- if (1 == best_mode)
- {
- P = (best_rotation >> threadInBlock) & 1;
- }
- else
- {
- P = CGU_Vec2ui((best_rotation >> (threadInBlock * 2 + 0))&1, (best_rotation >> (threadInBlock * 2 + 1))&1);
- }
- if (0 == best_mode)
- {
- compress_endpoints0( ep,ep_quantized, P );
- }
- else if (1 == best_mode)
- {
- compress_endpoints1( ep,ep_quantized, P );
- }
- else if (2 == best_mode)
- {
- compress_endpoints2( ep,ep_quantized );
- }
- else if (3 == best_mode)
- {
- compress_endpoints3( ep,ep_quantized, P );
- }
- else if (4 == best_mode)
- {
- compress_endpoints4( ep,ep_quantized );
- }
- else if (5 == best_mode)
- {
- compress_endpoints5( ep,ep_quantized);
- }
- else if (6 == best_mode)
- {
- compress_endpoints6( ep,ep_quantized, P );
- }
- else //if (7 == mode)
- {
- compress_endpoints7( ep,ep_quantized, P );
- }
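- // Threads 0..2 quantize their subset's endpoints at the mode-specific
- // precision. For the non-rotation modes, best_rotation was borrowed by the
- // estimation passes to carry the winning P-bits, which P unpacks above.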
- CGU_Vec4i span = cmp_castimp(ep[1] - ep[0]);
- if (best_mode < 4)
- span.w = 0;
- if ((4 == best_mode) || (5 == best_mode))
- {
- if (0 == threadInBlock)
- {
- CGU_Vec2i span_norm_sqr = CGU_Vec2i( dot( span.rgb, span.rgb ), span.a * span.a );
-
- diff3 = shared_temp[threadBase + 0].pixel.rgb - ep[0].rgb;
- CGU_Vec2i dotProduct = CGU_Vec2i( dot( span.rgb, diff3 ), span.a * ( shared_temp[threadBase + 0].pixel.a - ep[0].a ) );
- if ( span_norm_sqr.x > 0 && dotProduct.x > 0 && CGU_UINT32( dotProduct.x * 63.49999 ) > CGU_UINT32( 32 * span_norm_sqr.x ) )
- {
- swap(ep[0].rgb, ep[1].rgb);
- swap(ep_quantized[0].rgb, ep_quantized[1].rgb);
- }
- if ( span_norm_sqr.y > 0 && dotProduct.y > 0 && CGU_UINT32( dotProduct.y * 63.49999 ) > CGU_UINT32( 32 * span_norm_sqr.y ) )
- {
- swap(ep[0].a, ep[1].a);
- swap(ep_quantized[0].a, ep_quantized[1].a);
- }
- }
- }
- else //if ((0 == mode) || (2 == mode) || (1 == mode) || (3 == mode) || (7 == mode) || (6 == mode))
- {
- CGU_INT p;
- if (0 == threadInBlock)
- {
- p = 0;
- }
- else if (1 == threadInBlock)
- {
- p = candidateFixUpIndex1D[best_partition].x;
- }
- else //if (2 == threadInBlock)
- {
- p = candidateFixUpIndex1D[best_partition].y;
- }
- CGU_INT span_norm_sqr = dot( span, span );
- diff4 = shared_temp[threadBase + p].pixel - ep[0];
- CGU_INT dotProduct = dot( span, diff4 );
- if ( span_norm_sqr > 0 && dotProduct > 0 && CGU_UINT32( dotProduct * 63.49999 ) > CGU_UINT32( 32 * span_norm_sqr ) )
- {
- swap(ep[0], ep[1]);
- swap(ep_quantized[0], ep_quantized[1]);
- }
- }
- shared_temp[GI].endPoint_low = ep[0];
- shared_temp[GI].endPoint_high = ep[1];
- shared_temp[GI].endPoint_low_quantized = ep_quantized[0];
- shared_temp[GI].endPoint_high_quantized = ep_quantized[1];
- }
- GroupSync();
- if (threadInBlock < 16)
- {
- CGU_UINT32 color_index = 0;
- CGU_UINT32 alpha_index = 0;
- CGU_Vec4ui epTemp[2];
- CGU_Vec2ui indexPrec;
- if ((0 == best_mode) || (1 == best_mode))
- {
- indexPrec = 1;
- }
- else if (6 == best_mode)
- {
- indexPrec = 0;
- }
- else if (4 == best_mode)
- {
- if (0 == best_index_selector)
- {
- indexPrec = CGU_Vec2ui(2, 1);
- }
- else
- {
- indexPrec = CGU_Vec2ui(1, 2);
- }
- }
- else
- {
- indexPrec = 2;
- }
- CGU_INT subset_index;
- if ((0 == best_mode) || (2 == best_mode))
- {
- subset_index = (bits2 >> (threadInBlock * 2)) & 0x03;
- }
- else if ((1 == best_mode) || (3 == best_mode) || (7 == best_mode))
- {
- subset_index = (bits >> threadInBlock) & 0x01;
- }
- else
- {
- subset_index = 0;
- }
- epTemp[0] = shared_temp[threadBase + subset_index].endPoint_low;
- epTemp[1] = shared_temp[threadBase + subset_index].endPoint_high;
- CGU_Vec4i span = cmp_castimp(epTemp[1] - epTemp[0]);
- if (best_mode < 4)
- {
- span.w = 0;
- }
- if ((4 == best_mode) || (5 == best_mode))
- {
- CGU_Vec2i span_norm_sqr;
- span_norm_sqr.x = dot( span.rgb, span.rgb );
- span_norm_sqr.y = span.a * span.a;
- diff3 = shared_temp[threadBase + threadInBlock].pixel.rgb - epTemp[0].rgb;
- CGU_INT dotProduct = dot( span.rgb, diff3 );
- color_index = ( span_norm_sqr.x <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr.x ) ? aStep[indexPrec.x][ CGU_UINT32( dotProduct * 63.49999 / span_norm_sqr.x ) ] : aStep[indexPrec.x][63] );
- CGU_UINT32 diffa = shared_temp[threadBase + threadInBlock].pixel.a - epTemp[0].a;
- dotProduct = dot( span.a, diffa );
- alpha_index = ( span_norm_sqr.y <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr.y ) ? aStep[indexPrec.y][ CGU_UINT32( dotProduct * 63.49999 / span_norm_sqr.y ) ] : aStep[indexPrec.y][63] );
- if (best_index_selector)
- {
- swap(color_index, alpha_index);
- }
- }
- else
- {
- CGU_INT span_norm_sqr = dot( span, span );
- diff4 = shared_temp[threadBase + threadInBlock].pixel - epTemp[0] ;
- CGU_INT dotProduct = dot( span, diff4);
- color_index = ( span_norm_sqr <= 0 || dotProduct <= 0 ) ? 0
- : ( ( dotProduct < span_norm_sqr ) ? aStep[indexPrec.x][ CGU_UINT32( dotProduct * 63.49999 / span_norm_sqr ) ] : aStep[indexPrec.x][63] );
- }
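- // The shared error/mode fields are reused here to stage each pixel's color
- // and alpha indices for the block_package* packing routines below.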
- shared_temp[GI].error = color_index;
- shared_temp[GI].mode = alpha_index;
- }
- GroupSync();
- if (0 == threadInBlock)
- {
- CGU_Vec4ui blockRed = {0x001fffc0, 0xfffe0000, 0x00000001, 0x00000000};
- CGU_Vec4ui blockBlue = {0x00000040, 0xfffffff8, 0x00000001, 0x00000000};
- CGU_Vec4ui block = {0, 0, 0, 0};
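- // blockRed and blockBlue appear to be debug bit patterns; the commented-out
- // assignments below let a developer force a solid diagnostic block per mode.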
- switch (best_mode)
- {
- case 0:
- block_package0(block, best_partition, threadBase);
- //block = blockRed;
- break;
- case 1:
- block_package1(block, best_partition, threadBase);
- //block = blockRed;
- break;
- case 2:
- block_package2(block, best_partition, threadBase);
- //block = blockRed;
- break;
- case 3:
- block_package3(block, best_partition, threadBase);
- //block = blockRed;
- break;
- case 4:
- block_package4(block, best_rotation, best_index_selector, threadBase);
- //block = blockRed;
- break;
- case 5:
- block_package5(block, best_rotation, threadBase);
- //block = blockRed;
- break;
- case 6:
- if (use_cmp) {
- block = g_InBuff[blockID].data2;
- //block = blockBlue;
- }
- else {
- block_package6( block, threadBase );
- //block = blockRed;
- }
- break;
- case 7:
- block_package7(block, best_partition, threadBase);
- //block = blockRed;
- break;
- default: // error!
- block = blockRed;
- break;
- }
- g_OutBuff[blockID] = block;
- }
- }
- //=================================================
- // This is a prototype API interface that runs on the CPU;
- // move it to the GPU when completed.
- //=================================================
- CMP_STATIC CGU_Vec4ui CompressBlockBC7_CMPMSC(CMP_IN CGU_Vec4f image_src[16], CMP_IN CGU_FLOAT fquality)
- {
- CMP_UNUSED(fquality);
- CGU_Vec4ui cmp = {0, 0, 0, 0};
- #ifndef ASPM_HLSL
- #ifdef SIMULATE_GPU
- HLSLHost(image_src);
- cmp = g_OutBuff[0];
- #else
- CGU_Vec4ui image_srcui[16];
- // Convert the local float pixel data to unsigned integer form
- for (CGU_INT ii = 0; ii < 16; ii++)
- {
- image_srcui[ii].x = image_src[ii].x;
- image_srcui[ii].y = image_src[ii].y;
- image_srcui[ii].z = image_src[ii].z;
- image_srcui[ii].w = image_src[ii].w;
- }
- #if defined (ENABLE_CMP_MODE6)
- CGU_Vec4ui epo_code_out[2] = {{0, 0, 0, 0}, {0, 0, 0, 0}};
- CGU_UINT32 best_index_out[16];
- CGU_FLOAT besterr;
- CGU_FLOAT err;
- // Fast encode of the block
- besterr = cmp_GetIndexedEndPoints(epo_code_out,
- best_index_out,
- image_srcui,
- 15, // max entry index 0..15 (note: this function was changed from taking a count of 16)
- 0xffffffff);
- CGU_UINT32 index_packed_out[2] = {0, 0};
- cmp_pack4bitindex32(index_packed_out, best_index_out);
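- // cmp_pack4bitindex32 packs the sixteen 4-bit mode 6 indices into two
- // 32-bit words ready for encoding.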
- #ifdef ENABLE_CMP_REFINE_MODE6_API
- // Refine for better quality
- err = cmp_mode6_optimize_IndexAndEndPoints(epo_code_out,
- best_index_out,
- image_srcui, // using shared_temp[].pixel with 0 thread offset
- 16, // numEntries
- g_modesettings[6].clusters, // 16,
- g_modesettings[6].bits, // 58,
- g_modesettings[6].channels3or4, // 4,
- 0.1f);
- cmp_pack4bitindex32(index_packed_out, best_index_out);
- #endif
- // encode results
- CGU_UINT32 cmp_out6[4] = {0, 0, 0, 0};
- cmp_encode_mode6(cmp_out6, epo_code_out, index_packed_out);
- cmp.x = cmp_out6[0];
- cmp.y = cmp_out6[1];
- cmp.z = cmp_out6[2];
- cmp.w = cmp_out6[3];
- #endif
- #if defined (ENABLE_CMP_MODE4) || defined(ENABLE_CMP_MODE5)
- {
- CGU_UINT32 cmp_out[4] = {0, 0, 0, 0};
- Compress_mode45(cmp_out, 4, image_srcui);
- cmp.x = cmp_out[0];
- cmp.y = cmp_out[1];
- cmp.z = cmp_out[2];
- cmp.w = cmp_out[3];
- }
- #endif
- #if defined(ENABLE_CMP_MODE1)
- {
- CGU_UINT32 cmp_out1[5] = {0, 0, 0, 0, 0};
- cmp_process_mode(cmp_out1, image_srcui, 1);
- cmp.x = cmp_out1[0];
- cmp.y = cmp_out1[1];
- cmp.z = cmp_out1[2];
- cmp.w = cmp_out1[3];
- }
- #endif
- #endif // SIMULATE_GPU
- #endif // Not HLSL
- return cmp;
- }
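- //=================================================
- // Example (illustrative sketch, not part of the kernel): one way the CPU
- // prototype above might be invoked on a single 4x4 tile. The srcRGBA()
- // fetch and the tile coordinates are hypothetical; pixel values are
- // expected in [0..255] to match the float-to-uint conversion above.
- //=================================================
- // CGU_Vec4f tile[16];
- // for (CGU_INT i = 0; i < 16; i++)
- // {
- //     tile[i] = srcRGBA(x + (i % 4), y + (i / 4)); // hypothetical fetch
- // }
- // CGU_Vec4ui cmp128 = CompressBlockBC7_CMPMSC(tile, 1.0f);
- // // cmp128 now holds the 128-bit BC7 block as four 32-bit words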