| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433 |
- //=====================================================================
- // Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
- //
- // Permission is hereby granted, free of charge, to any person obtaining a copy
- // of this software and associated documentation files(the "Software"), to deal
- // in the Software without restriction, including without limitation the rights to
- // use, copy, modify, merge, publish, distribute, sublicense, and / or sell
- // copies of the Software, and to permit persons to whom the Software is
- // furnished to do so, subject to the following conditions :
- //
- // The above copyright notice and this permission notice shall be included in
- // all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- // THE SOFTWARE.
- //
- //=====================================================================
- #include "bc6_encode_kernel.h"
- #ifndef ASPM_OPENCL
- //#define USE_NEW_SINGLE_HEADER_INTERFACES
- #ifdef USE_NEW_SINGLE_HEADER_INTERFACES
- //#define USE_CMP // Testing Betsy GPU code on CPU
- //#define USE_BETSY // Testing Betsy GPU code on CPU
- //#define USE_HPC // Enable CPU definitions
- //#define USE_UNITY
- #endif
- #endif
- #include "bc6_common_encoder.h"
- #ifdef ASPM_GPU
- void memset(CGU_UINT8* srcdata, CGU_UINT8 value, CGU_INT size)
- {
- for (CGU_INT i = 0; i < size; i++)
- *srcdata++ = value;
- }
- void memcpy(CGU_UINT8* dstdata, CGU_UINT8* srcdata, CGU_INT size)
- {
- for (CGU_INT i = 0; i < size; i++)
- {
- *dstdata = *srcdata;
- srcdata++;
- dstdata++;
- }
- }
- void swap(CGU_INT A, CGU_INT B)
- {
- CGU_INT hold = A;
- A = B;
- B = hold;
- }
- #define abs fabs
- #define floorf floor
- #define sqrtf sqrt
- #define logf log
- #define ceilf ceil
- #endif
- __constant CGU_UINT8 BC6_PARTITIONS[MAX_BC6H_PARTITIONS][MAX_SUBSET_SIZE] = {
- {// 0
- 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},
- {// 1
- 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1},
- {// 2
- 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1},
- {// 3
- 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1},
- {// 4
- 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1},
- {// 5
- 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
- {// 6
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1},
- {// 7
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0,
- 1,
- 1,
- 1},
- {// 8
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 1,
- 1},
- {// 9
- 0,
- 0,
- 1,
- 1,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1},
- {// 10
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1},
- {// 11
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 1,
- 1,
- 1},
- {// 12
- 0,
- 0,
- 0,
- 1,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1},
- {// 13
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1},
- {// 14
- 0,
- 0,
- 0,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1},
- {// 15
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1,
- 1,
- 1,
- 1},
- {// 16
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 1,
- 1,
- 1,
- 0,
- 1,
- 1,
- 1,
- 1},
- {// 17
- 0,
- 1,
- 1,
- 1,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0},
- {// 18
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 1,
- 1,
- 1,
- 0},
- {// 19
- 0,
- 1,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 0},
- {// 20
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0},
- {// 21
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 1,
- 0},
- {// 22
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0},
- {// 23
- 0,
- 1,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 0,
- 1},
- {// 24
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 0},
- {// 25
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0},
- {// 26
- 0,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0},
- {// 27
- 0,
- 0,
- 1,
- 1,
- 0,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0,
- 1,
- 1,
- 0,
- 0},
- {// 28
- 0,
- 0,
- 0,
- 1,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 0,
- 1,
- 0,
- 0,
- 0},
- {// 29
- 0,
- 0,
- 0,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 0,
- 0,
- 0,
- 0},
- {// 30
- 0,
- 1,
- 1,
- 1,
- 0,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 0,
- 1,
- 1,
- 1,
- 0},
- {// 31
- 0,
- 0,
- 1,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 1,
- 0,
- 0},
- };
- CGU_DWORD get_partition_subset(CGU_INT subset, CGU_INT partI, CGU_INT index)
- {
- if (subset)
- return BC6_PARTITIONS[partI][index];
- else
- return 0;
- }
- void Partition(CGU_INT shape,
- CGU_FLOAT in[][MAX_DIMENSION_BIG],
- CGU_FLOAT subsets[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG], //[3][16][4]
- CGU_INT count[MAX_SUBSETS],
- CGU_INT8 ShapeTableToUse,
- CGU_INT dimension)
- {
- int i, j;
- int insubset = -1, inpart = 0;
- // Dont use memset: this is better for now
- for (i = 0; i < MAX_SUBSETS; i++)
- count[i] = 0;
- switch (ShapeTableToUse)
- {
- case 0:
- case 1:
- insubset = 0;
- inpart = 0;
- break;
- case 2:
- insubset = 1;
- inpart = shape;
- break;
- default:
- break;
- }
- // Nothing to do!!: Must indicate an error to user
- if (insubset == -1)
- return; // Nothing to do!!
- for (i = 0; i < MAX_SUBSET_SIZE; i++)
- {
- int subset = get_partition_subset(insubset, inpart, i);
- for (j = 0; j < dimension; j++)
- {
- subsets[subset][count[subset]][j] = in[i][j];
- }
- if (dimension < MAX_DIMENSION_BIG)
- {
- subsets[subset][count[subset]][j] = 0.0;
- }
- count[subset]++;
- }
- }
- void GetEndPoints(CGU_FLOAT EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_FLOAT outB[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG],
- CGU_INT max_subsets,
- int entryCount[MAX_SUBSETS])
- {
- // Should have some sort of error notification!
- if (max_subsets > MAX_SUBSETS)
- return;
- // Save Min and Max OutB points as EndPoints
- for (int subset = 0; subset < max_subsets; subset++)
- {
- // We now have points on direction vector(s)
- // find the min and max points
- CGU_FLOAT min = CMP_HALF_MAX;
- CGU_FLOAT max = 0;
- CGU_FLOAT val;
- int mini = 0;
- int maxi = 0;
- for (int i = 0; i < entryCount[subset]; i++)
- {
- val = outB[subset][i][0] + outB[subset][i][1] + outB[subset][i][2];
- if (val < min)
- {
- min = val;
- mini = i;
- }
- if (val > max)
- {
- max = val;
- maxi = i;
- }
- }
- // Is round best for this !
- for (int c = 0; c < MAX_DIMENSION_BIG; c++)
- {
- EndPoints[subset][0][c] = outB[subset][mini][c];
- }
- for (int c = 0; c < MAX_DIMENSION_BIG; c++)
- {
- EndPoints[subset][1][c] = outB[subset][maxi][c];
- }
- }
- }
- void covariance_d(CGU_FLOAT data[][MAX_DIMENSION_BIG], CGU_INT numEntries, CGU_FLOAT cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG], CGU_INT dimension)
- {
- #ifdef USE_DBGTRACE
- DbgTrace(());
- #endif
- int i, j, k;
- for (i = 0; i < dimension; i++)
- for (j = 0; j <= i; j++)
- {
- cov[i][j] = 0;
- for (k = 0; k < numEntries; k++)
- cov[i][j] += data[k][i] * data[k][j];
- }
- for (i = 0; i < dimension; i++)
- for (j = i + 1; j < dimension; j++)
- cov[i][j] = cov[j][i];
- }
- void centerInPlace_d(CGU_FLOAT data[][MAX_DIMENSION_BIG], int numEntries, CGU_FLOAT mean[MAX_DIMENSION_BIG], CGU_INT dimension)
- {
- #ifdef USE_DBGTRACE
- DbgTrace(());
- #endif
- int i, k;
- for (i = 0; i < dimension; i++)
- {
- mean[i] = 0;
- for (k = 0; k < numEntries; k++)
- mean[i] += data[k][i];
- }
- if (!numEntries)
- return;
- for (i = 0; i < dimension; i++)
- {
- mean[i] /= numEntries;
- for (k = 0; k < numEntries; k++)
- data[k][i] -= mean[i];
- }
- }
- void eigenVector_d(CGU_FLOAT cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG], CGU_FLOAT vector[MAX_DIMENSION_BIG], CGU_INT dimension)
- {
- #ifdef USE_DBGTRACE
- DbgTrace(());
- #endif
- // calculate an eigenvecto corresponding to a biggest eigenvalue
- // will work for non-zero non-negative matricies only
- #define EV_ITERATION_NUMBER 20
- #define EV_SLACK 2 /* additive for exp base 2)*/
- CGU_INT i, j, k, l, m, n, p, q;
- CGU_FLOAT c[2][MAX_DIMENSION_BIG][MAX_DIMENSION_BIG];
- CGU_FLOAT maxDiag;
- for (i = 0; i < dimension; i++)
- for (j = 0; j < dimension; j++)
- c[0][i][j] = cov[i][j];
- p = (int)floorf(log((BC6_FLT_MAX_EXP - EV_SLACK) / ceilf(logf((CGU_FLOAT)dimension) / logf(2.0f))) / logf(2.0f));
- //assert(p>0);
- p = p > 0 ? p : 1;
- q = (EV_ITERATION_NUMBER + p - 1) / p;
- l = 0;
- for (n = 0; n < q; n++)
- {
- maxDiag = 0;
- for (i = 0; i < dimension; i++)
- maxDiag = c[l][i][i] > maxDiag ? c[l][i][i] : maxDiag;
- if (maxDiag <= 0)
- {
- return;
- }
- //assert(maxDiag >0);
- for (i = 0; i < dimension; i++)
- for (j = 0; j < dimension; j++)
- c[l][i][j] /= maxDiag;
- for (m = 0; m < p; m++)
- {
- for (i = 0; i < dimension; i++)
- for (j = 0; j < dimension; j++)
- {
- CGU_FLOAT temp = 0;
- for (k = 0; k < dimension; k++)
- {
- // Notes:
- // This is the most consuming portion of the code and needs optimizing for perfromance
- temp += c[l][i][k] * c[l][k][j];
- }
- c[1 - l][i][j] = temp;
- }
- l = 1 - l;
- }
- }
- maxDiag = 0;
- k = 0;
- for (i = 0; i < dimension; i++)
- {
- k = c[l][i][i] > maxDiag ? i : k;
- maxDiag = c[l][i][i] > maxDiag ? c[l][i][i] : maxDiag;
- }
- CGU_FLOAT t;
- t = 0;
- for (i = 0; i < dimension; i++)
- {
- t += c[l][k][i] * c[l][k][i];
- vector[i] = c[l][k][i];
- }
- // normalization is really optional
- t = sqrtf(t);
- //assert(t>0);
- if (t <= 0)
- {
- return;
- }
- for (i = 0; i < dimension; i++)
- vector[i] /= t;
- }
- void project_d(CGU_FLOAT data[][MAX_DIMENSION_BIG],
- CGU_INT numEntries,
- CGU_FLOAT vector[MAX_DIMENSION_BIG],
- CGU_FLOAT projection[MAX_ENTRIES],
- CGU_INT dimension)
- {
- #ifdef USE_DBGTRACE
- DbgTrace(());
- #endif
- // assume that vector is normalized already
- int i, k;
- for (k = 0; k < numEntries; k++)
- {
- projection[k] = 0;
- for (i = 0; i < dimension; i++)
- {
- projection[k] += data[k][i] * vector[i];
- }
- }
- }
- typedef struct
- {
- CGU_FLOAT d;
- int i;
- } a;
- inline CGU_INT a_compare(const void* arg1, const void* arg2)
- {
- if (((a*)arg1)->d - ((a*)arg2)->d > 0)
- return 1;
- if (((a*)arg1)->d - ((a*)arg2)->d < 0)
- return -1;
- return 0;
- };
- void sortProjection(CGU_FLOAT projection[MAX_ENTRIES], CGU_INT order[MAX_ENTRIES], CGU_INT numEntries)
- {
- int i;
- a what[MAX_ENTRIES + MAX_PARTITIONS_TABLE];
- for (i = 0; i < numEntries; i++)
- what[what[i].i = i].d = projection[i];
- #ifdef USE_QSORT
- qsort((void*)&what, numEntries, sizeof(a), a_compare);
- #else
- {
- int j;
- int tmp;
- CGU_FLOAT tmp_d;
- for (i = 1; i < numEntries; i++)
- {
- for (j = i; j > 0; j--)
- {
- if (what[j - 1].d > what[j].d)
- {
- tmp = what[j].i;
- tmp_d = what[j].d;
- what[j].i = what[j - 1].i;
- what[j].d = what[j - 1].d;
- what[j - 1].i = tmp;
- what[j - 1].d = tmp_d;
- }
- }
- }
- }
- #endif
- for (i = 0; i < numEntries; i++)
- order[i] = what[i].i;
- };
- CGU_FLOAT totalError_d(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG], CGU_FLOAT data2[MAX_ENTRIES][MAX_DIMENSION_BIG], CGU_INT numEntries, CGU_INT dimension)
- {
- int i, j;
- CGU_FLOAT t = 0;
- for (i = 0; i < numEntries; i++)
- for (j = 0; j < dimension; j++)
- t += (data[i][j] - data2[i][j]) * (data[i][j] - data2[i][j]);
- return t;
- };
- // input:
- //
- // v_ points, might be uncentered
- // k - number of points in the ramp
- // n - number of points in v_
- //
- // output:
- // index, uncentered, in the range 0..k-1
- //
- void quant_AnD_Shell(CGU_FLOAT* v_, CGU_INT k, CGU_INT n, CGU_INT idx[MAX_ENTRIES])
- {
- #define MAX_BLOCK MAX_ENTRIES
- CGU_INT i, j;
- CGU_FLOAT v[MAX_BLOCK];
- CGU_FLOAT z[MAX_BLOCK];
- a d[MAX_BLOCK];
- CGU_FLOAT l;
- CGU_FLOAT mm;
- CGU_FLOAT r = 0;
- CGU_INT mi;
- CGU_FLOAT m, M, s, dm = 0.;
- m = M = v_[0];
- for (i = 1; i < n; i++)
- {
- m = m < v_[i] ? m : v_[i];
- M = M > v_[i] ? M : v_[i];
- }
- if (M == m)
- {
- for (i = 0; i < n; i++)
- idx[i] = 0;
- return;
- }
- //assert(M - m >0);
- s = (k - 1) / (M - m);
- for (i = 0; i < n; i++)
- {
- v[i] = v_[i] * s;
- idx[i] = (int)(z[i] = (v[i] + 0.5f /* stabilizer*/ - m * s)); //floorf(v[i] + 0.5f /* stabilizer*/ - m *s));
- d[i].d = v[i] - z[i] - m * s;
- d[i].i = i;
- dm += d[i].d;
- r += d[i].d * d[i].d;
- }
- if (n * r - dm * dm >= (CGU_FLOAT)(n - 1) / 4 /*slack*/ / 2)
- {
- dm /= (CGU_FLOAT)n;
- for (i = 0; i < n; i++)
- d[i].d -= dm;
- //!!! Need an OpenCL version of qsort
- #ifdef USE_QSORT
- qsort((void*)&d, n, sizeof(a), a_compare);
- #else
- {
- CGU_INT tmp;
- CGU_FLOAT tmp_d;
- for (i = 1; i < n; i++)
- {
- for (j = i; j > 0; j--)
- {
- if (d[j - 1].d > d[j].d)
- {
- tmp = d[j].i;
- tmp_d = d[j].d;
- d[j].i = d[j - 1].i;
- d[j].d = d[j - 1].d;
- d[j - 1].i = tmp;
- d[j - 1].d = tmp_d;
- }
- }
- }
- }
- #endif
- // got into fundamental simplex
- // move coordinate system origin to its center
- for (i = 0; i < n; i++)
- d[i].d -= (2.0f * (CGU_FLOAT)i + 1.0f - (CGU_FLOAT)n) / 2.0f / (CGU_FLOAT)n;
- mm = l = 0.;
- j = -1;
- for (i = 0; i < n; i++)
- {
- l += d[i].d;
- if (l < mm)
- {
- mm = l;
- j = i;
- }
- }
- // position which should be in 0
- j = j + 1;
- j = j % n;
- for (i = j; i < n; i++)
- idx[d[i].i]++;
- }
- // get rid of an offset in idx
- mi = idx[0];
- for (i = 1; i < n; i++)
- mi = mi < idx[i] ? mi : idx[i];
- for (i = 0; i < n; i++)
- idx[i] -= mi;
- }
- CGU_FLOAT optQuantAnD_d(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG],
- CGU_INT numEntries,
- CGU_INT numClusters,
- CGU_INT index[MAX_ENTRIES],
- CGU_FLOAT out[MAX_ENTRIES][MAX_DIMENSION_BIG],
- CGU_FLOAT direction[MAX_DIMENSION_BIG],
- CGU_FLOAT* step,
- CGU_INT dimension,
- CGU_FLOAT quality)
- {
- CGU_INT index_[MAX_ENTRIES];
- CGU_INT maxTry = (int)(MAX_TRY * quality);
- CGU_INT try_two = 50;
- CGU_INT i, j, k;
- CGU_FLOAT t, s;
- CGU_FLOAT centered[MAX_ENTRIES][MAX_DIMENSION_BIG];
- CGU_FLOAT mean[MAX_DIMENSION_BIG];
- CGU_FLOAT cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG];
- CGU_FLOAT projected[MAX_ENTRIES];
- CGU_INT order_[MAX_ENTRIES];
- for (i = 0; i < numEntries; i++)
- for (j = 0; j < dimension; j++)
- centered[i][j] = data[i][j];
- centerInPlace_d(centered, numEntries, mean, dimension);
- covariance_d(centered, numEntries, cov, dimension);
- // check if they all are the same
- t = 0;
- for (j = 0; j < dimension; j++)
- t += cov[j][j];
- if (numEntries == 0)
- {
- for (i = 0; i < numEntries; i++)
- {
- index[i] = 0;
- for (j = 0; j < dimension; j++)
- out[i][j] = mean[j];
- }
- return 0.0f;
- }
- eigenVector_d(cov, direction, dimension);
- project_d(centered, numEntries, direction, projected, dimension);
- for (i = 0; i < maxTry; i++)
- {
- CGU_INT done = 0;
- if (i)
- {
- do
- {
- CGU_FLOAT q;
- q = s = t = 0;
- for (k = 0; k < numEntries; k++)
- {
- s += index[k];
- t += index[k] * index[k];
- }
- for (j = 0; j < dimension; j++)
- {
- direction[j] = 0;
- for (k = 0; k < numEntries; k++)
- direction[j] += centered[k][j] * index[k];
- q += direction[j] * direction[j];
- }
- s /= (CGU_FLOAT)numEntries;
- t = t - s * s * (CGU_FLOAT)numEntries;
- //assert(t != 0);
- t = (t == 0.0f ? 0.0f : 1.0f / t);
- // We need to requantize
- q = sqrtf(q);
- t *= q;
- if (q != 0)
- for (j = 0; j < dimension; j++)
- direction[j] /= q;
- // direction normalized
- project_d(centered, numEntries, direction, projected, dimension);
- sortProjection(projected, order_, numEntries);
- CGU_INT index__[MAX_ENTRIES];
- // it's projected and centered; cluster centers are (index[i]-s)*t (*dir)
- k = 0;
- for (j = 0; j < numEntries; j++)
- {
- while (projected[order_[j]] > (k + 0.5 - s) * t && k < numClusters - 1)
- k++;
- index__[order_[j]] = k;
- }
- done = 1;
- for (j = 0; j < numEntries; j++)
- {
- done = (done && (index__[j] == index[j]));
- index[j] = index__[j];
- }
- } while (!done && try_two--);
- if (i == 1)
- for (j = 0; j < numEntries; j++)
- index_[j] = index[j];
- else
- {
- done = 1;
- for (j = 0; j < numEntries; j++)
- {
- done = (done && (index_[j] == index[j]));
- index_[j] = index_[j];
- }
- if (done)
- break;
- }
- }
- quant_AnD_Shell(projected, numClusters, numEntries, index);
- }
- s = t = 0;
- CGU_FLOAT q = 0;
- for (k = 0; k < numEntries; k++)
- {
- s += index[k];
- t += index[k] * index[k];
- }
- for (j = 0; j < dimension; j++)
- {
- direction[j] = 0;
- for (k = 0; k < numEntries; k++)
- direction[j] += centered[k][j] * index[k];
- q += direction[j] * direction[j];
- }
- s /= (CGU_FLOAT)numEntries;
- t = t - s * s * (CGU_FLOAT)numEntries;
- //assert(t != 0);
- t = (t == 0.0 ? 0.0f : 1.0f / t);
- for (i = 0; i < numEntries; i++)
- for (j = 0; j < dimension; j++)
- out[i][j] = mean[j] + direction[j] * t * (index[i] - s);
- // normalize direction for output
- q = sqrtf(q);
- *step = t * q;
- for (j = 0; j < dimension; j++)
- direction[j] /= q;
- return totalError_d(data, out, numEntries, dimension);
- }
- void clampF16Max(CGU_FLOAT EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_BOOL isSigned)
- {
- for (CGU_INT region = 0; region < 2; region++)
- for (CGU_INT ab = 0; ab < 2; ab++)
- for (CGU_INT rgb = 0; rgb < 3; rgb++)
- {
- if (isSigned)
- {
- if (EndPoints[region][ab][rgb] < -FLT16_MAX)
- EndPoints[region][ab][rgb] = -FLT16_MAX;
- else if (EndPoints[region][ab][rgb] > FLT16_MAX)
- EndPoints[region][ab][rgb] = FLT16_MAX;
- }
- else
- {
- if (EndPoints[region][ab][rgb] < 0.0)
- EndPoints[region][ab][rgb] = 0.0;
- else if (EndPoints[region][ab][rgb] > FLT16_MAX)
- EndPoints[region][ab][rgb] = FLT16_MAX;
- }
- // Zero region
- // if ((EndPoints[region][ab][rgb] > -0.01) && ((EndPoints[region][ab][rgb] < 0.01))) EndPoints[region][ab][rgb] = 0.0;
- }
- }
- //=====================================================================================================================
// Constants for the ramp / cluster helpers below.
// LOG_CL_BASE is added to `clogs` in rampf() to select a row of rampLerpWeightsBC6.
// MAX_CLUSTERS_BIG sizes the per-cluster scratch arrays used in ep_shaker_HD().
// NOTE(review): BIT_BASE, LOG_CL_RANGE and BIT_RANGE are not referenced in the
// visible part of this file; presumably they feed the CLT()/BTT() helpers - confirm.
- #define LOG_CL_BASE 2
- #define BIT_BASE 5
- #define LOG_CL_RANGE 5
- #define BIT_RANGE 9
- #define MAX_CLUSTERS_BIG 16
- #ifdef USE_BC6RAMPS
/*
 * spidx - placeholder for the single-point index lookup (only compiled with
 * USE_BC6RAMPS). All arguments are ignored; the result is always 0.
 */
int spidx(int in_data, int in_clogs, int in_bits, int in_p2, int in_o1, int in_o2, int in_i)
{
    // use BC7 sp_idx
    (void)in_data;
    (void)in_clogs;
    (void)in_bits;
    (void)in_p2;
    (void)in_o1;
    (void)in_o2;
    (void)in_i;

    const int no_index = 0;
    return no_index;
}
/*
 * sperr - placeholder for the single-point error lookup (only compiled with
 * USE_BC6RAMPS). All arguments are ignored; the result is always 0.0f.
 *
 * The original body was `return 0, 0f;`, which is not a valid floating
 * literal and does not compile once USE_BC6RAMPS is defined.
 */
float sperr(int in_data, int clogs, int bits, int p2, int o1, int o2)
{
    // use BC7 sp_err
    (void)in_data;
    (void)clogs;
    (void)bits;
    (void)p2;
    (void)o1;
    (void)o2;
    return 0.0f;
}
- #endif
// Interpolation weights for ramp positions, one row per index bit count
// (row r holds 2^r weights, remaining entries are zero-filled).
// Each weight is a fraction of 64; rampf() reads row (clogs + LOG_CL_BASE).
- __constant CGU_FLOAT rampLerpWeightsBC6[5][16] = {
- {0.0}, // 0 bit index
- {0.0, 1.0}, // 1 bit index
- {0.0, 21.0 / 64.0, 43.0 / 64.0, 1.0}, // 2 bit index
- {0.0, 9.0 / 64.0, 18.0 / 64.0, 27.0 / 64.0, 37.0 / 64.0, 46.0 / 64.0, 55.0 / 64.0, 1.0}, // 3 bit index
- {0.0,
- 4.0 / 64.0,
- 9.0 / 64.0,
- 13.0 / 64.0,
- 17.0 / 64.0,
- 21.0 / 64.0,
- 26.0 / 64.0,
- 30.0 / 64.0,
- 34.0 / 64.0,
- 38.0 / 64.0,
- 43.0 / 64.0,
- 47.0 / 64.0,
- 51.0 / 64.0,
- 55.0 / 64.0,
- 60.0 / 64.0,
- 1.0} // 4 bit index
- };
- CGU_FLOAT rampf(CGU_INT clogs, CGU_FLOAT p1, CGU_FLOAT p2, CGU_INT indexPos)
- {
- // (clogs+ LOG_CL_BASE) starts from 2 to 4
- return (CGU_FLOAT)p1 + rampLerpWeightsBC6[clogs + LOG_CL_BASE][indexPos] * (p2 - p1);
- }
- CGU_INT all_same_d(CGU_FLOAT d[][MAX_DIMENSION_BIG], CGU_INT n, CGU_INT dimension)
- {
- CGU_INT i, j;
- CGU_INT same = 1;
- for (i = 1; i < n; i++)
- for (j = 0; j < dimension; j++)
- same = same && (d[0][j] == d[i][j]);
- return (same);
- }
- // return the max index from a set of indexes
- CGU_INT max_index(CGU_INT a[], CGU_INT n)
- {
- CGU_INT i, m = a[0];
- for (i = 0; i < n; i++)
- m = m > a[i] ? m : a[i];
- return (m);
- }
- CGU_INT cluster_mean_d_d(CGU_FLOAT d[MAX_ENTRIES][MAX_DIMENSION_BIG],
- CGU_FLOAT mean[MAX_ENTRIES][MAX_DIMENSION_BIG],
- CGU_INT index[],
- CGU_INT i_comp[],
- CGU_INT i_cnt[],
- CGU_INT n,
- CGU_INT dimension)
- {
- // unused index values are underfined
- CGU_INT i, j, k;
- //assert(n!=0);
- for (i = 0; i < n; i++)
- for (j = 0; j < dimension; j++)
- {
- // assert(index[i]<MAX_CLUSTERS_BIG);
- mean[index[i]][j] = 0;
- i_cnt[index[i]] = 0;
- }
- k = 0;
- for (i = 0; i < n; i++)
- {
- for (j = 0; j < dimension; j++)
- mean[index[i]][j] += d[i][j];
- if (i_cnt[index[i]] == 0)
- i_comp[k++] = index[i];
- i_cnt[index[i]]++;
- }
- for (i = 0; i < k; i++)
- for (j = 0; j < dimension; j++)
- mean[i_comp[i]][j] /= (CGU_FLOAT)i_cnt[i_comp[i]];
- return k;
- }
- void mean_d_d(CGU_FLOAT d[][MAX_DIMENSION_BIG], CGU_FLOAT mean[MAX_DIMENSION_BIG], CGU_INT n, CGU_INT dimension)
- {
- CGU_INT i, j;
- for (j = 0; j < dimension; j++)
- mean[j] = 0;
- for (i = 0; i < n; i++)
- for (j = 0; j < dimension; j++)
- mean[j] += d[i][j];
- for (j = 0; j < dimension; j++)
- mean[j] /= (CGU_FLOAT)n;
- }
- void index_collapse_kernel(CGU_INT index[], CGU_INT numEntries)
- {
- CGU_INT k;
- CGU_INT d, D;
- CGU_INT mi;
- CGU_INT Mi;
- if (numEntries == 0)
- return;
- mi = Mi = index[0];
- for (k = 1; k < numEntries; k++)
- {
- mi = mi < index[k] ? mi : index[k];
- Mi = Mi > index[k] ? Mi : index[k];
- }
- D = 1;
- for (d = 2; d <= Mi - mi; d++)
- {
- for (k = 0; k < numEntries; k++)
- if ((index[k] - mi) % d != 0)
- break;
- if (k >= numEntries)
- D = d;
- }
- for (k = 0; k < numEntries; k++)
- index[k] = (index[k] - mi) / D;
- }
- CGU_INT max_int(CGU_INT a[], CGU_INT n)
- {
- CGU_INT i, m = a[0];
- for (i = 0; i < n; i++)
- m = m > a[i] ? m : a[i];
- return (m);
- }
// Number of parity vectors to try, indexed [dimension - 3][type]
// (this is how quant_single_point_d() bounds its loop over par_vectors_nd).
// A count of 0 means no vectors are tried for that type.
- __constant CGU_INT npv_nd[2][2 * MAX_DIMENSION_BIG] = {
- {1, 2, 4, 8, 16, 32, 0, 0}, //dimension = 3
- {1, 2, 4, 0, 0, 0, 0, 0} //dimension = 4
- };
// Parity-vector table, read through get_par_vector().
// Subscripts as used by quant_single_point_d():
//   [dimension - 3][type][vector number][endpoint 0/1][channel]
// A non-zero entry pins that endpoint channel's offset range to start at 1,
// a zero entry pins it to end at 1.
// Rows in the "Dimension = 3" half give only three channel values; the
// remaining element is zero-initialized.
// NOTE(review): npv_nd lists 0 vectors for dimension 4, types 3 and 4, so the
// "3 PBIT" / "4 PBIT" groups below look unused by quant_single_point_d() - confirm.
- __constant short par_vectors_nd[2][8][128][2][MAX_DIMENSION_BIG] = {
- {// Dimension = 3
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- // 3*n+1 BCC 3*n+1 Cartesian 3*n //same parity
- {// SAME_PAR
- {{0, 0, 0}, {0, 0, 0}},
- {{1, 1, 1}, {1, 1, 1}}},
- // 3*n+2 BCC 3*n+1 BCC 3*n+1
- {// BCC
- {{0, 0, 0}, {0, 0, 0}},
- {{0, 0, 0}, {1, 1, 1}},
- {{1, 1, 1}, {0, 0, 0}},
- {{1, 1, 1}, {1, 1, 1}}},
- // 3*n+3 FCC ??? // ??????
- // BCC with FCC same or inverted, symmetric
- {
- // BCC_SAME_FCC
- {{0, 0, 0}, {0, 0, 0}},
- {{1, 1, 0}, {1, 1, 0}},
- {{1, 0, 1}, {1, 0, 1}},
- {{0, 1, 1}, {0, 1, 1}},
- {{0, 0, 0}, {1, 1, 1}},
- {{1, 1, 1}, {0, 0, 0}},
- {{0, 1, 0}, {0, 1, 0}}, // ??
- {{1, 1, 1}, {1, 1, 1}},
- },
- // 3*n+4 FCC 3*n+2 FCC 3*n+2
- {
- {{0, 0, 0}, {0, 0, 0}},
- {{1, 1, 0}, {0, 0, 0}},
- {{1, 0, 1}, {0, 0, 0}},
- {{0, 1, 1}, {0, 0, 0}},
- {{0, 0, 0}, {1, 1, 0}},
- {{1, 1, 0}, {1, 1, 0}},
- {{1, 0, 1}, {1, 1, 0}},
- {{0, 1, 1}, {1, 1, 0}},
- {{0, 0, 0}, {1, 0, 1}},
- {{1, 1, 0}, {1, 0, 1}},
- {{1, 0, 1}, {1, 0, 1}},
- {{0, 1, 1}, {1, 0, 1}},
- {{0, 0, 0}, {0, 1, 1}},
- {{1, 1, 0}, {0, 1, 1}},
- {{1, 0, 1}, {0, 1, 1}},
- {{0, 1, 1}, {0, 1, 1}}},
- // 3*n+5 Cartesian 3*n+3 FCC 3*n+2 //D^*[6]
- {
- {{0, 0, 0}, {0, 0, 0}}, {{1, 1, 0}, {0, 0, 0}}, {{1, 0, 1}, {0, 0, 0}}, {{0, 1, 1}, {0, 0, 0}},
- {{0, 0, 0}, {1, 1, 0}}, {{1, 1, 0}, {1, 1, 0}}, {{1, 0, 1}, {1, 1, 0}}, {{0, 1, 1}, {1, 1, 0}},
- {{0, 0, 0}, {1, 0, 1}}, {{1, 1, 0}, {1, 0, 1}}, {{1, 0, 1}, {1, 0, 1}}, {{0, 1, 1}, {1, 0, 1}},
- {{0, 0, 0}, {0, 1, 1}}, {{1, 1, 0}, {0, 1, 1}}, {{1, 0, 1}, {0, 1, 1}}, {{0, 1, 1}, {0, 1, 1}},
- {{1, 0, 0}, {1, 1, 1}}, {{0, 1, 0}, {1, 1, 1}}, {{0, 0, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}},
- {{1, 0, 0}, {0, 0, 1}}, {{0, 1, 0}, {0, 0, 1}}, {{0, 0, 1}, {0, 0, 1}}, {{1, 1, 1}, {0, 0, 1}},
- {{1, 0, 0}, {1, 0, 0}}, {{0, 1, 0}, {1, 0, 0}}, {{0, 0, 1}, {1, 0, 0}}, {{1, 1, 1}, {1, 0, 0}},
- {{1, 0, 0}, {0, 1, 0}}, {{0, 1, 0}, {0, 1, 0}}, {{0, 0, 1}, {0, 1, 0}}, {{1, 1, 1}, {0, 1, 0}}}}, // Dimension = 3
- {
- // Dimension = 4
- {{{0, 0, 0, 0}, {0, 0, 0, 0}}, {{0, 0, 0, 0}, {0, 0, 0, 0}}},
- // 3*n+1 BCC 3*n+1 Cartesian 3*n //same parity
- {// SAME_PAR
- {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{1, 1, 1, 1}, {1, 1, 1, 1}}},
- // 3*n+2 BCC 3*n+1 BCC 3*n+1
- {// BCC
- {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {1, 1, 1, 1}},
- {{1, 1, 1, 1}, {0, 0, 0, 0}},
- {{1, 1, 1, 1}, {1, 1, 1, 1}}},
- // 3 PBIT
- {{{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 1, 1, 1}},
- {{0, 1, 1, 1}, {0, 0, 0, 0}},
- {{0, 1, 1, 1}, {0, 1, 1, 1}},
- {{1, 0, 0, 0}, {1, 0, 0, 0}},
- {{1, 0, 0, 0}, {1, 1, 1, 1}},
- {{1, 1, 1, 1}, {1, 0, 0, 0}},
- {{1, 1, 1, 1}, {1, 1, 1, 1}}},
- // 4 PBIT
- {
- {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 1, 1, 1}},
- {{0, 1, 1, 1}, {0, 0, 0, 0}},
- {{0, 1, 1, 1}, {0, 1, 1, 1}},
- {{1, 0, 0, 0}, {1, 0, 0, 0}},
- {{1, 0, 0, 0}, {1, 1, 1, 1}},
- {{1, 1, 1, 1}, {1, 0, 0, 0}},
- {{1, 1, 1, 1}, {1, 1, 1, 1}},
- {{0, 0, 0, 0}, {0, 0, 0, 0}},
- {{0, 0, 0, 0}, {0, 0, 1, 1}},
- {{0, 0, 1, 1}, {0, 0, 0, 0}},
- {{0, 1, 0, 1}, {0, 1, 0, 1}},
- {{1, 0, 0, 0}, {1, 0, 0, 0}},
- {{1, 0, 0, 0}, {1, 0, 1, 1}},
- {{1, 0, 1, 1}, {1, 0, 0, 0}},
- {{1, 1, 0, 1}, {1, 1, 0, 1}},
- },
- } // Dimension = 4
- };
- CGU_INT get_par_vector(CGU_INT dim1, CGU_INT dim2, CGU_INT dim3, CGU_INT dim4, CGU_INT dim5)
- {
- return par_vectors_nd[dim1][dim2][dim3][dim4][dim5];
- }
// quant_single_point_d - quantize a set that is treated as one single point.
//
// Only data[0] is read; every entry receives the same index and the same
// reconstructed value.
//   data       : input points (row 0 is the point that gets quantized)
//   numEntries : number of entries written to index / out
//   index      : out - every entry is set to the chosen ramp position
//   out        : out - every entry is set to (int)rampf(...) of the chosen endpoints
//   epo_1      : out - chosen endpoint codes, [endpoint][channel]
//   Mi_        : last cluster number; clogs becomes floor(log2(Mi_ + 1))
//   type       : parity type; 0 disables the parity vectors
//   dimension  : channel count; values below 3 return CMP_FLOAT_MAX immediately
// Returns the best per-point error multiplied by numEntries.
//
// Without USE_BC6RAMPS the per-channel error t_ is forced to 0 and the endpoint
// codes are written as 0, so the search stops at the first candidate.
// NOTE(review): the USE_BC6RAMPS branches reference `bits` and `tr`, neither of
// which is declared in this function (the `tr` declaration is commented out) -
// that configuration presumably does not build as written; confirm.
- CGU_FLOAT quant_single_point_d(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG],
- CGU_INT numEntries,
- CGU_INT index[MAX_ENTRIES],
- CGU_FLOAT out[MAX_ENTRIES][MAX_DIMENSION_BIG],
- CGU_INT epo_1[2][MAX_DIMENSION_BIG],
- CGU_INT Mi_, // last cluster
- CGU_INT type,
- CGU_INT dimension)
- {
- if (dimension < 3)
- return CMP_FLOAT_MAX;
- CGU_INT i, j;
- CGU_FLOAT err_0 = CMP_FLOAT_MAX;
- CGU_FLOAT err_1 = CMP_FLOAT_MAX;
- CGU_INT idx = 0;
- CGU_INT idx_1 = 0;
- CGU_INT epo_0[2][MAX_DIMENSION_BIG];
- CGU_INT use_par = (type != 0);
- CGU_INT clogs = 0;
// clogs = floor(log2(Mi_ + 1))
- i = Mi_ + 1;
- while (i >>= 1)
- clogs++;
- // assert((1<<clogs)== Mi_+1);
- CGU_INT pn;
// Loop 1: every parity vector available for this dimension / type.
- for (pn = 0; pn < npv_nd[dimension - 3][type]; pn++)
- {
- //1
- CGU_INT dim1 = dimension - 3;
- CGU_INT dim2 = type;
- CGU_INT dim3 = pn;
- CGU_INT o1[2][MAX_DIMENSION_BIG]; // = { 0,2 };
- CGU_INT o2[2][MAX_DIMENSION_BIG]; // = { 0,2 };
// Loop A: per channel, the half-open offset range [o?[0], o?[1]) is {0,1};
// with parity enabled it is narrowed to the single value given by the vector.
- for (j = 0; j < dimension; j++)
- {
- //A
- o2[0][j] = o1[0][j] = 0;
- o2[1][j] = o1[1][j] = 2;
- if (use_par)
- {
- if (get_par_vector(dim1, dim2, dim3, 0, j))
- o1[0][j] = 1;
- else
- o1[1][j] = 1;
- if (get_par_vector(dim1, dim2, dim3, 1, j))
- o2[0][j] = 1;
- else
- o2[1][j] = 1;
- }
- } //A
- CGU_INT t1, t2;
- CGU_INT dr[MAX_DIMENSION_BIG];
- CGU_INT dr_0[MAX_DIMENSION_BIG];
- //CGU_FLOAT tr;
// Loop E: every ramp position 0 .. 2^clogs - 1.
- for (i = 0; i < (1 << clogs); i++)
- {
- //E
- CGU_FLOAT t = 0;
- CGU_INT t1o[MAX_DIMENSION_BIG], t2o[MAX_DIMENSION_BIG];
- for (j = 0; j < dimension; j++)
- {
- // D
- CGU_FLOAT t_ = CMP_FLOAT_MAX;
- for (t1 = o1[0][j]; t1 < o1[1][j]; t1++)
- {
- // C
- for (t2 = o2[0][j]; t2 < o2[1][j]; t2++)
- // This is needed for non-integer mean points of "collapsed" sets
- {
- // B
- #ifdef USE_BC6RAMPS
- CGU_INT tf = (int)floorf(data[0][j]);
- CGU_INT tc = (int)ceilf(data[0][j]);
- // if they are not equal, the same representable point is used for
- // both of them, as all representable points are integers in the range
- if (sperr(tf, CLT(clogs), BTT(bits[j]), t1, t2, i) > sperr(tc, CLT(clogs), BTT(bits[j]), t1, t2, i))
- dr[j] = tc;
- else if (sperr(tf, CLT(clogs), BTT(bits[j]), t1, t2, i) < sperr(tc, CLT(clogs), BTT(bits[j]), t1, t2, i))
- dr[j] = tf;
- else
- #endif
- dr[j] = (int)floorf(data[0][j] + 0.5f);
- #ifdef USE_BC6RAMPS
- tr = sperr(dr[j], CLT(clogs), BTT(bits[j]), t1, t2, i) +
- 2.0f * sqrtf(sperr(dr[j], CLT(clogs), BTT(bits[j]), t1, t2, i)) * fabsf((float)dr[j] - data[0][j]) +
- (dr[j] - data[0][j]) * (dr[j] - data[0][j]);
- if (tr < t_)
- {
- t_ = tr;
- #else
- t_ = 0;
- #endif
- t1o[j] = t1;
- t2o[j] = t2;
- dr_0[j] = dr[j];
- #ifdef USE_BC6RAMPS
- if ((dr_0[j] < 0) || (dr_0[j] > 255))
- {
- dr_0[j] = 0; // Error!
- }
- }
- #endif
- } // B
- } //C
- t += t_;
- } // D
// Keep the best ramp position for this parity vector.
- if (t < err_0)
- {
- idx = i;
- for (j = 0; j < dimension; j++)
- {
- #ifdef USE_BC6RAMPS
- CGU_INT p1 = CLT(clogs); // < 3
- CGU_INT p2 = BTT(bits[j]); // < 4
- CGU_INT in_data = dr_0[j]; // < SP_ERRIDX_MAX
- CGU_INT p4 = t1o[j]; // < 2
- CGU_INT p5 = t2o[j]; // < 2
- CGU_INT p6 = i; // < 16
- // New spidx
- epo_0[0][j] = spidx(in_data, p1, p2, p4, p5, p6, 0);
- epo_0[1][j] = spidx(in_data, p1, p2, p4, p5, p6, 1);
- if (epo_0[1][j] >= SP_ERRIDX_MAX)
- {
- epo_0[1][j] = 0; // Error!!
- }
- #else
- epo_0[0][j] = 0;
- epo_0[1][j] = 0;
- #endif
- }
- err_0 = t;
- }
- if (err_0 == 0)
- break;
- } // E
// Keep the best result over all parity vectors.
- if (err_0 < err_1)
- {
- idx_1 = idx;
- for (j = 0; j < dimension; j++)
- {
- epo_1[0][j] = epo_0[0][j];
- epo_1[1][j] = epo_0[1][j];
- }
- err_1 = err_0;
- }
- if (err_1 == 0)
- break;
- } //1
// Every entry gets the same index and the same reconstructed value.
- for (i = 0; i < numEntries; i++)
- {
- index[i] = idx_1;
- for (j = 0; j < dimension; j++)
- {
- CGU_INT p1 = CLT(clogs); // < 3
- CGU_INT p3 = epo_1[0][j]; // < SP_ERRIDX_MAX
- CGU_INT p4 = epo_1[1][j]; // < SP_ERRIDX_MAX
- CGU_INT p5 = idx_1; // < 16
- #pragma warning(push)
- #pragma warning(disable : 4244)
- out[i][j] = (int)rampf(p1, p3, p4, p5);
- #pragma warning(pop)
- }
- }
- return err_1 * numEntries;
- }
- //========================================================================================================================
// ep_shaker_HD - refine endpoints and indices for one subset by a least-squares
// fit followed by a small search around the fitted endpoints.
//
//   data          : input points of the subset
//   numEntries    : number of points
//   index_        : in/out - starting indices; overwritten when a better result is found
//   out           : out - reconstructed points of the best result
//   epo_code_out  : out - endpoint values of the best result, [endpoint][channel]
//   Mi_           : last cluster number (ramp has Mi_ + 1 positions)
//   bits          : per-channel bit counts; bits[0] also selects the parity `type`
//   channels3or4  : number of channels processed
// Returns the error of the best result found.
//
// Flow: the working index is collapsed (index_collapse_kernel). If it collapses
// to a single value the set is handled by quant_single_point_d() and the
// function returns. Otherwise every scale q / offset p that keeps the index
// inside 0..Mi_ is tried: endpoints are fitted with a 2x2 least-squares solve,
// then 64 Gray-code steps toggle each endpoint channel between the start and
// end of its range (epd[..][..][0] / [1]) and the lowest-error combination wins.
//
// use_par is fixed at 0 here, and maxTry starts at 1, so the do/while body runs once.
// NOTE(review): `bits` is declared with 3 elements but is indexed with
// j < channels3or4; data[i][3] is also read regardless of channels3or4 -
// confirm behaviour for channels3or4 == 4 and that rows hold 4 values.
// NOTE(review): if no (q, p) candidate ever lowers err_2, p0 / q0 stay -1 and
// idx_2 is read in the "change" loop without having been written - confirm
// that this cannot happen for valid input.
- CGU_FLOAT ep_shaker_HD(CGU_FLOAT data[MAX_ENTRIES][MAX_DIMENSION_BIG],
- CGU_INT numEntries,
- CGU_INT index_[MAX_ENTRIES],
- CGU_FLOAT out[MAX_ENTRIES][MAX_DIMENSION_BIG],
- CGU_INT epo_code_out[2][MAX_DIMENSION_BIG],
- CGU_INT Mi_, // last cluster
- CGU_INT bits[3], // including parity
- CGU_INT channels3or4)
- {
- CGU_INT i, j, k;
- CGU_INT use_par = 0;
- CGU_INT clogs = 0;
// clogs = floor(log2(Mi_ + 1))
- i = Mi_ + 1;
- while (i >>= 1)
- clogs++;
- CGU_FLOAT mean[MAX_DIMENSION_BIG];
- CGU_INT index[MAX_ENTRIES];
- CGU_INT Mi;
- CGU_INT maxTry = 1;
// Work on a private copy of the caller's indices.
- for (k = 0; k < numEntries; k++)
- {
- index[k] = index_[k];
- }
- CGU_INT done;
- CGU_INT change;
- CGU_INT better;
- CGU_FLOAT err_o = CMP_FLOAT_MAX;
- CGU_FLOAT out_2[MAX_ENTRIES][MAX_DIMENSION_BIG];
- CGU_INT idx_2[MAX_ENTRIES];
- CGU_INT epo_2[2][MAX_DIMENSION_BIG];
- CGU_INT max_bits[MAX_DIMENSION_BIG];
- CGU_INT type = bits[0] % (2 * channels3or4);
// max_bits is filled in here but not read again in this function.
- for (j = 0; j < channels3or4; j++)
- max_bits[j] = (bits[0] + 2 * channels3or4 - 1) / (2 * channels3or4);
- // handled below automatically
- CGU_INT alls = all_same_d(data, numEntries, channels3or4);
- mean_d_d(data, mean, numEntries, channels3or4);
- do
- {
- index_collapse_kernel(index, numEntries);
- Mi = max_index(index, numEntries); // index can be from requantizer
- CGU_INT p, q;
- CGU_INT p0 = -1, q0 = -1;
- CGU_FLOAT err_2 = CMP_FLOAT_MAX;
- if (Mi == 0)
- {
- CGU_FLOAT t;
- CGU_INT epo_0[2][MAX_DIMENSION_BIG];
- // either single point from the beginning or collapsed index
- if (alls)
- {
- t = quant_single_point_d(data, numEntries, index, out_2, epo_0, Mi_, type, channels3or4);
- }
- else
- {
// Quantize the mean (passed as a one-row data set) and measure the error
// against the real data.
- quant_single_point_d(&mean, numEntries, index, out_2, epo_0, Mi_, type, channels3or4);
- t = totalError_d(data, out_2, numEntries, channels3or4);
- }
- if (t < err_o)
- {
- for (k = 0; k < numEntries; k++)
- {
- index_[k] = index[k];
- for (j = 0; j < channels3or4; j++)
- {
- out[k][j] = out_2[k][j];
- epo_code_out[0][j] = epo_0[0][j];
- epo_code_out[1][j] = epo_0[1][j];
- }
- };
- err_o = t;
- }
- return err_o;
- }
- //===============================
- // We have ramp colors to process
- //===============================
- for (q = 1; Mi != 0 && q * Mi <= Mi_; q++)
- { // does not work for single point collapsed index!!!
- for (p = 0; p <= Mi_ - q * Mi; p++)
- {
- //-------------------------------------
- // set a new index data to try
- //-------------------------------------
- CGU_INT cidx[MAX_ENTRIES];
- for (k = 0; k < numEntries; k++)
- {
- cidx[k] = index[k] * q + p;
- }
- CGU_FLOAT epa[2][MAX_DIMENSION_BIG];
- //
- // solve RMS problem for center
- //
- CGU_FLOAT im[2][2] = {{0, 0}, {0, 0}}; // matrix /inverse matrix
- CGU_FLOAT rp[2][MAX_DIMENSION_BIG]; // right part for RMS fit problem
- // get ideal cluster centers
- CGU_FLOAT cc[MAX_CLUSTERS_BIG][MAX_DIMENSION_BIG];
- CGU_INT index_cnt[MAX_CLUSTERS_BIG]; // count of index entries
- CGU_INT index_comp[MAX_CLUSTERS_BIG]; // compacted index
- CGU_INT index_ncl; // number of unique indexes
- index_ncl = cluster_mean_d_d(data, cc, cidx, index_comp, index_cnt, numEntries, channels3or4); // unrounded
- for (i = 0; i < index_ncl; i++)
- for (j = 0; j < channels3or4; j++)
- cc[index_comp[i]][j] = (CGU_FLOAT)floorf(cc[index_comp[i]][j] + 0.5f); // more or less ideal location
- for (j = 0; j < channels3or4; j++)
- {
- rp[0][j] = rp[1][j] = 0;
- }
- // weight with cnt if running on compacted index
- for (k = 0; k < numEntries; k++)
- {
- im[0][0] += (Mi_ - cidx[k]) * (Mi_ - cidx[k]);
- im[0][1] += cidx[k] * (Mi_ - cidx[k]); // im is symmetric
- im[1][1] += cidx[k] * cidx[k];
- for (j = 0; j < channels3or4; j++)
- {
- rp[0][j] += (Mi_ - cidx[k]) * cc[cidx[k]][j];
- rp[1][j] += cidx[k] * cc[cidx[k]][j];
- }
- }
- CGU_FLOAT dd = im[0][0] * im[1][1] - im[0][1] * im[0][1];
- //assert(dd !=0);
- // dd=0 means that cidx[k] and (Mi_-cidx[k]) collinear which implies only one active index;
- // taken care of separately
// In-place inversion of the symmetric 2x2 matrix; im[1][0] is used as scratch.
- im[1][0] = im[0][0];
- im[0][0] = im[1][1] / dd;
- im[1][1] = im[1][0] / dd;
- im[1][0] = im[0][1] = -im[0][1] / dd;
- for (j = 0; j < channels3or4; j++)
- {
- epa[0][j] = (im[0][0] * rp[0][j] + im[0][1] * rp[1][j]) * Mi_;
- epa[1][j] = (im[1][0] * rp[0][j] + im[1][1] * rp[1][j]) * Mi_;
- }
- CGU_FLOAT err_1 = CMP_FLOAT_MAX;
- CGU_FLOAT out_1[MAX_ENTRIES][MAX_DIMENSION_BIG];
- CGU_INT idx_1[MAX_ENTRIES];
- CGU_INT epo_1[2][MAX_DIMENSION_BIG];
- CGU_INT s1 = 0;
- CGU_FLOAT epd[2][MAX_DIMENSION_BIG][2]; // first second, coord, begin range end range
- for (j = 0; j < channels3or4; j++)
- {
- for (i = 0; i < 2; i++)
- {
- // set range
// With use_par == 0 the range end is the start plus
// min((1 << bits[j]) - 1 - (int)start, 1).
- epd[i][j][0] = epd[i][j][1] = epa[i][j];
- epd[i][j][1] +=
- ((1 << bits[j]) - 1 - (int)epd[i][j][1] < (1 << use_par) ? (1 << bits[j]) - 1 - (int)epd[i][j][1] : (1 << use_par)) & (~use_par);
- }
- }
- CGU_FLOAT ce[MAX_ENTRIES][MAX_CLUSTERS_BIG][MAX_DIMENSION_BIG];
- CGU_FLOAT err_0 = 0;
- CGU_FLOAT out_0[MAX_ENTRIES][MAX_DIMENSION_BIG];
- CGU_INT idx_0[MAX_ENTRIES];
// Squared per-channel error of every entry against every ramp position,
// using the range-start endpoints.
- for (i = 0; i < numEntries; i++)
- {
- CGU_FLOAT d[4];
- d[0] = data[i][0];
- d[1] = data[i][1];
- d[2] = data[i][2];
- d[3] = data[i][3];
- for (j = 0; j < (1 << clogs); j++)
- for (k = 0; k < channels3or4; k++)
- {
- ce[i][j][k] = (rampf(CLT(clogs), epd[0][k][0], epd[1][k][0], j) - d[k]) * (rampf(CLT(clogs), epd[0][k][0], epd[1][k][0], j) - d[k]);
- }
- }
- CGU_INT s = 0, p1, g;
- CGU_INT ei0 = 0, ei1 = 0;
// 64 steps over a 6-bit state s (2 bits per channel); each step flips the
// lowest set bit of p1, so only channel j0 needs its errors recomputed.
- for (p1 = 0; p1 < 64; p1++)
- {
- CGU_INT j0 = 0;
- // Gray code increment
- g = p1 & (-p1);
- err_0 = 0;
- for (j = 0; j < channels3or4; j++)
- {
- if (((g >> (2 * j)) & 0x3) != 0)
- {
- j0 = j;
- // new cords
- ei0 = (((s ^ g) >> (2 * j)) & 0x1);
- ei1 = (((s ^ g) >> (2 * j + 1)) & 0x1);
- }
- }
- s = s ^ g;
- err_0 = 0;
- for (i = 0; i < numEntries; i++)
- {
- CGU_FLOAT d[4];
- d[0] = data[i][0];
- d[1] = data[i][1];
- d[2] = data[i][2];
- d[3] = data[i][3];
- CGU_INT ci = 0;
- CGU_FLOAT cmin = CMP_FLOAT_MAX;
- for (j = 0; j < (1 << clogs); j++)
- {
- float t_ = 0.;
- ce[i][j][j0] = (rampf(CLT(clogs), epd[0][j0][ei0], epd[1][j0][ei1], j) - d[j0]) *
- (rampf(CLT(clogs), epd[0][j0][ei0], epd[1][j0][ei1], j) - d[j0]);
- for (k = 0; k < channels3or4; k++)
- {
- t_ += ce[i][j][k];
- }
- if (t_ < cmin)
- {
- cmin = t_;
- ci = j;
- }
- }
- idx_0[i] = ci;
- for (k = 0; k < channels3or4; k++)
- {
- out_0[i][k] = rampf(CLT(clogs), epd[0][k][ei0], epd[1][k][ei1], ci);
- }
- err_0 += cmin;
- }
- if (err_0 < err_1)
- {
- // best in the current ep cube run
- for (i = 0; i < numEntries; i++)
- {
- idx_1[i] = idx_0[i];
- for (j = 0; j < channels3or4; j++)
- out_1[i][j] = out_0[i][j];
- }
- err_1 = err_0;
- s1 = s; // epo coding
- }
- }
- // reconstruct epo
- for (j = 0; j < channels3or4; j++)
- {
- {
- // new cords
- ei0 = ((s1 >> (2 * j)) & 0x1);
- ei1 = ((s1 >> (2 * j + 1)) & 0x1);
- epo_1[0][j] = (int)epd[0][j][ei0];
- epo_1[1][j] = (int)epd[1][j][ei1];
- }
- }
- if (err_1 < err_2)
- {
- // best in the current ep cube run
- for (i = 0; i < numEntries; i++)
- {
- idx_2[i] = idx_1[i];
- for (j = 0; j < channels3or4; j++)
- out_2[i][j] = out_1[i][j];
- }
- err_2 = err_1;
- for (j = 0; j < channels3or4; j++)
- {
- epo_2[0][j] = epo_1[0][j];
- epo_2[1][j] = epo_1[1][j];
- }
- p0 = p;
- q0 = q;
- }
- }
- }
- // change/better
- change = 0;
- for (k = 0; k < numEntries; k++)
- change = change || (index[k] * q0 + p0 != idx_2[k]);
- better = err_2 < err_o;
- if (better)
- {
- for (k = 0; k < numEntries; k++)
- {
- index_[k] = index[k] = idx_2[k];
- for (j = 0; j < channels3or4; j++)
- {
- out[k][j] = out_2[k][j];
- epo_code_out[0][j] = epo_2[0][j];
- epo_code_out[1][j] = epo_2[1][j];
- }
- }
- err_o = err_2;
- }
- done = !(change && better);
- if (maxTry > 0)
- maxTry--;
- else
- maxTry = 0;
- } while (!done && maxTry);
- return err_o;
- }
- #ifndef ASPM_GPU
- static CGU_INT g_aWeights3[] = {0, 9, 18, 27, 37, 46, 55, 64}; // 3 bit color Indices
- static CGU_INT g_aWeights4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // 4 bit color indices
- CGU_FLOAT lerpf(CGU_FLOAT a, CGU_FLOAT b, CGU_INT i, CGU_INT denom)
- {
- assert(denom == 3 || denom == 7 || denom == 15);
- assert(i >= 0 && i <= denom);
- CGU_INT* weights = NULL;
- switch (denom)
- {
- case 3:
- denom *= 5;
- i *= 5; // fall through to case 15
- case 7:
- weights = g_aWeights3;
- break;
- case 15:
- weights = g_aWeights4;
- break;
- default:
- assert(0);
- }
- return (a * weights[denom - i] + b * weights[i]) / 64.0f;
- }
- #else
- CGU_FLOAT lerpf(CGU_FLOAT a, CGU_FLOAT b, CGU_INT i, CGU_INT denom)
- {
- CGU_INT g_aWeights3[] = {0, 9, 18, 27, 37, 46, 55, 64}; // 3 bit color Indices
- CGU_INT g_aWeights4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // 4 bit color indices
- switch (denom)
- {
- case 7:
- return ((a * g_aWeights3[denom - i] + b * g_aWeights3[i]) / 64.0f);
- break;
- case 15:
- return ((a * g_aWeights4[denom - i] + b * g_aWeights4[i]) / 64.0f);
- break;
- default:
- case 3: // fall through to case 15
- denom *= 5;
- i *= 5;
- return ((a * g_aWeights3[denom - i] + b * g_aWeights3[i]) / 64.0f);
- break;
- }
- }
- #endif
- void palitizeEndPointsF(BC6H_Encode_local* BC6H_data, CGU_FLOAT fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG])
- {
- // scale endpoints
- CGU_FLOAT Ar, Ag, Ab, Br, Bg, Bb;
- // Compose index colors from end points
- if (BC6H_data->region == 1)
- {
- Ar = fEndPoints[0][0][0];
- Ag = fEndPoints[0][0][1];
- Ab = fEndPoints[0][0][2];
- Br = fEndPoints[0][1][0];
- Bg = fEndPoints[0][1][1];
- Bb = fEndPoints[0][1][2];
- for (CGU_INT i = 0; i < 16; i++)
- {
- // Red
- BC6H_data->Paletef[0][i].x = lerpf(Ar, Br, i, 15);
- // Green
- BC6H_data->Paletef[0][i].y = lerpf(Ag, Bg, i, 15);
- // Blue
- BC6H_data->Paletef[0][i].z = lerpf(Ab, Bb, i, 15);
- }
- }
- else
- { //mode.type == BC6_TWO
- for (CGU_INT region = 0; region < 2; region++)
- {
- Ar = fEndPoints[region][0][0];
- Ag = fEndPoints[region][0][1];
- Ab = fEndPoints[region][0][2];
- Br = fEndPoints[region][1][0];
- Bg = fEndPoints[region][1][1];
- Bb = fEndPoints[region][1][2];
- for (CGU_INT i = 0; i < 8; i++)
- {
- // Red
- BC6H_data->Paletef[region][i].x = lerpf(Ar, Br, i, 7);
- // Greed
- BC6H_data->Paletef[region][i].y = lerpf(Ag, Bg, i, 7);
- // Blue
- BC6H_data->Paletef[region][i].z = lerpf(Ab, Bb, i, 7);
- }
- }
- }
- }
- CGU_FLOAT CalcShapeError(BC6H_Encode_local* BC6H_data, CGU_FLOAT fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], CGU_BOOL SkipPallet)
- {
- CGU_INT maxPallet;
- CGU_INT subset = 0;
- CGU_FLOAT totalError = 0.0f;
- CGU_INT region = (BC6H_data->region - 1);
- if (region == 0)
- maxPallet = 16;
- else
- maxPallet = 8;
- if (!SkipPallet)
- palitizeEndPointsF(BC6H_data, fEndPoints);
- for (CGU_INT i = 0; i < MAX_SUBSET_SIZE; i++)
- {
- CGU_FLOAT error = 0.0f;
- CGU_FLOAT bestError = 0.0f;
- if (region == 0)
- {
- subset = 0;
- }
- else
- {
- // get the shape subset 0 or 1
- subset = BC6_PARTITIONS[BC6H_data->d_shape_index][i];
- }
- // initialize bestError to the difference for first data
- bestError = abs(BC6H_data->din[i][0] - BC6H_data->Paletef[subset][0].x) + abs(BC6H_data->din[i][1] - BC6H_data->Paletef[subset][0].y) +
- abs(BC6H_data->din[i][2] - BC6H_data->Paletef[subset][0].z);
- // loop through the rest of the data until find the best error
- for (CGU_INT j = 1; j < maxPallet && bestError > 0; j++)
- {
- error = abs(BC6H_data->din[i][0] - BC6H_data->Paletef[subset][j].x) + abs(BC6H_data->din[i][1] - BC6H_data->Paletef[subset][j].y) +
- abs(BC6H_data->din[i][2] - BC6H_data->Paletef[subset][j].z);
- if (error <= bestError)
- bestError = error;
- else
- break;
- }
- totalError += bestError;
- }
- return totalError;
- }
// FindBestPattern - evaluate one partition shape for the current block.
//
//   BC6H_data       : block state; region, index, d_shape_index, partition,
//                     shape_indices, entryCount and fEndPoints are written here
//   TwoRegionShapes : true selects 2 subsets with 8 ramp positions each,
//                     false selects 1 subset with 16 ramp positions
//   shape_pattern   : shape number passed to Partition()
//   quality         : forwarded to optQuantAnD_d(); also gates the optional
//                     USE_SHAKERHD refinement (two subsets and quality > 0.80)
// Returns CalcShapeError() for the clamped endpoints that were selected.
//
// Steps: partition the input, quantize every subset with optQuantAnD_d(),
// optionally try ep_shaker_HD(), derive endpoints (GetEndPoints unless the
// shaker result won), copy the winning indices into BC6H_data, clamp the
// endpoints and measure the resulting error.
// NOTE(review): despite its name, Index_BitSize holds the number of ramp
// positions (8 or 16) and is passed as numClusters, not a bit count.
// NOTE(review): the initializer of error[] has three values, so MAX_SUBSETS is
// assumed to be at least 3; the final memcpy assumes shape_indicesB[n] is at
// least as large as BC6H_data->shape_indices - confirm against the headers.
- CGU_FLOAT FindBestPattern(BC6H_Encode_local* BC6H_data, CGU_BOOL TwoRegionShapes, CGU_INT8 shape_pattern, CGU_FLOAT quality)
- {
- // Index bit size for the patterns being used.
- // All two zone shapes have 3 bits per color, max index value < 8
- // All one zone shapes have 4 bits per color, max index value < 16
- CGU_INT8 Index_BitSize = TwoRegionShapes ? 8 : 16;
- CGU_INT8 max_subsets = TwoRegionShapes ? 2 : 1;
- CGU_FLOAT direction[NCHANNELS];
- CGU_FLOAT step;
- BC6H_data->region = max_subsets;
- BC6H_data->index = 0;
- BC6H_data->d_shape_index = shape_pattern;
- memset((CGU_UINT8*)BC6H_data->partition, 0, sizeof(BC6H_data->partition));
- memset((CGU_UINT8*)BC6H_data->shape_indices, 0, sizeof(BC6H_data->shape_indices));
- // Get the pattern to encode with
- Partition(shape_pattern, // Shape pattern we want to get
- BC6H_data->din, // Input data
- BC6H_data->partition, // Returns the patterned shape data
- BC6H_data->entryCount, // counts the number of pixels used in each subset region, number of 0's and 1's
- max_subsets, // Table shapes to use: either one region (1) or two regions (2)
- 3); // rgb no alpha always = 3
- CGU_FLOAT error[MAX_SUBSETS] = {0.0, CMP_FLOAT_MAX, CMP_FLOAT_MAX};
- CGU_INT BestOutB = 0;
- CGU_FLOAT BestError; //the lowest error from vector direction quantization
- CGU_FLOAT BestError_endpts; //the lowest error from endpoints extracted from the vector direction quantization
- CGU_FLOAT outB[2][2][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
- CGU_INT shape_indicesB[2][MAX_SUBSETS][MAX_SUBSET_SIZE];
// Candidate 0: direction quantization of every subset; errors are summed.
// direction and step are overwritten per subset and not used afterwards here.
- for (CGU_INT subset = 0; subset < max_subsets; subset++)
- {
- error[0] += optQuantAnD_d(BC6H_data->partition[subset], // input data
- BC6H_data->entryCount[subset], // number of input points above (not clear about 1, better to avoid)
- Index_BitSize, // number of clusters on the ramp, 8 or 16
- shape_indicesB[0][subset], // output index, if not all points of the ramp used, 0 may not be assigned
- outB[0][subset], // resulting quantization
- direction, // direction vector of the ramp (check normalization)
- &step, // step size (check normalization)
- 3, // number of channels (always 3 = RGB for BC6H)
- quality // Quality set number of retry to get good end points
- // Max retries = MAX_TRY = 4000 when Quality is 1.0
- // Min = 0 and default with quality 0.05 is 200 times
- );
- }
- BestError = error[0];
- BestOutB = 0;
- // The following code is almost complete - runs very slow and not sure if % of improvement is justified..
- #ifdef USE_SHAKERHD
- // Valid only for 2 region shapes
- if ((max_subsets > 1) && (quality > 0.80))
- {
- CGU_INT tempIndices[MAX_SUBSET_SIZE];
- // CGU_INT temp_epo_code[2][2][MAX_DIMENSION_BIG];
- CGU_INT bits[3] = {8, 8, 8}; // Channel index bit size
- // CGU_FLOAT epo[2][MAX_DIMENSION_BIG];
- CGU_INT epo_code[MAX_SUBSETS][2][MAX_DIMENSION_BIG];
- // CGU_INT shakeSize = 8;
- error[1] = 0.0;
// Candidate 1: refine the candidate-0 indices of every subset with ep_shaker_HD().
- for (CGU_INT subset = 0; subset < max_subsets; subset++)
- {
- for (CGU_INT k = 0; k < BC6H_data->entryCount[subset]; k++)
- {
- tempIndices[k] = shape_indicesB[0][subset][k];
- }
- error[1] += ep_shaker_HD(BC6H_data->partition[subset],
- BC6H_data->entryCount[subset],
- tempIndices, // output index, if not all points of the ramp used, 0 may not be assigned
- outB[1][subset], // resulting quantization
- epo_code[subset],
- BC6H_data->entryCount[subset] - 1,
- bits,
- 3);
- // error[1] += ep_shaker_2_d(
- // BC6H_data.partition[subset],
- // BC6H_data.entryCount[subset],
- // tempIndices, // output index, if not all points of the ramp used, 0 may not be assigned
- // outB[1][subset], // resulting quantization
- // epo_code[subset],
- // shakeSize,
- // BC6H_data.entryCount[subset] - 1,
- // bits[0],
- // 3,
- // epo
- // );
- for (CGU_INT k = 0; k < BC6H_data->entryCount[subset]; k++)
- {
- shape_indicesB[1][subset][k] = tempIndices[k];
- }
- } // subsets
- if (BestError > error[1])
- {
- BestError = error[1];
- BestOutB = 1;
- for (CGU_INT subset = 0; subset < max_subsets; subset++)
- {
- for (CGU_INT k = 0; k < MAX_DIMENSION_BIG; k++)
- {
- BC6H_data->fEndPoints[subset][0][k] = (CGU_FLOAT)epo_code[subset][0][k];
- BC6H_data->fEndPoints[subset][1][k] = (CGU_FLOAT)epo_code[subset][1][k];
- }
- }
- }
- }
- #endif
- // Save the best for BC6H data processing later
// Endpoints come from GetEndPoints() only when candidate 0 won; the shaker
// path has already stored its endpoints above.
- if (BestOutB == 0)
- GetEndPoints(BC6H_data->fEndPoints, outB[BestOutB], max_subsets, BC6H_data->entryCount);
- memcpy((CGU_UINT8*)BC6H_data->shape_indices, (CGU_UINT8*)shape_indicesB[BestOutB], sizeof(BC6H_data->shape_indices));
- clampF16Max(BC6H_data->fEndPoints, BC6H_data->issigned);
- BestError_endpts = CalcShapeError(BC6H_data, BC6H_data->fEndPoints, false);
- return BestError_endpts;
- }
- #ifndef ASPM_GPU
- void SaveDataBlock(BC6H_Encode_local* bc6h_format, CMP_GLOBAL CGU_UINT8 cmpout[COMPRESSED_BLOCK_SIZE])
- {
- BitHeader header(NULL, COMPRESSED_BLOCK_SIZE);
- // Save the RGB end point values
- switch (bc6h_format->m_mode)
- {
- case 1: //0x00
- header.setvalue(0, 2, 0x00);
- header.setvalue(2, 1, bc6h_format->gy, 4); // gy[4]
- header.setvalue(3, 1, bc6h_format->by, 4); // by[4]
- header.setvalue(4, 1, bc6h_format->bz, 4); // bz[4]
- header.setvalue(5, 10, bc6h_format->rw); // 10: rw[9:0]
- header.setvalue(15, 10, bc6h_format->gw); // 10: gw[9:0]
- header.setvalue(25, 10, bc6h_format->bw); // 10: bw[9:0]
- header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0]
- header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4]
- header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0]
- header.setvalue(45, 5, bc6h_format->gx); // 5: gx[4:0]
- header.setvalue(50, 1, bc6h_format->bz); // 5: bz[0]
- header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0]
- header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0]
- header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0]
- header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0]
- header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0]
- header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3]
- break;
- case 2: // 0x01
- header.setvalue(0, 2, 0x01);
- header.setvalue(2, 1, bc6h_format->gy, 5); // gy[5]
- header.setvalue(3, 1, bc6h_format->gz, 4); // gz[4]
- header.setvalue(4, 1, bc6h_format->gz, 5); // gz[5]
- header.setvalue(5, 7, bc6h_format->rw); // rw[6:0]
- header.setvalue(12, 1, bc6h_format->bz); // bz[0]
- header.setvalue(13, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(14, 1, bc6h_format->by, 4); // by[4]
- header.setvalue(15, 7, bc6h_format->gw); // gw[6:0]
- header.setvalue(22, 1, bc6h_format->by, 5); // by[5]
- header.setvalue(23, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4]
- header.setvalue(25, 7, bc6h_format->bw); // 7: bw[6:0]
- header.setvalue(32, 1, bc6h_format->bz, 3); // bz[3]
- header.setvalue(33, 1, bc6h_format->bz, 5); // bz[5]
- header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4]
- header.setvalue(35, 6, bc6h_format->rx); // 6: rx[5:0]
- header.setvalue(41, 4, bc6h_format->gy); // 6: gy[3:0]
- header.setvalue(45, 6, bc6h_format->gx); // 6: gx[5:0]
- header.setvalue(51, 4, bc6h_format->gz); // 6: gz[3:0]
- header.setvalue(55, 6, bc6h_format->bx); // 6: bx[5:0]
- header.setvalue(61, 4, bc6h_format->by); // 6: by[3:0]
- header.setvalue(65, 6, bc6h_format->ry); // 6: ry[5:0]
- header.setvalue(71, 6, bc6h_format->rz); // 6: rz[5:0]
- break;
- case 3: // 0x02
- header.setvalue(0, 5, 0x02);
- header.setvalue(5, 10, bc6h_format->rw); // 11: rw[9:0]
- header.setvalue(15, 10, bc6h_format->gw); // 11: gw[9:0]
- header.setvalue(25, 10, bc6h_format->bw); // 11: bw[9:0]
- header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0]
- header.setvalue(40, 1, bc6h_format->rw, 10); // rw[10]
- header.setvalue(41, 4, bc6h_format->gy); // 4: gy[3:0]
- header.setvalue(45, 4, bc6h_format->gx); // 4: gx[3:0]
- header.setvalue(49, 1, bc6h_format->gw, 10); // gw[10]
- header.setvalue(50, 1, bc6h_format->bz); // 4: bz[0]
- header.setvalue(51, 4, bc6h_format->gz); // 4: gz[3:0]
- header.setvalue(55, 4, bc6h_format->bx); // 4: bx[3:0]
- header.setvalue(59, 1, bc6h_format->bw, 10); // bw[10]
- header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(61, 4, bc6h_format->by); // 4: by[3:0]
- header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0]
- header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0]
- header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3]
- break;
- case 4: // 0x06
- header.setvalue(0, 5, 0x06);
- header.setvalue(5, 10, bc6h_format->rw); // 11: rw[9:0]
- header.setvalue(15, 10, bc6h_format->gw); // 11: gw[9:0]
- header.setvalue(25, 10, bc6h_format->bw); // 11: bw[9:0]
- header.setvalue(35, 4, bc6h_format->rx); // rx[3:0]
- header.setvalue(39, 1, bc6h_format->rw, 10); // rw[10]
- header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4]
- header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0]
- header.setvalue(45, 5, bc6h_format->gx); // gx[4:0]
- header.setvalue(50, 1, bc6h_format->gw, 10); // 5: gw[10]
- header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0]
- header.setvalue(55, 4, bc6h_format->bx); // 4: bx[3:0]
- header.setvalue(59, 1, bc6h_format->bw, 10); // bw[10]
- header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(61, 4, bc6h_format->by); // 4: by[3:0]
- header.setvalue(65, 4, bc6h_format->ry); // 4: ry[3:0]
- header.setvalue(69, 1, bc6h_format->bz); // 4: bz[0]
- header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(71, 4, bc6h_format->rz); // 4: rz[3:0]
- header.setvalue(75, 1, bc6h_format->gy, 4); // gy[4]
- header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3]
- break;
- case 5: // 0x0A
- header.setvalue(0, 5, 0x0A);
- header.setvalue(5, 10, bc6h_format->rw); // 11: rw[9:0]
- header.setvalue(15, 10, bc6h_format->gw); // 11: gw[9:0]
- header.setvalue(25, 10, bc6h_format->bw); // 11: bw[9:0]
- header.setvalue(35, 4, bc6h_format->rx); // 4: rx[3:0]
- header.setvalue(39, 1, bc6h_format->rw, 10); // rw[10]
- header.setvalue(40, 1, bc6h_format->by, 4); // by[4]
- header.setvalue(41, 4, bc6h_format->gy); // 4: gy[3:0]
- header.setvalue(45, 4, bc6h_format->gx); // 4: gx[3:0]
- header.setvalue(49, 1, bc6h_format->gw, 10); // gw[10]
- header.setvalue(50, 1, bc6h_format->bz); // 5: bz[0]
- header.setvalue(51, 4, bc6h_format->gz); // 4: gz[3:0]
- header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0]
- header.setvalue(60, 1, bc6h_format->bw, 10); // bw[10]
- header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0]
- header.setvalue(65, 4, bc6h_format->ry); // 4: ry[3:0]
- header.setvalue(69, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(71, 4, bc6h_format->rz); // 4: rz[3:0]
- header.setvalue(75, 1, bc6h_format->bz, 4); // bz[4]
- header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3]
- break;
- case 6: // 0x0E
- header.setvalue(0, 5, 0x0E);
- header.setvalue(5, 9, bc6h_format->rw); // 9: rw[8:0]
- header.setvalue(14, 1, bc6h_format->by, 4); // by[4]
- header.setvalue(15, 9, bc6h_format->gw); // 9: gw[8:0]
- header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4]
- header.setvalue(25, 9, bc6h_format->bw); // 9: bw[8:0]
- header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4]
- header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0]
- header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4]
- header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0]
- header.setvalue(45, 5, bc6h_format->gx); // 5: gx[4:0]
- header.setvalue(50, 1, bc6h_format->bz); // 5: bz[0]
- header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0]
- header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0]
- header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0]
- header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0]
- header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0]
- header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3]
- break;
- case 7: // 0x12
- header.setvalue(0, 5, 0x12);
- header.setvalue(5, 8, bc6h_format->rw); // 8: rw[7:0]
- header.setvalue(13, 1, bc6h_format->gz, 4); // gz[4]
- header.setvalue(14, 1, bc6h_format->by, 4); // by[4]
- header.setvalue(15, 8, bc6h_format->gw); // 8: gw[7:0]
- header.setvalue(23, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4]
- header.setvalue(25, 8, bc6h_format->bw); // 8: bw[7:0]
- header.setvalue(33, 1, bc6h_format->bz, 3); // bz[3]
- header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4]
- header.setvalue(35, 6, bc6h_format->rx); // 6: rx[5:0]
- header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0]
- header.setvalue(45, 5, bc6h_format->gx); // 5: gx[4:0]
- header.setvalue(50, 1, bc6h_format->bz); // 5: bz[0]
- header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0]
- header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0]
- header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0]
- header.setvalue(65, 6, bc6h_format->ry); // 6: ry[5:0]
- header.setvalue(71, 6, bc6h_format->rz); // 6: rz[5:0]
- break;
- case 8: // 0x16
- header.setvalue(0, 5, 0x16);
- header.setvalue(5, 8, bc6h_format->rw); // 8: rw[7:0]
- header.setvalue(13, 1, bc6h_format->bz); // 5: bz[0]
- header.setvalue(14, 1, bc6h_format->by, 4); // by[4]
- header.setvalue(15, 8, bc6h_format->gw); // 8: gw[7:0]
- header.setvalue(23, 1, bc6h_format->gy, 5); // gy[5]
- header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4]
- header.setvalue(25, 8, bc6h_format->bw); // 8: bw[7:0]
- header.setvalue(33, 1, bc6h_format->gz, 5); // gz[5]
- header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4]
- header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0]
- header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4]
- header.setvalue(41, 4, bc6h_format->gy); // 6: gy[3:0]
- header.setvalue(45, 6, bc6h_format->gx); // 6: gx[5:0]
- header.setvalue(51, 4, bc6h_format->gz); // 6: gz[3:0]
- header.setvalue(55, 5, bc6h_format->bx); // 5: bx[4:0]
- header.setvalue(60, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(61, 4, bc6h_format->by); // 5: by[3:0]
- header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0]
- header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0]
- header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3]
- break;
- case 9: // 0x1A
- header.setvalue(0, 5, 0x1A);
- header.setvalue(5, 8, bc6h_format->rw); // 8: rw[7:0]
- header.setvalue(13, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(14, 1, bc6h_format->by, 4); // by[4]
- header.setvalue(15, 8, bc6h_format->gw); // 8: gw[7:0]
- header.setvalue(23, 1, bc6h_format->by, 5); // by[5]
- header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4]
- header.setvalue(25, 8, bc6h_format->bw); // 8: bw[7:0]
- header.setvalue(33, 1, bc6h_format->bz, 5); // bz[5]
- header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4]
- header.setvalue(35, 5, bc6h_format->rx); // 5: rx[4:0]
- header.setvalue(40, 1, bc6h_format->gz, 4); // gz[4]
- header.setvalue(41, 4, bc6h_format->gy); // 5: gy[3:0]
- header.setvalue(45, 5, bc6h_format->gx); // 5: gx[4:0]
- header.setvalue(50, 1, bc6h_format->bz); // 6: bz[0]
- header.setvalue(51, 4, bc6h_format->gz); // 5: gz[3:0]
- header.setvalue(55, 6, bc6h_format->bx); // 6: bx[5:0]
- header.setvalue(61, 4, bc6h_format->by); // 6: by[3:0]
- header.setvalue(65, 5, bc6h_format->ry); // 5: ry[4:0]
- header.setvalue(70, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(71, 5, bc6h_format->rz); // 5: rz[4:0]
- header.setvalue(76, 1, bc6h_format->bz, 3); // bz[3]
- break;
- case 10: // 0x1E
- header.setvalue(0, 5, 0x1E);
- header.setvalue(5, 6, bc6h_format->rw); // 6: rw[5:0]
- header.setvalue(11, 1, bc6h_format->gz, 4); // gz[4]
- header.setvalue(12, 1, bc6h_format->bz); // 6: bz[0]
- header.setvalue(13, 1, bc6h_format->bz, 1); // bz[1]
- header.setvalue(14, 1, bc6h_format->by, 4); // by[4]
- header.setvalue(15, 6, bc6h_format->gw); // 6: gw[5:0]
- header.setvalue(21, 1, bc6h_format->gy, 5); // gy[5]
- header.setvalue(22, 1, bc6h_format->by, 5); // by[5]
- header.setvalue(23, 1, bc6h_format->bz, 2); // bz[2]
- header.setvalue(24, 1, bc6h_format->gy, 4); // gy[4]
- header.setvalue(25, 6, bc6h_format->bw); // 6: bw[5:0]
- header.setvalue(31, 1, bc6h_format->gz, 5); // gz[5]
- header.setvalue(32, 1, bc6h_format->bz, 3); // bz[3]
- header.setvalue(33, 1, bc6h_format->bz, 5); // bz[5]
- header.setvalue(34, 1, bc6h_format->bz, 4); // bz[4]
- header.setvalue(35, 6, bc6h_format->rx); // 6: rx[5:0]
- header.setvalue(41, 4, bc6h_format->gy); // 6: gy[3:0]
- header.setvalue(45, 6, bc6h_format->gx); // 6: gx[5:0]
- header.setvalue(51, 4, bc6h_format->gz); // 6: gz[3:0]
- header.setvalue(55, 6, bc6h_format->bx); // 6: bx[5:0]
- header.setvalue(61, 4, bc6h_format->by); // 6: by[3:0]
- header.setvalue(65, 6, bc6h_format->ry); // 6: ry[5:0]
- header.setvalue(71, 6, bc6h_format->rz); // 6: rz[5:0]
- break;
- // Single regions Modes
- case 11: // 0x03
- header.setvalue(0, 5, 0x03);
- header.setvalue(5, 10, bc6h_format->rw); // 10: rw[9:0]
- header.setvalue(15, 10, bc6h_format->gw); // 10: gw[9:0]
- header.setvalue(25, 10, bc6h_format->bw); // 10: bw[9:0]
- header.setvalue(35, 10, bc6h_format->rx); // 10: rx[9:0]
- header.setvalue(45, 10, bc6h_format->gx); // 10: gx[9:0]
- header.setvalue(55, 10, bc6h_format->bx); // 10: bx[9:0]
- break;
- case 12: // 0x07
- header.setvalue(0, 5, 0x07);
- header.setvalue(5, 10, bc6h_format->rw); // 11: rw[9:0]
- header.setvalue(15, 10, bc6h_format->gw); // 11: gw[9:0]
- header.setvalue(25, 10, bc6h_format->bw); // 11: bw[9:0]
- header.setvalue(35, 9, bc6h_format->rx); // 9: rx[8:0]
- header.setvalue(44, 1, bc6h_format->rw, 10); // rw[10]
- header.setvalue(45, 9, bc6h_format->gx); // 9: gx[8:0]
- header.setvalue(54, 1, bc6h_format->gw, 10); // gw[10]
- header.setvalue(55, 9, bc6h_format->bx); // 9: bx[8:0]
- header.setvalue(64, 1, bc6h_format->bw, 10); // bw[10]
- break;
- case 13: // 0x0B
- header.setvalue(0, 5, 0x0B);
- header.setvalue(5, 10, bc6h_format->rw); // 12: rw[9:0]
- header.setvalue(15, 10, bc6h_format->gw); // 12: gw[9:0]
- header.setvalue(25, 10, bc6h_format->bw); // 12: bw[9:0]
- header.setvalue(35, 8, bc6h_format->rx); // 8: rx[7:0]
- header.setvalue(43, 1, bc6h_format->rw, 11); // rw[11]
- header.setvalue(44, 1, bc6h_format->rw, 10); // rw[10]
- header.setvalue(45, 8, bc6h_format->gx); // 8: gx[7:0]
- header.setvalue(53, 1, bc6h_format->gw, 11); // gw[11]
- header.setvalue(54, 1, bc6h_format->gw, 10); // gw[10]
- header.setvalue(55, 8, bc6h_format->bx); // 8: bx[7:0]
- header.setvalue(63, 1, bc6h_format->bw, 11); // bw[11]
- header.setvalue(64, 1, bc6h_format->bw, 10); // bw[10]
- break;
- case 14: // 0x0F
- header.setvalue(0, 5, 0x0F);
- header.setvalue(5, 10, bc6h_format->rw); // 16: rw[9:0]
- header.setvalue(15, 10, bc6h_format->gw); // 16: gw[9:0]
- header.setvalue(25, 10, bc6h_format->bw); // 16: bw[9:0]
- header.setvalue(35, 4, bc6h_format->rx); // 4: rx[3:0]
- header.setvalue(39, 6, bc6h_format->rw, 10); // rw[15:10]
- header.setvalue(45, 4, bc6h_format->gx); // 4: gx[3:0]
- header.setvalue(49, 6, bc6h_format->gw, 10); // gw[15:10]
- header.setvalue(55, 4, bc6h_format->bx); // 4: bx[3:0]
- header.setvalue(59, 6, bc6h_format->bw, 10); // bw[15:10]
- break;
- default: // Need to indicate error!
- return;
- }
- // Each format in the mode table can be uniquely identified by the mode bits.
- // The first ten modes are used for two-region tiles, and the mode bit field
- // can be either two or five bits long. These blocks also have fields for
- // the compressed color endpoints (72 or 75 bits), the partition (5 bits),
- // and the partition indices (46 bits).
- if (bc6h_format->m_mode >= MIN_MODE_FOR_ONE_REGION)
- {
- CGU_INT startbit = ONE_REGION_INDEX_OFFSET;
- header.setvalue(startbit, 3, bc6h_format->indices16[0]);
- startbit += 3;
- for (CGU_INT i = 1; i < 16; i++)
- {
- header.setvalue(startbit, 4, bc6h_format->indices16[i]);
- startbit += 4;
- }
- }
- else
- {
- header.setvalue(77, 5, bc6h_format->d_shape_index); // Shape Index
- CGU_INT startbit = TWO_REGION_INDEX_OFFSET, nbits = 2;
- header.setvalue(startbit, nbits, bc6h_format->indices16[0]);
- for (CGU_INT i = 1; i < 16; i++)
- {
- startbit += nbits; // offset start bit for next index using prior nbits used
- nbits = g_indexfixups[bc6h_format->d_shape_index] == i ? 2 : 3; // get new number of bit to save index with
- header.setvalue(startbit, nbits, bc6h_format->indices16[i]);
- }
- }
- // save to output buffer our new bit values
- // this can be optimized if header is part of bc6h_format struct
- header.transferbits(cmpout, 16);
- }
- #else
- void SaveDataBlock(BC6H_Encode_local* bc6h_format, CMP_GLOBAL CGU_UINT8 out[COMPRESSED_BLOCK_SIZE])
- {
- // ToDo
- }
- #endif
- void SwapIndices(CGU_INT32 iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_INT32 iIndices[3][MAX_SUBSET_SIZE],
- CGU_INT entryCount[MAX_SUBSETS],
- CGU_INT max_subsets,
- CGU_INT mode,
- CGU_INT shape_pattern)
- {
- CGU_UINT32 uNumIndices = 1 << ModePartition[mode].IndexPrec;
- CGU_UINT32 uHighIndexBit = uNumIndices >> 1;
- for (CGU_INT subset = 0; subset < max_subsets; ++subset)
- {
- // region 0 (subset = 0) The fix-up index for this subset is allways index 0
- // region 1 (subset = 1) The fix-up index for this subset varies based on the shape
- size_t i = subset ? g_Region2FixUp[shape_pattern] : 0;
- if (iIndices[subset][i] & uHighIndexBit)
- {
- #ifdef ASPM_GPU
- // high bit is set, swap the aEndPts and indices for this region
- swap(iEndPoints[subset][0][0], iEndPoints[subset][1][0]);
- swap(iEndPoints[subset][0][1], iEndPoints[subset][1][1]);
- swap(iEndPoints[subset][0][2], iEndPoints[subset][1][2]);
- #else
- // high bit is set, swap the aEndPts and indices for this region
- std::swap(iEndPoints[subset][0][0], iEndPoints[subset][1][0]);
- std::swap(iEndPoints[subset][0][1], iEndPoints[subset][1][1]);
- std::swap(iEndPoints[subset][0][2], iEndPoints[subset][1][2]);
- #endif
- for (size_t j = 0; j < (size_t)entryCount[subset]; ++j)
- {
- iIndices[subset][j] = uNumIndices - 1 - iIndices[subset][j];
- }
- }
- }
- }
- // helper function to check transform overflow
- // todo: check overflow by checking against sign
- CGU_BOOL isOverflow(CGU_INT endpoint, CGU_INT nbit)
- {
- CGU_INT maxRange = (int)pow(2.0f, (CGU_FLOAT)nbit - 1.0f) - 1;
- CGU_INT minRange = (int)-(pow(2.0f, (CGU_FLOAT)nbit - 1.0f));
- //no overflow
- if ((endpoint >= minRange) && (endpoint <= maxRange))
- return false;
- else //overflow
- return true;
- }
- CGU_BOOL TransformEndPoints(BC6H_Encode_local* BC6H_data,
- CGU_INT iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_INT oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_INT max_subsets,
- CGU_INT mode)
- {
- CGU_INT Mask;
- if (ModePartition[mode].transformed)
- {
- BC6H_data->istransformed = true;
- for (CGU_INT i = 0; i < 3; ++i)
- {
- Mask = MASK(ModePartition[mode].nbits);
- oEndPoints[0][0][i] = iEndPoints[0][0][i] & Mask; // [0][A]
- Mask = MASK(ModePartition[mode].prec[i]);
- oEndPoints[0][1][i] = iEndPoints[0][1][i] - iEndPoints[0][0][i]; // [0][B] - [0][A]
- if (isOverflow(oEndPoints[0][1][i], ModePartition[mode].prec[i]))
- return false;
- oEndPoints[0][1][i] = (oEndPoints[0][1][i] & Mask);
- //redo the check for sign overflow for one region case
- if (max_subsets <= 1)
- {
- if (isOverflow(oEndPoints[0][1][i], ModePartition[mode].prec[i]))
- return false;
- }
- if (max_subsets > 1)
- {
- oEndPoints[1][0][i] = iEndPoints[1][0][i] - iEndPoints[0][0][i]; // [1][A] - [0][A]
- if (isOverflow(oEndPoints[1][0][i], ModePartition[mode].prec[i]))
- return false;
- oEndPoints[1][0][i] = (oEndPoints[1][0][i] & Mask);
- oEndPoints[1][1][i] = iEndPoints[1][1][i] - iEndPoints[0][0][i]; // [1][B] - [0][A]
- if (isOverflow(oEndPoints[1][1][i], ModePartition[mode].prec[i]))
- return false;
- oEndPoints[1][1][i] = (oEndPoints[1][1][i] & Mask);
- }
- }
- }
- else
- {
- BC6H_data->istransformed = false;
- for (CGU_INT i = 0; i < 3; ++i)
- {
- Mask = MASK(ModePartition[mode].nbits);
- oEndPoints[0][0][i] = iEndPoints[0][0][i] & Mask;
- Mask = MASK(ModePartition[mode].prec[i]);
- oEndPoints[0][1][i] = iEndPoints[0][1][i] & Mask;
- if (max_subsets > 1)
- {
- oEndPoints[1][0][i] = iEndPoints[1][0][i] & Mask;
- oEndPoints[1][1][i] = iEndPoints[1][1][i] & Mask;
- }
- }
- }
- return true;
- }
- void SaveCompressedBlockData(BC6H_Encode_local* BC6H_data,
- CGU_INT oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_INT iIndices[2][MAX_SUBSET_SIZE],
- CGU_INT8 max_subsets,
- CGU_INT8 mode)
- {
- BC6H_data->m_mode = mode;
- BC6H_data->index++;
- // Save the data to output
- BC6H_data->rw = oEndPoints[0][0][0]; // rw
- BC6H_data->gw = oEndPoints[0][0][1]; // gw
- BC6H_data->bw = oEndPoints[0][0][2]; // bw
- BC6H_data->rx = oEndPoints[0][1][0]; // rx
- BC6H_data->gx = oEndPoints[0][1][1]; // gx
- BC6H_data->bx = oEndPoints[0][1][2]; // bx
- if (max_subsets > 1)
- {
- // Save the data to output
- BC6H_data->ry = oEndPoints[1][0][0]; // ry
- BC6H_data->gy = oEndPoints[1][0][1]; // gy
- BC6H_data->by = oEndPoints[1][0][2]; // by
- BC6H_data->rz = oEndPoints[1][1][0]; // rz
- BC6H_data->gz = oEndPoints[1][1][1]; // gz
- BC6H_data->bz = oEndPoints[1][1][2]; // bz
- }
- // Map our two subset Indices for the shape to output 4x4 block
- CGU_INT pos[2] = {0, 0};
- CGU_INT asubset;
- for (CGU_INT i = 0; i < MAX_SUBSET_SIZE; i++)
- {
- if (max_subsets > 1)
- asubset = BC6_PARTITIONS[BC6H_data->d_shape_index][i]; // Two region shapes
- else
- asubset = 0; // One region shapes
- BC6H_data->indices16[i] = (CGU_UINT8)iIndices[asubset][pos[asubset]];
- pos[asubset]++;
- }
- }
- CGU_FLOAT CalcOneRegionEndPtsError(BC6H_Encode_local* BC6H_data,
- CGU_FLOAT fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_INT shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE])
- {
- CGU_FLOAT error = 0;
- for (CGU_INT i = 0; i < MAX_SUBSET_SIZE; i++)
- {
- for (CGU_INT m = 0; m < MAX_END_POINTS; m++)
- {
- for (CGU_INT n = 0; n < NCHANNELS; n++)
- {
- CGU_FLOAT calencpts = fEndPoints[0][m][n] + (abs(fEndPoints[0][m][n] - fEndPoints[0][m][n]) * (shape_indices[0][i] / 15));
- error += abs(BC6H_data->din[i][n] - calencpts);
- }
- }
- }
- return error;
- }
- void ReIndexShapef(BC6H_Encode_local* BC6H_data, CGU_INT shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE])
- {
- CGU_FLOAT error = 0;
- CGU_FLOAT bestError;
- CGU_INT bestIndex = 0;
- CGU_INT sub0index = 0;
- CGU_INT sub1index = 0;
- CGU_INT MaxPallet;
- CGU_INT region = (BC6H_data->region - 1);
- if (region == 0)
- MaxPallet = 16;
- else
- MaxPallet = 8;
- CGU_UINT8 isSet = 0;
- for (CGU_INT i = 0; i < MAX_SUBSET_SIZE; i++)
- {
- // subset 0 or subset 1
- if (region)
- isSet = BC6_PARTITIONS[BC6H_data->d_shape_index][i];
- if (isSet)
- {
- bestError = CMP_HALF_MAX;
- bestIndex = 0;
- // For two shape regions max Pallet is 8
- for (CGU_INT j = 0; j < MaxPallet; j++)
- {
- // Calculate error from original
- error = abs(BC6H_data->din[i][0] - BC6H_data->Paletef[1][j].x) + abs(BC6H_data->din[i][1] - BC6H_data->Paletef[1][j].y) +
- abs(BC6H_data->din[i][2] - BC6H_data->Paletef[1][j].z);
- if (error < bestError)
- {
- bestError = error;
- bestIndex = j;
- }
- }
- shape_indices[1][sub1index] = bestIndex;
- sub1index++;
- }
- else
- {
- // This is shared for one or two shape regions max Pallet either 16 or 8
- bestError = CMP_FLOAT_MAX;
- bestIndex = 0;
- for (CGU_INT j = 0; j < MaxPallet; j++)
- {
- // Calculate error from original
- error = abs(BC6H_data->din[i][0] - BC6H_data->Paletef[0][j].x) + abs(BC6H_data->din[i][1] - BC6H_data->Paletef[0][j].y) +
- abs(BC6H_data->din[i][2] - BC6H_data->Paletef[0][j].z);
- if (error < bestError)
- {
- bestError = error;
- bestIndex = j;
- }
- }
- shape_indices[0][sub0index] = bestIndex;
- sub0index++;
- }
- }
- }
- CGU_INT Unquantize(CGU_INT comp, unsigned char uBitsPerComp, CGU_BOOL bSigned)
- {
- CGU_INT unq = 0, s = 0;
- if (bSigned)
- {
- if (uBitsPerComp >= 16)
- {
- unq = comp;
- }
- else
- {
- if (comp < 0)
- {
- s = 1;
- comp = -comp;
- }
- if (comp == 0)
- unq = 0;
- else if (comp >= ((1 << (uBitsPerComp - 1)) - 1))
- unq = 0x7FFF;
- else
- unq = ((comp << 15) + 0x4000) >> (uBitsPerComp - 1);
- if (s)
- unq = -unq;
- }
- }
- else
- {
- if (uBitsPerComp >= 15)
- unq = comp;
- else if (comp == 0)
- unq = 0;
- else if (comp == ((1 << uBitsPerComp) - 1))
- unq = 0xFFFF;
- else
- unq = ((comp << 16) + 0x8000) >> uBitsPerComp;
- }
- return unq;
- }
- CGU_INT finish_unquantizef16(CGU_INT q, CGU_BOOL isSigned)
- {
- // Is it F16 Signed else F16 Unsigned
- if (isSigned)
- return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5; // scale the magnitude by 31/32
- else
- return (q * 31) >> 6; // scale the magnitude by 31/64
- // Note for Undefined we should return q as is
- }
- // decompress endpoints
- void decompress_endpoints1(BC6H_Encode_local* bc6h_format,
- CGU_INT oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_FLOAT outf[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_INT mode)
- {
- CGU_INT i;
- CGU_INT t;
- CGU_FLOAT out[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
- if (bc6h_format->issigned)
- {
- if (bc6h_format->istransformed)
- {
- for (i = 0; i < NCHANNELS; i++)
- {
- out[0][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][0][i], ModePartition[mode].nbits);
- t = SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]); //C_RED
- t = (t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits);
- out[0][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits);
- // Unquantize all points to nbits
- out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
- // F16 format
- outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
- outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
- }
- }
- else
- {
- for (i = 0; i < NCHANNELS; i++)
- {
- out[0][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][0][i], ModePartition[mode].nbits);
- out[0][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
- // Unquantize all points to nbits
- out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
- // F16 format
- outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
- outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
- }
- }
- }
- else
- {
- if (bc6h_format->istransformed)
- {
- for (i = 0; i < NCHANNELS; i++)
- {
- out[0][0][i] = (CGU_FLOAT)oEndPoints[0][0][i];
- t = SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
- out[0][1][i] = (CGU_FLOAT)((t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits));
- // Unquantize all points to nbits
- out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
- // F16 format
- outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
- outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
- }
- }
- else
- {
- for (i = 0; i < NCHANNELS; i++)
- {
- out[0][0][i] = (CGU_FLOAT)oEndPoints[0][0][i];
- out[0][1][i] = (CGU_FLOAT)oEndPoints[0][1][i];
- // Unquantize all points to nbits
- out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
- // F16 format
- outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
- outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
- }
- }
- }
- }
- void decompress_endpoints2(BC6H_Encode_local* bc6h_format,
- CGU_INT oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_FLOAT outf[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
- CGU_INT mode)
- {
- CGU_INT i;
- CGU_INT t;
- CGU_FLOAT out[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
- if (bc6h_format->issigned)
- {
- if (bc6h_format->istransformed)
- {
- for (i = 0; i < NCHANNELS; i++)
- {
- // get the quantized values
- out[0][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][0][i], ModePartition[mode].nbits);
- t = SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
- t = (t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits);
- out[0][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits);
- t = SIGN_EXTEND_TYPELESS(oEndPoints[1][0][i], ModePartition[mode].prec[i]);
- t = (t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits);
- out[1][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits);
- t = SIGN_EXTEND_TYPELESS(oEndPoints[1][1][i], ModePartition[mode].prec[i]);
- t = (t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits);
- out[1][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits);
- // Unquantize all points to nbits
- out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, true);
- out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, true);
- out[1][0][i] = (CGU_FLOAT)Unquantize((int)out[1][0][i], (unsigned char)ModePartition[mode].nbits, true);
- out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, true);
- // F16 format
- outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], true);
- outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], true);
- outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], true);
- outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], true);
- }
- }
- else
- {
- for (i = 0; i < NCHANNELS; i++)
- {
- out[0][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][0][i], ModePartition[mode].nbits);
- out[0][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
- out[1][0][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[1][0][i], ModePartition[mode].prec[i]);
- out[1][1][i] = (CGU_FLOAT)SIGN_EXTEND_TYPELESS(oEndPoints[1][1][i], ModePartition[mode].prec[i]);
- // Unquantize all points to nbits
- out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
- out[1][0][i] = (CGU_FLOAT)Unquantize((int)out[1][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);
- // nbits to F16 format
- outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
- outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
- outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false);
- outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false);
- }
- }
- }
- else
- {
- if (bc6h_format->istransformed)
- {
- for (i = 0; i < NCHANNELS; i++)
- {
- out[0][0][i] = (CGU_FLOAT)oEndPoints[0][0][i];
- t = SIGN_EXTEND_TYPELESS(oEndPoints[0][1][i], ModePartition[mode].prec[i]);
- out[0][1][i] = (CGU_FLOAT)((t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits));
- t = SIGN_EXTEND_TYPELESS(oEndPoints[1][0][i], ModePartition[mode].prec[i]);
- out[1][0][i] = (CGU_FLOAT)((t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits));
- t = SIGN_EXTEND_TYPELESS(oEndPoints[1][1][i], ModePartition[mode].prec[i]);
- out[1][1][i] = (CGU_FLOAT)((t + oEndPoints[0][0][i]) & MASK(ModePartition[mode].nbits));
- // Unquantize all points to nbits
- out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
- out[1][0][i] = (CGU_FLOAT)Unquantize((int)out[1][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);
- // nbits to F16 format
- outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
- outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
- outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false);
- outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false);
- }
- }
- else
- {
- for (i = 0; i < NCHANNELS; i++)
- {
- out[0][0][i] = (CGU_FLOAT)oEndPoints[0][0][i];
- out[0][1][i] = (CGU_FLOAT)oEndPoints[0][1][i];
- out[1][0][i] = (CGU_FLOAT)oEndPoints[1][0][i];
- out[1][1][i] = (CGU_FLOAT)oEndPoints[1][1][i];
- // Unquantize all points to nbits
- out[0][0][i] = (CGU_FLOAT)Unquantize((int)out[0][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[0][1][i] = (CGU_FLOAT)Unquantize((int)out[0][1][i], (unsigned char)ModePartition[mode].nbits, false);
- out[1][0][i] = (CGU_FLOAT)Unquantize((int)out[1][0][i], (unsigned char)ModePartition[mode].nbits, false);
- out[1][1][i] = (CGU_FLOAT)Unquantize((int)out[1][1][i], (unsigned char)ModePartition[mode].nbits, false);
- // nbits to F16 format
- outf[0][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][0][i], false);
- outf[0][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[0][1][i], false);
- outf[1][0][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][0][i], false);
- outf[1][1][i] = (CGU_FLOAT)finish_unquantizef16((int)out[1][1][i], false);
- }
- }
- }
- }
- // Decompress endpoints: rebuilds the endpoint values in 'out' from the compressed values in 'in' for the three colour channels of the given mode. NOTE(review): R_0..R_3 are macros defined outside this chunk; they presumably select the four endpoints of channel 'i', so the loop variable name matters - confirm before renaming anything here.
- static void decompress_endpts(const CGU_INT in[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], // compressed endpoints (read only)
- CGU_INT out[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], // receives the decompressed endpoints
- const CGU_INT mode, // index into ModePartition[]
- CGU_BOOL issigned) // when set, results are sign-extended
- {
- if (ModePartition[mode].transformed) // transformed modes: R_1..R_3 are stored as signed offsets from R_0
- {
- for (CGU_INT i = 0; i < 3; ++i)
- {
- R_0(out) = issigned ? SIGN_EXTEND_TYPELESS(R_0(in), ModePartition[mode].IndexPrec) : R_0(in); // NOTE(review): sign-extends with IndexPrec while the non-transformed branch below uses nbits - confirm this width is intended
- CGU_INT t;
- t = SIGN_EXTEND_TYPELESS(R_1(in), ModePartition[mode].prec[i]); // widen the stored offset from prec[i] bits
- t = (t + R_0(in)) & MASK(ModePartition[mode].nbits); // add the base value and wrap to nbits
- R_1(out) = issigned ? SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits) : t;
- t = SIGN_EXTEND_TYPELESS(R_2(in), ModePartition[mode].prec[i]); // same steps for R_2
- t = (t + R_0(in)) & MASK(ModePartition[mode].nbits);
- R_2(out) = issigned ? SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits) : t;
- t = SIGN_EXTEND_TYPELESS(R_3(in), ModePartition[mode].prec[i]); // same steps for R_3
- t = (t + R_0(in)) & MASK(ModePartition[mode].nbits);
- R_3(out) = issigned ? SIGN_EXTEND_TYPELESS(t, ModePartition[mode].nbits) : t;
- }
- }
- else // non-transformed modes: every value is stored directly
- {
- for (CGU_INT i = 0; i < 3; ++i)
- {
- R_0(out) = issigned ? SIGN_EXTEND_TYPELESS(R_0(in), ModePartition[mode].nbits) : R_0(in); // copied as-is unless signed
- R_1(out) = issigned ? SIGN_EXTEND_TYPELESS(R_1(in), ModePartition[mode].prec[i]) : R_1(in);
- R_2(out) = issigned ? SIGN_EXTEND_TYPELESS(R_2(in), ModePartition[mode].prec[i]) : R_2(in);
- R_3(out) = issigned ? SIGN_EXTEND_TYPELESS(R_3(in), ModePartition[mode].prec[i]) : R_3(in);
- }
- }
- }
- // Endpoints "fit" a mode only when compression was lossless: the compressed
- // endpoints are decompressed again and compared against the originals for
- // both endpoints of every subset (three channels each).
- // Returns true when every compared value matches, false on the first mismatch.
- static CGU_BOOL endpts_fit(const CGU_INT orig[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
-                            const CGU_INT compressed[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
-                            const CGU_INT mode,
-                            CGU_INT       max_subsets,
-                            CGU_BOOL      issigned)
- {
-     // Round-trip the compressed endpoints through the decoder
-     CGU_INT restored[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
-     decompress_endpts(compressed, restored, mode, issigned);
-     for (CGU_INT region = 0; region < max_subsets; ++region)
-     {
-         for (CGU_INT channel = 0; channel < 3; ++channel)
-         {
-             // First endpoint is checked before the second, as a mismatch in either one fails the fit
-             if (orig[region][0][channel] != restored[region][0][channel] || orig[region][1][channel] != restored[region][1][channel])
-             {
-                 return false;
-             }
-         }
-     }
-     return true;
- }
- // Converts the float endpoints of each subset to integers and quantizes them
- // to 'prec' bits with cmp_QuantizeToBitSize, writing the results to iEndPoints.
- // Only the first three channels of the two endpoints per subset are touched.
- //todo: checkoverflow
- void QuantizeEndPointToF16Prec(CGU_FLOAT EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
-                                CGU_INT   iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
-                                CGU_INT   max_subsets,
-                                CGU_INT   prec,
-                                CGU_BOOL  isSigned)
- {
-     for (CGU_INT region = 0; region < max_subsets; ++region)
-     {
-         for (CGU_INT endPoint = 0; endPoint < 2; ++endPoint)  // 0 = A, 1 = B
-         {
-             for (CGU_INT channel = 0; channel < 3; ++channel)  // 0 = Red, 1 = Green, 2 = Blue
-             {
-                 // The float value is truncated to an integer before quantization
-                 const CGU_INT truncated = (CGU_INT)EndPoints[region][endPoint][channel];
-                 iEndPoints[region][endPoint][channel] = cmp_QuantizeToBitSize(truncated, prec, isSigned);
-             }
-         }
-     }
- }
- CGU_FLOAT EncodePattern(BC6H_Encode_local* BC6H_data, CGU_FLOAT error) // Tries each candidate mode for the block's region count, keeps the best one whose endpoints fit and saves it via SaveCompressedBlockData; returns that mode's error, or the incoming 'error' unchanged when no mode was selected
- {
- CGU_INT8 max_subsets = BC6H_data->region; // number of subsets equals the region count
- // now we have input colors (in), output colors (outB) mapped to a line of ends (EndPoints)
- // and a set of colors on the line equally spaced (indexedcolors)
- // Let's assign indices
- //CGU_FLOAT SrcEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; // temp endpoints used during calculations
- // Quantize the EndPoints
- CGU_INT F16EndPoints[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; // temp endpoints used during calculations
- CGU_INT quantEndPoints[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG]; // endpoints to save for a given mode
- // ModePartition[] starts from 1 to 14
- // If we have a shape pattern set the loop to check modes from 1 to 10 else from 11 to 14
- // The loop counter indexes ModeFitOrder[], whose entries select the ModePartition[] mode actually tried
- CGU_INT min_mode = (BC6H_data->region == 2) ? 1 : 11;
- CGU_INT max_mode = (BC6H_data->region == 2) ? MAX_TWOREGION_MODES : MAX_BC6H_MODES;
- CGU_BOOL fits[15]; // per-slot result of endpts_fit, indexed by the loop counter
- memset((CGU_UINT8*)fits, 0, sizeof(fits));
- CGU_INT bestFit = 0; // slot of the mode chosen so far; 0 means nothing chosen (the loop starts at 1 or 11)
- CGU_INT bestEndpointMode = 0; // one-region only: slot with the lowest endpoint error so far
- CGU_FLOAT bestError = CMP_FLOAT_MAX;
- CGU_FLOAT bestEndpointsErr = CMP_FLOAT_MAX;
- CGU_FLOAT endPointErr = 0;
- // Try Optimization for the Mode
- CGU_FLOAT best_EndPoints[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
- CGU_INT best_Indices[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_SUBSET_SIZE];
- CGU_FLOAT opt_toterr[MAX_BC6H_MODES + 1] = {0};
- memset((CGU_UINT8*)opt_toterr, 0, sizeof(opt_toterr)); // NOTE(review): appears redundant with the {0} initializer on the previous line
- CGU_INT numfits = 0; // number of slots that passed both fit checks
- //
- // Notes; Only the endpoints are varying; the indices stay fixed in values!
- // so to optimize which mode we need only check the endpoints error against our original to pick the mode to save
- //
- for (CGU_INT modes = min_mode; modes <= max_mode; ++modes)
- {
- memcpy((CGU_UINT8*)best_EndPoints[modes], (CGU_UINT8*)BC6H_data->fEndPoints, sizeof(BC6H_data->fEndPoints)); // start every slot from the same float endpoints
- memcpy((CGU_UINT8*)best_Indices[modes], (CGU_UINT8*)BC6H_data->shape_indices, sizeof(BC6H_data->shape_indices)); // ...and the same indices
- {
- QuantizeEndPointToF16Prec(best_EndPoints[modes], F16EndPoints[modes], max_subsets, ModePartition[ModeFitOrder[modes]].nbits, BC6H_data->issigned); // quantize to this mode's nbits
- }
- // Indices data to save for given mode
- SwapIndices(F16EndPoints[modes], best_Indices[modes], BC6H_data->entryCount, max_subsets, ModeFitOrder[modes], BC6H_data->d_shape_index);
- CGU_BOOL transformfit = TransformEndPoints(BC6H_data, F16EndPoints[modes], quantEndPoints[modes], max_subsets, ModeFitOrder[modes]);
- fits[modes] = endpts_fit(F16EndPoints[modes], quantEndPoints[modes], ModeFitOrder[modes], max_subsets, BC6H_data->issigned); // lossless round-trip check
- if (fits[modes] && transformfit)
- {
- numfits++;
- // The new compressed end points fit the mode
- // recalculate the error for this mode with a new set of indices
- // since we have shifted the end points from what we originally calculated
- // from the find_bestpattern
- CGU_FLOAT uncompressed[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
- if (BC6H_data->region == 1)
- decompress_endpoints1(BC6H_data, quantEndPoints[modes], uncompressed, ModeFitOrder[modes]);
- else
- decompress_endpoints2(BC6H_data, quantEndPoints[modes], uncompressed, ModeFitOrder[modes]);
- // Takes the end points and creates a palette of colors
- // based on preset weights along a vector formed by the two end points
- palitizeEndPointsF(BC6H_data, uncompressed);
- // Once we have the palette - recalculate the optimal indices using the palette
- // and the original image data stored in BC6H_data.din[]
- if (!BC6H_data->issigned) // re-indexing is only done for unsigned data
- ReIndexShapef(BC6H_data, best_Indices[modes]);
- // Calculate the error of the new tile vs the old tile data
- opt_toterr[modes] = CalcShapeError(BC6H_data, uncompressed, true);
- if (BC6H_data->region == 1)
- {
- endPointErr = CalcOneRegionEndPtsError(BC6H_data, uncompressed, best_Indices[modes]);
- if (endPointErr < bestEndpointsErr) // remember the slot with the lowest endpoint error
- {
- bestEndpointsErr = endPointErr;
- bestEndpointMode = modes;
- }
- }
- CGU_BOOL transformFit = true; // note: differs from 'transformfit' above only by letter case
- // Hold on to this mode's fit data if it is better than the last one checked.
- if (opt_toterr[modes] < bestError)
- {
- if (!BC6H_data->issigned) // unsigned data: re-quantize from the decompressed endpoints and transform again
- {
- QuantizeEndPointToF16Prec(uncompressed, F16EndPoints[modes], max_subsets, ModePartition[ModeFitOrder[modes]].nbits, BC6H_data->issigned);
- SwapIndices(F16EndPoints[modes], best_Indices[modes], BC6H_data->entryCount, max_subsets, ModeFitOrder[modes], BC6H_data->d_shape_index);
- transformFit = TransformEndPoints(BC6H_data, F16EndPoints[modes], quantEndPoints[modes], max_subsets, ModeFitOrder[modes]);
- }
- if (transformFit)
- {
- if (BC6H_data->region == 1)
- {
- bestFit = (modes == bestEndpointMode) ? modes : ((modes < bestEndpointMode) ? modes : bestEndpointMode); // evaluates to the smaller of modes and bestEndpointMode
- }
- else
- {
- bestFit = modes;
- }
- bestError = opt_toterr[bestFit];
- error = bestError;
- }
- }
- }
- }
- if (numfits > 0) // NOTE(review): bestFit can still be 0 here if every fitting slot failed the second TransformEndPoints; slot 0 of quantEndPoints/best_Indices is never written by the loop above - confirm this cannot happen
- {
- SaveCompressedBlockData(BC6H_data, quantEndPoints[bestFit], best_Indices[bestFit], max_subsets, ModeFitOrder[bestFit]);
- return error;
- }
- // Should not get here!
- return error; // nothing fitted: the caller's error value is returned unchanged
- }
- void CompressBlockBC6_Internal(CMP_GLOBAL unsigned char* outdata, // destination buffer; the compressed block is written starting at outdata[destIdx]
- CGU_UINT32 destIdx, // byte offset of this block inside outdata
- BC6H_Encode_local* BC6HEncode_local, // per-block working state; its din[] holds the source pixels read below
- CMP_GLOBAL const BC6H_Encode* BC6HEncode) // encoder options; m_quality and m_isSigned are read from it
- {
- // printf("---SRC---\n");
- // CGU_UINT8 blkindex = 0;
- // CGU_UINT8 srcindex = 0;
- // CGU_FLOAT f[3];
- // for ( CGU_INT32 j = 0; j < 16; j++) {
- // printf("%04x," , CGU_UINT32(BC6HEncode_local->din[j][0])); // R
- // printf("%04x," , CGU_UINT32(BC6HEncode_local->din[j][1])); // G
- // printf("%04x : " , CGU_UINT32(BC6HEncode_local->din[j][2])); // B
- // f[0] = HalfToFloat(BC6HEncode_local->din[j][0]); // R
- // f[1] = HalfToFloat(BC6HEncode_local->din[j][1]); // G
- // f[2] = HalfToFloat(BC6HEncode_local->din[j][2]); // B
- // printf("%1.3f,", f[0]); // R
- // printf("%1.3f,", f[1]); // G
- // printf("%1.3f :",f[2]); // B
- // printf("%04x, ", CGU_UINT32(FloatToHalf(f[0]))); // R
- // printf("%04x," , CGU_UINT32(FloatToHalf(f[1]))); // G
- // printf("%04x\n", CGU_UINT32(FloatToHalf(f[2]))); // B
- // }
- #ifdef USE_NEW_SINGLE_HEADER_INTERFACES
- CGU_Vec3f image_src[16]; // the 16 source pixels converted from half to float
- for (int i = 0; i < 16; i++)
- {
- image_src[i].x = HalfToFloat((CGU_UINT32)BC6HEncode_local->din[i][0]);
- image_src[i].y = HalfToFloat((CGU_UINT32)BC6HEncode_local->din[i][1]);
- image_src[i].z = HalfToFloat((CGU_UINT32)BC6HEncode_local->din[i][2]);
- }
- CGU_Vec4ui cmp = CompressBlockBC6H_UNORM(image_src, 1.0f);
- outdata[destIdx + 0] = cmp.x & 0xFF; // each of the four words is stored least-significant byte first
- outdata[destIdx + 1] = (cmp.x >> 8) & 0xFF;
- outdata[destIdx + 2] = (cmp.x >> 16) & 0xFF;
- outdata[destIdx + 3] = (cmp.x >> 24) & 0xFF;
- outdata[destIdx + 4] = cmp.y & 0xFF;
- outdata[destIdx + 5] = (cmp.y >> 8) & 0xFF;
- outdata[destIdx + 6] = (cmp.y >> 16) & 0xFF;
- outdata[destIdx + 7] = (cmp.y >> 24) & 0xFF;
- outdata[destIdx + 8] = cmp.z & 0xFF;
- outdata[destIdx + 9] = (cmp.z >> 8) & 0xFF;
- outdata[destIdx + 10] = (cmp.z >> 16) & 0xFF;
- outdata[destIdx + 11] = (cmp.z >> 24) & 0xFF;
- outdata[destIdx + 12] = cmp.w & 0xFF;
- outdata[destIdx + 13] = (cmp.w >> 8) & 0xFF;
- outdata[destIdx + 14] = (cmp.w >> 16) & 0xFF;
- outdata[destIdx + 15] = (cmp.w >> 24) & 0xFF;
- return;
- #else
- CGU_UINT8 Cmp_Red_Block[16] = {0xc2, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00}; // fixed fallback block written when no mode was selected (m_mode == 0)
- CGU_FLOAT bestError = CMP_FLOAT_MAX;
- CGU_FLOAT error = CMP_FLOAT_MAX;
- CGU_INT8 bestShape = 0; // -1 marks the no-partition candidate as the current best
- CGU_FLOAT quality = BC6HEncode->m_quality;
- BC6HEncode_local->issigned = BC6HEncode->m_isSigned;
- // run through no partition first
- error = FindBestPattern(BC6HEncode_local, false, 0, quality);
- if (error < bestError)
- {
- bestError = error;
- bestShape = -1;
- memcpy((CGU_UINT8*)BC6HEncode_local->cur_best_shape_indices, (CGU_UINT8*)BC6HEncode_local->shape_indices, sizeof(BC6HEncode_local->shape_indices)); // snapshot the working fields into cur_best_*
- memcpy((CGU_UINT8*)BC6HEncode_local->cur_best_partition, (CGU_UINT8*)BC6HEncode_local->partition, sizeof(BC6HEncode_local->partition));
- memcpy((CGU_UINT8*)BC6HEncode_local->cur_best_fEndPoints, (CGU_UINT8*)BC6HEncode_local->fEndPoints, sizeof(BC6HEncode_local->fEndPoints));
- memcpy((CGU_UINT8*)BC6HEncode_local->cur_best_entryCount, (CGU_UINT8*)BC6HEncode_local->entryCount, sizeof(BC6HEncode_local->entryCount));
- BC6HEncode_local->d_shape_index = bestShape;
- }
- // run through every candidate partition shape (MAX_BC6H_PARTITIONS of them)
- for (CGU_INT8 shape = 0; shape < MAX_BC6H_PARTITIONS; shape++)
- {
- error = FindBestPattern(BC6HEncode_local, true, shape, quality);
- if (error < bestError)
- {
- bestError = error;
- bestShape = shape;
- memcpy((CGU_UINT8*)BC6HEncode_local->cur_best_shape_indices, (CGU_UINT8*)BC6HEncode_local->shape_indices, sizeof(BC6HEncode_local->shape_indices)); // new best: snapshot the working fields
- memcpy((CGU_UINT8*)BC6HEncode_local->cur_best_partition, (CGU_UINT8*)BC6HEncode_local->partition, sizeof(BC6HEncode_local->partition));
- memcpy((CGU_UINT8*)BC6HEncode_local->cur_best_fEndPoints, (CGU_UINT8*)BC6HEncode_local->fEndPoints, sizeof(BC6HEncode_local->fEndPoints));
- memcpy((CGU_UINT8*)BC6HEncode_local->cur_best_entryCount, (CGU_UINT8*)BC6HEncode_local->entryCount, sizeof(BC6HEncode_local->entryCount));
- BC6HEncode_local->d_shape_index = bestShape;
- }
- else
- {
- if (bestShape != -1) // NOTE(review): the snapshot is not restored while the no-partition result is the best; confirm FindBestPattern leaves the working fields usable in that case
- {
- BC6HEncode_local->d_shape_index = bestShape;
- memcpy(
- (CGU_UINT8*)BC6HEncode_local->shape_indices, (CGU_UINT8*)BC6HEncode_local->cur_best_shape_indices, sizeof(BC6HEncode_local->shape_indices)); // this shape was not better: restore the snapshot
- memcpy((CGU_UINT8*)BC6HEncode_local->partition, (CGU_UINT8*)BC6HEncode_local->cur_best_partition, sizeof(BC6HEncode_local->partition));
- memcpy((CGU_UINT8*)BC6HEncode_local->fEndPoints, (CGU_UINT8*)BC6HEncode_local->cur_best_fEndPoints, sizeof(BC6HEncode_local->fEndPoints));
- memcpy((CGU_UINT8*)BC6HEncode_local->entryCount, (CGU_UINT8*)BC6HEncode_local->cur_best_entryCount, sizeof(BC6HEncode_local->entryCount));
- }
- }
- }
- bestError = EncodePattern(BC6HEncode_local, bestError); // choose and store the final mode for the selected pattern
- // used for debugging modes, set the value you want to debug with
- if (BC6HEncode_local->m_mode != 0)
- {
- // do final encoding and save to output block
- SaveDataBlock(BC6HEncode_local, &outdata[destIdx]);
- }
- else
- {
- for (CGU_INT i = 0; i < 16; i++) // no mode selected: emit the fixed fallback block
- outdata[destIdx + i] = Cmp_Red_Block[i];
- }
- // for (CGU_INT i = 0; i < 16; i++)
- // printf("data[%d] = %x\n", i,outdata[i]);
- #endif
- }
- //============================================== USER INTERFACES ========================================================
- #ifndef ASPM_GPU
- #ifndef ASPM
- //======================= DECOMPRESS =========================================
- // Decodes the header of one compressed BC6H block into an AMD_BC6H_Format:
- // the mode number, the endpoint bit widths (wBits / tBits), the raw (still
- // compressed) endpoint fields rw..bz, the partition shape index and the 16
- // per-pixel indices. The raw endpoint fields are also copied into EC[] as floats.
- //   in      : the compressed block bytes
- //   returns : the decoded description; m_mode is 0 (and nothing else is filled
- //             in) when the mode bits do not match any known mode
- static AMD_BC6H_Format extract_format(const CGU_UINT8 in[COMPRESSED_BLOCK_SIZE])
- {
-     AMD_BC6H_Format bc6h_format;
-     unsigned short  decvalue;
-     memset(&bc6h_format, 0, sizeof(AMD_BC6H_Format));
-     // Bit 1 of the first byte set   -> the mode field is five bits wide
-     // Bit 1 of the first byte clear -> the mode field is two bits wide; since
-     // bit 1 is zero the field value is just bit 0 (0x00 or 0x01)
-     if ((in[0] & 0x02) > 0)
-     {
-         decvalue = (in[0] & 0x1F);  // five-bit mode field
-     }
-     else
-     {
-         decvalue = (in[0] & 0x01);  // two-bit mode field with bit 1 known to be 0
-     }
-     BitHeader header(in, 16);
-     switch (decvalue)
-     {
-     case 0x00:
-         bc6h_format.m_mode         = 1;  // 10:5:5:5
-         bc6h_format.wBits          = 10;
-         bc6h_format.tBits[C_RED]   = 5;
-         bc6h_format.tBits[C_GREEN] = 5;
-         bc6h_format.tBits[C_BLUE]  = 5;
-         bc6h_format.rw = header.getvalue(5, 10);  // 10: rw[9:0]
-         bc6h_format.rx = header.getvalue(35, 5);  // 5: rx[4:0]
-         bc6h_format.ry = header.getvalue(65, 5);  // 5: ry[4:0]
-         bc6h_format.rz = header.getvalue(71, 5);  // 5: rz[4:0]
-         bc6h_format.gw = header.getvalue(15, 10);  // 10: gw[9:0]
-         bc6h_format.gx = header.getvalue(45, 5);  // 5: gx[4:0]
-         bc6h_format.gy = header.getvalue(41, 4) |  // 5: gy[3:0]
-                          (header.getvalue(2, 1) << 4);  // gy[4]
-         bc6h_format.gz = header.getvalue(51, 4) |  // 5: gz[3:0]
-                          (header.getvalue(40, 1) << 4);  // gz[4]
-         bc6h_format.bw = header.getvalue(25, 10);  // 10: bw[9:0]
-         bc6h_format.bx = header.getvalue(55, 5);  // 5: bx[4:0]
-         bc6h_format.by = header.getvalue(61, 4) |  // 5: by[3:0]
-                          (header.getvalue(3, 1) << 4);  // by[4]
-         bc6h_format.bz = header.getvalue(50, 1) |  // 5: bz[0]
-                          (header.getvalue(60, 1) << 1) |  // bz[1]
-                          (header.getvalue(70, 1) << 2) |  // bz[2]
-                          (header.getvalue(76, 1) << 3) |  // bz[3]
-                          (header.getvalue(4, 1) << 4);  // bz[4]
-         break;
-     case 0x01:
-         bc6h_format.m_mode         = 2;  // 7:6:6:6
-         bc6h_format.wBits          = 7;
-         bc6h_format.tBits[C_RED]   = 6;
-         bc6h_format.tBits[C_GREEN] = 6;
-         bc6h_format.tBits[C_BLUE]  = 6;
-         bc6h_format.rw = header.getvalue(5, 7);  // 7: rw[6:0]
-         bc6h_format.rx = header.getvalue(35, 6);  // 6: rx[5:0]
-         bc6h_format.ry = header.getvalue(65, 6);  // 6: ry[5:0]
-         bc6h_format.rz = header.getvalue(71, 6);  // 6: rz[5:0]
-         bc6h_format.gw = header.getvalue(15, 7);  // 7: gw[6:0]
-         bc6h_format.gx = header.getvalue(45, 6);  // 6: gx[5:0]
-         bc6h_format.gy = header.getvalue(41, 4) |  // 6: gy[3:0]
-                          (header.getvalue(24, 1) << 4) |  // gy[4]
-                          (header.getvalue(2, 1) << 5);  // gy[5]
-         bc6h_format.gz = header.getvalue(51, 4) |  // 6: gz[3:0]
-                          (header.getvalue(3, 1) << 4) |  // gz[4]
-                          (header.getvalue(4, 1) << 5);  // gz[5]
-         bc6h_format.bw = header.getvalue(25, 7);  // 7: bw[6:0]
-         bc6h_format.bx = header.getvalue(55, 6);  // 6: bx[5:0]
-         bc6h_format.by = header.getvalue(61, 4) |  // 6: by[3:0]
-                          (header.getvalue(14, 1) << 4) |  // by[4]
-                          (header.getvalue(22, 1) << 5);  // by[5]
-         bc6h_format.bz = header.getvalue(12, 1) |  // 6: bz[0]
-                          (header.getvalue(13, 1) << 1) |  // bz[1]
-                          (header.getvalue(23, 1) << 2) |  // bz[2]
-                          (header.getvalue(32, 1) << 3) |  // bz[3]
-                          (header.getvalue(34, 1) << 4) |  // bz[4]
-                          (header.getvalue(33, 1) << 5);  // bz[5]
-         break;
-     case 0x02:
-         bc6h_format.m_mode         = 3;  // 11:5:4:4
-         bc6h_format.wBits          = 11;
-         bc6h_format.tBits[C_RED]   = 5;
-         bc6h_format.tBits[C_GREEN] = 4;
-         bc6h_format.tBits[C_BLUE]  = 4;
-         bc6h_format.rw = header.getvalue(5, 10) |  // 11: rw[9:0]
-                          (header.getvalue(40, 1) << 10);  // rw[10]
-         bc6h_format.rx = header.getvalue(35, 5);  // 5: rx[4:0]
-         bc6h_format.ry = header.getvalue(65, 5);  // 5: ry[4:0]
-         bc6h_format.rz = header.getvalue(71, 5);  // 5: rz[4:0]
-         bc6h_format.gw = header.getvalue(15, 10) |  // 11: gw[9:0]
-                          (header.getvalue(49, 1) << 10);  // gw[10]
-         bc6h_format.gx = header.getvalue(45, 4);  // 4: gx[3:0]
-         bc6h_format.gy = header.getvalue(41, 4);  // 4: gy[3:0]
-         bc6h_format.gz = header.getvalue(51, 4);  // 4: gz[3:0]
-         bc6h_format.bw = header.getvalue(25, 10) |  // 11: bw[9:0]
-                          (header.getvalue(59, 1) << 10);  // bw[10]
-         bc6h_format.bx = header.getvalue(55, 4);  // 4: bx[3:0]
-         bc6h_format.by = header.getvalue(61, 4);  // 4: by[3:0]
-         bc6h_format.bz = header.getvalue(50, 1) |  // 4: bz[0]
-                          (header.getvalue(60, 1) << 1) |  // bz[1]
-                          (header.getvalue(70, 1) << 2) |  // bz[2]
-                          (header.getvalue(76, 1) << 3);  // bz[3]
-         break;
-     case 0x06:
-         bc6h_format.m_mode         = 4;  // 11:4:5:4
-         bc6h_format.wBits          = 11;
-         bc6h_format.tBits[C_RED]   = 4;
-         bc6h_format.tBits[C_GREEN] = 5;
-         bc6h_format.tBits[C_BLUE]  = 4;
-         bc6h_format.rw = header.getvalue(5, 10) |  // 11: rw[9:0]
-                          (header.getvalue(39, 1) << 10);  // rw[10]
-         bc6h_format.rx = header.getvalue(35, 4);  // 4: rx[3:0]
-         bc6h_format.ry = header.getvalue(65, 4);  // 4: ry[3:0]
-         bc6h_format.rz = header.getvalue(71, 4);  // 4: rz[3:0]
-         bc6h_format.gw = header.getvalue(15, 10) |  // 11: gw[9:0]
-                          (header.getvalue(50, 1) << 10);  // gw[10]
-         bc6h_format.gx = header.getvalue(45, 5);  // 5: gx[4:0]
-         bc6h_format.gy = header.getvalue(41, 4) |  // 5: gy[3:0]
-                          (header.getvalue(75, 1) << 4);  // gy[4]
-         bc6h_format.gz = header.getvalue(51, 4) |  // 5: gz[3:0]
-                          (header.getvalue(40, 1) << 4);  // gz[4]
-         bc6h_format.bw = header.getvalue(25, 10) |  // 11: bw[9:0]
-                          (header.getvalue(59, 1) << 10);  // bw[10]
-         bc6h_format.bx = header.getvalue(55, 4);  // 4: bx[3:0]
-         bc6h_format.by = header.getvalue(61, 4);  // 4: by[3:0]
-         bc6h_format.bz = header.getvalue(69, 1) |  // 4: bz[0]
-                          (header.getvalue(60, 1) << 1) |  // bz[1]
-                          (header.getvalue(70, 1) << 2) |  // bz[2]
-                          (header.getvalue(76, 1) << 3);  // bz[3]
-         break;
-     case 0x0A:
-         bc6h_format.m_mode         = 5;  // 11:4:4:5
-         bc6h_format.wBits          = 11;
-         bc6h_format.tBits[C_RED]   = 4;
-         bc6h_format.tBits[C_GREEN] = 4;
-         bc6h_format.tBits[C_BLUE]  = 5;
-         bc6h_format.rw = header.getvalue(5, 10) |  // 11: rw[9:0]
-                          (header.getvalue(39, 1) << 10);  // rw[10]
-         bc6h_format.rx = header.getvalue(35, 4);  // 4: rx[3:0]
-         bc6h_format.ry = header.getvalue(65, 4);  // 4: ry[3:0]
-         bc6h_format.rz = header.getvalue(71, 4);  // 4: rz[3:0]
-         bc6h_format.gw = header.getvalue(15, 10) |  // 11: gw[9:0]
-                          (header.getvalue(49, 1) << 10);  // gw[10]
-         bc6h_format.gx = header.getvalue(45, 4);  // 4: gx[3:0]
-         bc6h_format.gy = header.getvalue(41, 4);  // 4: gy[3:0]
-         bc6h_format.gz = header.getvalue(51, 4);  // 4: gz[3:0]
-         bc6h_format.bw = header.getvalue(25, 10) |  // 11: bw[9:0]
-                          (header.getvalue(60, 1) << 10);  // bw[10]
-         bc6h_format.bx = header.getvalue(55, 5);  // 5: bx[4:0]
-         // by is five bits wide in this mode: the top bit lives at bit 40 and
-         // must be OR-ed in (it was previously dropped by a stray ';')
-         bc6h_format.by = header.getvalue(61, 4) |  // 5: by[3:0]
-                          (header.getvalue(40, 1) << 4);  // by[4]
-         bc6h_format.bz = header.getvalue(50, 1) |  // 5: bz[0]
-                          (header.getvalue(69, 1) << 1) |  // bz[1]
-                          (header.getvalue(70, 1) << 2) |  // bz[2]
-                          (header.getvalue(76, 1) << 3) |  // bz[3]
-                          (header.getvalue(75, 1) << 4);  // bz[4]
-         break;
-     case 0x0E:
-         bc6h_format.m_mode         = 6;  // 9:5:5:5
-         bc6h_format.wBits          = 9;
-         bc6h_format.tBits[C_RED]   = 5;
-         bc6h_format.tBits[C_GREEN] = 5;
-         bc6h_format.tBits[C_BLUE]  = 5;
-         bc6h_format.rw = header.getvalue(5, 9);  // 9: rw[8:0]
-         bc6h_format.gw = header.getvalue(15, 9);  // 9: gw[8:0]
-         bc6h_format.bw = header.getvalue(25, 9);  // 9: bw[8:0]
-         bc6h_format.rx = header.getvalue(35, 5);  // 5: rx[4:0]
-         bc6h_format.gx = header.getvalue(45, 5);  // 5: gx[4:0]
-         bc6h_format.bx = header.getvalue(55, 5);  // 5: bx[4:0]
-         bc6h_format.ry = header.getvalue(65, 5);  // 5: ry[4:0]
-         bc6h_format.gy = header.getvalue(41, 4) |  // 5: gy[3:0]
-                          (header.getvalue(24, 1) << 4);  // gy[4]
-         bc6h_format.by = header.getvalue(61, 4) |  // 5: by[3:0]
-                          (header.getvalue(14, 1) << 4);  // by[4]
-         bc6h_format.rz = header.getvalue(71, 5);  // 5: rz[4:0]
-         bc6h_format.gz = header.getvalue(51, 4) |  // 5: gz[3:0]
-                          (header.getvalue(40, 1) << 4);  // gz[4]
-         bc6h_format.bz = header.getvalue(50, 1) |  // 5: bz[0]
-                          (header.getvalue(60, 1) << 1) |  // bz[1]
-                          (header.getvalue(70, 1) << 2) |  // bz[2]
-                          (header.getvalue(76, 1) << 3) |  // bz[3]
-                          (header.getvalue(34, 1) << 4);  // bz[4]
-         break;
-     case 0x12:
-         bc6h_format.m_mode         = 7;  // 8:6:5:5
-         bc6h_format.wBits          = 8;
-         bc6h_format.tBits[C_RED]   = 6;
-         bc6h_format.tBits[C_GREEN] = 5;
-         bc6h_format.tBits[C_BLUE]  = 5;
-         bc6h_format.rw = header.getvalue(5, 8);  // 8: rw[7:0]
-         bc6h_format.gw = header.getvalue(15, 8);  // 8: gw[7:0]
-         bc6h_format.bw = header.getvalue(25, 8);  // 8: bw[7:0]
-         bc6h_format.rx = header.getvalue(35, 6);  // 6: rx[5:0]
-         bc6h_format.gx = header.getvalue(45, 5);  // 5: gx[4:0]
-         bc6h_format.bx = header.getvalue(55, 5);  // 5: bx[4:0]
-         bc6h_format.ry = header.getvalue(65, 6);  // 6: ry[5:0]
-         bc6h_format.gy = header.getvalue(41, 4) |  // 5: gy[3:0]
-                          (header.getvalue(24, 1) << 4);  // gy[4]
-         bc6h_format.by = header.getvalue(61, 4) |  // 5: by[3:0]
-                          (header.getvalue(14, 1) << 4);  // by[4]
-         bc6h_format.rz = header.getvalue(71, 6);  // 6: rz[5:0]
-         bc6h_format.gz = header.getvalue(51, 4) |  // 5: gz[3:0]
-                          (header.getvalue(13, 1) << 4);  // gz[4]
-         bc6h_format.bz = header.getvalue(50, 1) |  // 5: bz[0]
-                          (header.getvalue(60, 1) << 1) |  // bz[1]
-                          (header.getvalue(23, 1) << 2) |  // bz[2]
-                          (header.getvalue(33, 1) << 3) |  // bz[3]
-                          (header.getvalue(34, 1) << 4);  // bz[4]
-         break;
-     case 0x16:
-         bc6h_format.m_mode         = 8;  // 8:5:6:5
-         bc6h_format.wBits          = 8;
-         bc6h_format.tBits[C_RED]   = 5;
-         bc6h_format.tBits[C_GREEN] = 6;
-         bc6h_format.tBits[C_BLUE]  = 5;
-         bc6h_format.rw = header.getvalue(5, 8);  // 8: rw[7:0]
-         bc6h_format.gw = header.getvalue(15, 8);  // 8: gw[7:0]
-         bc6h_format.bw = header.getvalue(25, 8);  // 8: bw[7:0]
-         bc6h_format.rx = header.getvalue(35, 5);  // 5: rx[4:0]
-         bc6h_format.gx = header.getvalue(45, 6);  // 6: gx[5:0]
-         bc6h_format.bx = header.getvalue(55, 5);  // 5: bx[4:0]
-         bc6h_format.ry = header.getvalue(65, 5);  // 5: ry[4:0]
-         bc6h_format.gy = header.getvalue(41, 4) |  // 6: gy[3:0]
-                          (header.getvalue(24, 1) << 4) |  // gy[4]
-                          (header.getvalue(23, 1) << 5);  // gy[5]
-         bc6h_format.by = header.getvalue(61, 4) |  // 5: by[3:0]
-                          (header.getvalue(14, 1) << 4);  // by[4]
-         bc6h_format.rz = header.getvalue(71, 5);  // 5: rz[4:0]
-         bc6h_format.gz = header.getvalue(51, 4) |  // 6: gz[3:0]
-                          (header.getvalue(40, 1) << 4) |  // gz[4]
-                          (header.getvalue(33, 1) << 5);  // gz[5]
-         bc6h_format.bz = header.getvalue(13, 1) |  // 5: bz[0]
-                          (header.getvalue(60, 1) << 1) |  // bz[1]
-                          (header.getvalue(70, 1) << 2) |  // bz[2]
-                          (header.getvalue(76, 1) << 3) |  // bz[3]
-                          (header.getvalue(34, 1) << 4);  // bz[4]
-         break;
-     case 0x1A:
-         bc6h_format.m_mode         = 9;  // 8:5:5:6
-         bc6h_format.wBits          = 8;
-         bc6h_format.tBits[C_RED]   = 5;
-         bc6h_format.tBits[C_GREEN] = 5;
-         bc6h_format.tBits[C_BLUE]  = 6;
-         bc6h_format.rw = header.getvalue(5, 8);  // 8: rw[7:0]
-         bc6h_format.gw = header.getvalue(15, 8);  // 8: gw[7:0]
-         bc6h_format.bw = header.getvalue(25, 8);  // 8: bw[7:0]
-         bc6h_format.rx = header.getvalue(35, 5);  // 5: rx[4:0]
-         bc6h_format.gx = header.getvalue(45, 5);  // 5: gx[4:0]
-         bc6h_format.bx = header.getvalue(55, 6);  // 6: bx[5:0]
-         bc6h_format.ry = header.getvalue(65, 5);  // 5: ry[4:0]
-         bc6h_format.gy = header.getvalue(41, 4) |  // 5: gy[3:0]
-                          (header.getvalue(24, 1) << 4);  // gy[4]
-         bc6h_format.by = header.getvalue(61, 4) |  // 6: by[3:0]
-                          (header.getvalue(14, 1) << 4) |  // by[4]
-                          (header.getvalue(23, 1) << 5);  // by[5]
-         bc6h_format.rz = header.getvalue(71, 5);  // 5: rz[4:0]
-         bc6h_format.gz = header.getvalue(51, 4) |  // 5: gz[3:0]
-                          (header.getvalue(40, 1) << 4);  // gz[4]
-         bc6h_format.bz = header.getvalue(50, 1) |  // 6: bz[0]
-                          (header.getvalue(13, 1) << 1) |  // bz[1]
-                          (header.getvalue(70, 1) << 2) |  // bz[2]
-                          (header.getvalue(76, 1) << 3) |  // bz[3]
-                          (header.getvalue(34, 1) << 4) |  // bz[4]
-                          (header.getvalue(33, 1) << 5);  // bz[5]
-         break;
-     case 0x1E:
-         bc6h_format.m_mode         = 10;  // 6:6:6:6
-         bc6h_format.istransformed  = FALSE;
-         bc6h_format.wBits          = 6;
-         bc6h_format.tBits[C_RED]   = 6;
-         bc6h_format.tBits[C_GREEN] = 6;
-         bc6h_format.tBits[C_BLUE]  = 6;
-         bc6h_format.rw = header.getvalue(5, 6);  // 6: rw[5:0]
-         bc6h_format.gw = header.getvalue(15, 6);  // 6: gw[5:0]
-         bc6h_format.bw = header.getvalue(25, 6);  // 6: bw[5:0]
-         bc6h_format.rx = header.getvalue(35, 6);  // 6: rx[5:0]
-         bc6h_format.gx = header.getvalue(45, 6);  // 6: gx[5:0]
-         bc6h_format.bx = header.getvalue(55, 6);  // 6: bx[5:0]
-         bc6h_format.ry = header.getvalue(65, 6);  // 6: ry[5:0]
-         bc6h_format.gy = header.getvalue(41, 4) |  // 6: gy[3:0]
-                          (header.getvalue(24, 1) << 4) |  // gy[4]
-                          (header.getvalue(21, 1) << 5);  // gy[5]
-         bc6h_format.by = header.getvalue(61, 4) |  // 6: by[3:0]
-                          (header.getvalue(14, 1) << 4) |  // by[4]
-                          (header.getvalue(22, 1) << 5);  // by[5]
-         bc6h_format.rz = header.getvalue(71, 6);  // 6: rz[5:0]
-         bc6h_format.gz = header.getvalue(51, 4) |  // 6: gz[3:0]
-                          (header.getvalue(11, 1) << 4) |  // gz[4]
-                          (header.getvalue(31, 1) << 5);  // gz[5]
-         bc6h_format.bz = header.getvalue(12, 1) |  // 6: bz[0]
-                          (header.getvalue(13, 1) << 1) |  // bz[1]
-                          (header.getvalue(23, 1) << 2) |  // bz[2]
-                          (header.getvalue(32, 1) << 3) |  // bz[3]
-                          (header.getvalue(34, 1) << 4) |  // bz[4]
-                          (header.getvalue(33, 1) << 5);  // bz[5]
-         break;
-     // Single region modes
-     case 0x03:
-         bc6h_format.m_mode         = 11;  // 10:10
-         bc6h_format.wBits          = 10;
-         bc6h_format.tBits[C_RED]   = 10;
-         bc6h_format.tBits[C_GREEN] = 10;
-         bc6h_format.tBits[C_BLUE]  = 10;
-         bc6h_format.rw = header.getvalue(5, 10);  // 10: rw[9:0]
-         bc6h_format.gw = header.getvalue(15, 10);  // 10: gw[9:0]
-         bc6h_format.bw = header.getvalue(25, 10);  // 10: bw[9:0]
-         bc6h_format.rx = header.getvalue(35, 10);  // 10: rx[9:0]
-         bc6h_format.gx = header.getvalue(45, 10);  // 10: gx[9:0]
-         bc6h_format.bx = header.getvalue(55, 10);  // 10: bx[9:0]
-         break;
-     case 0x07:
-         bc6h_format.m_mode         = 12;  // 11:9
-         bc6h_format.wBits          = 11;
-         bc6h_format.tBits[C_RED]   = 9;
-         bc6h_format.tBits[C_GREEN] = 9;
-         bc6h_format.tBits[C_BLUE]  = 9;
-         bc6h_format.rw = header.getvalue(5, 10) |  // 11: rw[9:0]
-                          (header.getvalue(44, 1) << 10);  // rw[10]
-         bc6h_format.gw = header.getvalue(15, 10) |  // 11: gw[9:0]
-                          (header.getvalue(54, 1) << 10);  // gw[10]
-         bc6h_format.bw = header.getvalue(25, 10) |  // 11: bw[9:0]
-                          (header.getvalue(64, 1) << 10);  // bw[10]
-         bc6h_format.rx = header.getvalue(35, 9);  // 9: rx[8:0]
-         bc6h_format.gx = header.getvalue(45, 9);  // 9: gx[8:0]
-         bc6h_format.bx = header.getvalue(55, 9);  // 9: bx[8:0]
-         break;
-     case 0x0B:
-         bc6h_format.m_mode         = 13;  // 12:8
-         bc6h_format.wBits          = 12;
-         bc6h_format.tBits[C_RED]   = 8;
-         bc6h_format.tBits[C_GREEN] = 8;
-         bc6h_format.tBits[C_BLUE]  = 8;
-         bc6h_format.rw = header.getvalue(5, 10) |  // 12: rw[9:0]
-                          (header.getvalue(43, 1) << 11) |  // rw[11]
-                          (header.getvalue(44, 1) << 10);  // rw[10]
-         bc6h_format.gw = header.getvalue(15, 10) |  // 12: gw[9:0]
-                          (header.getvalue(53, 1) << 11) |  // gw[11]
-                          (header.getvalue(54, 1) << 10);  // gw[10]
-         bc6h_format.bw = header.getvalue(25, 10) |  // 12: bw[9:0]
-                          (header.getvalue(63, 1) << 11) |  // bw[11]
-                          (header.getvalue(64, 1) << 10);  // bw[10]
-         bc6h_format.rx = header.getvalue(35, 8);  // 8: rx[7:0]
-         bc6h_format.gx = header.getvalue(45, 8);  // 8: gx[7:0]
-         bc6h_format.bx = header.getvalue(55, 8);  // 8: bx[7:0]
-         break;
-     case 0x0F:
-         bc6h_format.m_mode         = 14;  // 16:4
-         bc6h_format.wBits          = 16;
-         bc6h_format.tBits[C_RED]   = 4;
-         bc6h_format.tBits[C_GREEN] = 4;
-         bc6h_format.tBits[C_BLUE]  = 4;
-         bc6h_format.rw = header.getvalue(5, 10) |  // 16: rw[9:0]
-                          (header.getvalue(39, 1) << 15) |  // rw[15]
-                          (header.getvalue(40, 1) << 14) |  // rw[14]
-                          (header.getvalue(41, 1) << 13) |  // rw[13]
-                          (header.getvalue(42, 1) << 12) |  // rw[12]
-                          (header.getvalue(43, 1) << 11) |  // rw[11]
-                          (header.getvalue(44, 1) << 10);  // rw[10]
-         bc6h_format.gw = header.getvalue(15, 10) |  // 16: gw[9:0]
-                          (header.getvalue(49, 1) << 15) |  // gw[15]
-                          (header.getvalue(50, 1) << 14) |  // gw[14]
-                          (header.getvalue(51, 1) << 13) |  // gw[13]
-                          (header.getvalue(52, 1) << 12) |  // gw[12]
-                          (header.getvalue(53, 1) << 11) |  // gw[11]
-                          (header.getvalue(54, 1) << 10);  // gw[10]
-         bc6h_format.bw = header.getvalue(25, 10) |  // 16: bw[9:0]
-                          (header.getvalue(59, 1) << 15) |  // bw[15]
-                          (header.getvalue(60, 1) << 14) |  // bw[14]
-                          (header.getvalue(61, 1) << 13) |  // bw[13]
-                          (header.getvalue(62, 1) << 12) |  // bw[12]
-                          (header.getvalue(63, 1) << 11) |  // bw[11]
-                          (header.getvalue(64, 1) << 10);  // bw[10]
-         bc6h_format.rx = header.getvalue(35, 4);  // 4: rx[3:0]
-         bc6h_format.gx = header.getvalue(45, 4);  // 4: gx[3:0]
-         bc6h_format.bx = header.getvalue(55, 4);  // 4: bx[3:0]
-         break;
-     default:
-         // Unknown mode bits: report mode 0 and leave everything else zeroed
-         bc6h_format.m_mode = 0;
-         return bc6h_format;
-     }
-     // Each format in the mode table can be uniquely identified by the mode bits.
-     // The first ten modes are used for two-region tiles, and the mode bit field
-     // can be either two or five bits long. These blocks also have fields for
-     // the compressed color endpoints (72 or 75 bits), the partition (5 bits),
-     // and the partition indices (46 bits).
-     if (bc6h_format.m_mode <= 10)
-     {
-         bc6h_format.region = BC6_TWO;
-         // Get the shape index bits 77 to 81
-         bc6h_format.d_shape_index = (unsigned short)header.getvalue(77, 5);
-         bc6h_format.istransformed = (bc6h_format.m_mode < 10) ? TRUE : FALSE;
-     }
-     else
-     {
-         bc6h_format.region        = BC6_ONE;
-         bc6h_format.d_shape_index = 0;
-         bc6h_format.istransformed = (bc6h_format.m_mode > 11) ? TRUE : FALSE;
-     }
-     // Save the points in a form easy to compute with
-     bc6h_format.EC[0].A[0] = (CGU_FLOAT)bc6h_format.rw;
-     bc6h_format.EC[0].B[0] = (CGU_FLOAT)bc6h_format.rx;
-     bc6h_format.EC[1].A[0] = (CGU_FLOAT)bc6h_format.ry;
-     bc6h_format.EC[1].B[0] = (CGU_FLOAT)bc6h_format.rz;
-     bc6h_format.EC[0].A[1] = (CGU_FLOAT)bc6h_format.gw;
-     bc6h_format.EC[0].B[1] = (CGU_FLOAT)bc6h_format.gx;
-     bc6h_format.EC[1].A[1] = (CGU_FLOAT)bc6h_format.gy;
-     bc6h_format.EC[1].B[1] = (CGU_FLOAT)bc6h_format.gz;
-     bc6h_format.EC[0].A[2] = (CGU_FLOAT)bc6h_format.bw;
-     bc6h_format.EC[0].B[2] = (CGU_FLOAT)bc6h_format.bx;
-     bc6h_format.EC[1].A[2] = (CGU_FLOAT)bc6h_format.by;
-     bc6h_format.EC[1].B[2] = (CGU_FLOAT)bc6h_format.bz;
-     if (bc6h_format.region == BC6_ONE)
-     {
-         // One region: the first index is stored with 3 bits, the other 15 with 4 bits each
-         int startbits = ONE_REGION_INDEX_OFFSET;
-         bc6h_format.indices16[0] = (CGU_UINT8)header.getvalue(startbits, 3);
-         startbits += 3;
-         for (int i = 1; i < 16; i++)
-         {
-             bc6h_format.indices16[i] = (CGU_UINT8)header.getvalue(startbits, 4);
-             startbits += 4;
-         }
-     }
-     else
-     {
-         // Two regions: indices use 3 bits, except index 0 and the fix-up index
-         // of the selected shape, which use 2 bits
-         int startbit = TWO_REGION_INDEX_OFFSET, nbits = 2;
-         bc6h_format.indices16[0] = (CGU_UINT8)header.getvalue(startbit, 2);
-         for (int i = 1; i < 16; i++)
-         {
-             startbit += nbits;  // offset start bit for next index using prior nbits used
-             nbits = g_indexfixups[bc6h_format.d_shape_index] == i ? 2 : 3;  // get new number of bit to save index with
-             bc6h_format.indices16[i] = (CGU_UINT8)header.getvalue(startbit, nbits);
-         }
-     }
-     return bc6h_format;
- }
- // Expands the stored endpoint pair EC[0] into E[0].A / E[0].B for every channel.
- // Signed blocks sign-extend the stored values; in transformed modes B is stored
- // as a signed offset that is added to the stored A and wrapped to wBits.
- static void extract_compressed_endpoints(AMD_BC6H_Format& bc6h_format)
- {
-     for (int channel = 0; channel < NCHANNELS; channel++)
-     {
-         if (bc6h_format.issigned)
-         {
-             bc6h_format.E[0].A[channel] = (CGU_FLOAT)SIGN_EXTEND(bc6h_format.EC[0].A[channel], bc6h_format.wBits);
-             if (bc6h_format.istransformed)
-             {
-                 // Widen the offset, add the base, wrap to wBits, then sign-extend the sum
-                 int sumB = SIGN_EXTEND(bc6h_format.EC[0].B[channel], bc6h_format.tBits[channel]);
-                 sumB     = int(sumB + bc6h_format.EC[0].A[channel]) & MASK(bc6h_format.wBits);
-                 bc6h_format.E[0].B[channel] = (CGU_FLOAT)SIGN_EXTEND(sumB, bc6h_format.wBits);
-             }
-             else
-             {
-                 bc6h_format.E[0].B[channel] = (CGU_FLOAT)SIGN_EXTEND(bc6h_format.EC[0].B[channel], bc6h_format.tBits[channel]);
-             }
-         }
-         else
-         {
-             bc6h_format.E[0].A[channel] = bc6h_format.EC[0].A[channel];
-             if (bc6h_format.istransformed)
-             {
-                 // The offset is still signed even for unsigned blocks
-                 const int offsetB = SIGN_EXTEND(bc6h_format.EC[0].B[channel], bc6h_format.tBits[channel]);
-                 bc6h_format.E[0].B[channel] = CGU_FLOAT(CGU_INT(offsetB + bc6h_format.EC[0].A[channel]) & MASK(bc6h_format.wBits));
-             }
-             else
-             {
-                 bc6h_format.E[0].B[channel] = bc6h_format.EC[0].B[channel];
-             }
-         }
-     }
- }
- // NV code: used with modifications
- // Expands a quantized endpoint value q of precision prec to the 16-bit
- // working range selected by bc6h_format.format. The final 31/64 (unsigned)
- // or 31/32 (signed) scale is applied after interpolation by finish_unquantize.
- // Returns 0 for any other format value.
- static int unquantize(AMD_BC6H_Format& bc6h_format, int q, int prec)
- {
-     if (bc6h_format.format == UNSIGNED_F16)
-     {
-         // Unsigned values are spread over the full 16-bit unsigned range.
-         if (prec >= 15)
-             return q;
-         if (q == 0)
-             return 0;
-         if (q == ((1 << prec) - 1))
-             return U16MAX;
-         return (q * (U16MAX + 1) + (U16MAX + 1) / 2) >> prec;
-     }
-     if (bc6h_format.format == SIGNED_F16)
-     {
-         // Keep this early-out: the scaling below can overflow for prec = 16.
-         if (prec >= 16)
-             return q;
-         if (q == 0)
-             return 0;
-         // Work on the magnitude and re-apply the sign at the end.
-         const bool negative  = (q < 0);
-         const int  magnitude = negative ? -q : q;
-         if (magnitude >= ((1 << (prec - 1)) - 1))
-             return negative ? -S16MAX : S16MAX;
-         const int scaled = (magnitude * (S16MAX + 1) + (S16MAX + 1) / 2) >> (prec - 1);
-         return negative ? -scaled : scaled;
-     }
-     return 0;
- }
- // Interpolates between a and b at step i out of denom using the fixed
- // weight tables: g_aWeights3 for denom == 7, g_aWeights4 for denom == 15.
- // denom == 3 is rescaled onto the 15-step table. The weighted sum is divided
- // by 64 (1 << 6) in floating point and converted to int on return.
- static int lerp(int a, int b, int i, int denom)
- {
-     assert(denom == 3 || denom == 7 || denom == 15);
-     assert(i >= 0 && i <= denom);
-     const int shift = 6;
-     int*      table = NULL;
-     if (denom == 3)
-     {
-         // Map the 4-entry case onto the 16-entry table: 3 * 5 == 15.
-         denom *= 5;
-         i *= 5;
-         table = g_aWeights4;
-     }
-     else if (denom == 15)
-     {
-         table = g_aWeights4;
-     }
-     else if (denom == 7)
-     {
-         table = g_aWeights3;
-     }
-     else
-     {
-         assert(0);
-     }
- #pragma warning(disable : 4244)
-     // The float quotient is implicitly converted to the int return type.
-     return (int)(a * table[denom - i] + b * table[i]) / float(1 << shift);
- }
- // Applies the final scale to an interpolated value q:
- //   UNSIGNED_F16 -> q * 31 / 64
- //   SIGNED_F16   -> magnitude * 31 / 32, with the sign re-applied afterwards
- // Any other format returns q unchanged.
- static int finish_unquantize(AMD_BC6H_Format bc6h_format, int q)
- {
-     switch (bc6h_format.format)
-     {
-     case UNSIGNED_F16:
-         return (q * 31) >> 6;
-     case SIGNED_F16:
-         // Shift the magnitude rather than the negative value itself.
-         if (q < 0)
-             return -(((-q) * 31) >> 5);
-         return (q * 31) >> 5;
-     default:
-         return q;
-     }
- }
- // Fills bc6h_format.Palete[region][0 .. max-1] for one region. For each of
- // the three channels the endpoints E[region].A/B are unquantized with wBits,
- // interpolated over max steps with lerp, and scaled by finish_unquantize.
- // Channel 0 is written to .x, channel 1 to .y and channel 2 to .z.
- static void generate_palette_quantized(int max, AMD_BC6H_Format& bc6h_format, int region)
- {
-     const int lastStep = max - 1;
-     int       lo;
-     int       hi;
-     // channel 0 -> x
-     lo = unquantize(bc6h_format, bc6h_format.E[region].A[0], bc6h_format.wBits);
-     hi = unquantize(bc6h_format, bc6h_format.E[region].B[0], bc6h_format.wBits);
-     for (int entry = 0; entry < max; entry++)
-     {
-         const int value = finish_unquantize(bc6h_format, lerp(lo, hi, entry, lastStep));
-         bc6h_format.Palete[region][entry].x = value;
-     }
-     // channel 1 -> y
-     lo = unquantize(bc6h_format, bc6h_format.E[region].A[1], bc6h_format.wBits);
-     hi = unquantize(bc6h_format, bc6h_format.E[region].B[1], bc6h_format.wBits);
-     for (int entry = 0; entry < max; entry++)
-     {
-         bc6h_format.Palete[region][entry].y = finish_unquantize(bc6h_format, lerp(lo, hi, entry, lastStep));
-     }
-     // channel 2 -> z
-     lo = unquantize(bc6h_format, bc6h_format.E[region].A[2], bc6h_format.wBits);
-     hi = unquantize(bc6h_format, bc6h_format.E[region].B[2], bc6h_format.wBits);
-     for (int entry = 0; entry < max; entry++)
-     {
-         bc6h_format.Palete[region][entry].z = finish_unquantize(bc6h_format, lerp(lo, hi, entry, lastStep));
-     }
- }
- // NV code : used with modifications
- // Two-region counterpart of extract_compressed_endpoints: rebuilds
- // E[0].A, E[0].B, E[1].A and E[1].B from the coded values in EC, one channel
- // at a time. In the transformed modes the last three endpoints are deltas
- // relative to the coded EC[0].A and are wrapped to wBits after the addition.
- static void extract_compressed_endpoints2(AMD_BC6H_Format& bc6h_format)
- {
-     for (int ch = 0; ch < NCHANNELS; ch++)
-     {
-         int delta;
-         if (bc6h_format.issigned && bc6h_format.istransformed)
-         {
-             bc6h_format.E[0].A[ch] = SIGN_EXTEND(bc6h_format.EC[0].A[ch], bc6h_format.wBits);
-             delta = SIGN_EXTEND(bc6h_format.EC[0].B[ch], bc6h_format.tBits[ch]);
-             delta = int(delta + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);
-             bc6h_format.E[0].B[ch] = SIGN_EXTEND(delta, bc6h_format.wBits);
-             delta = SIGN_EXTEND(bc6h_format.EC[1].A[ch], bc6h_format.tBits[ch]);
-             delta = int(delta + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);
-             bc6h_format.E[1].A[ch] = SIGN_EXTEND(delta, bc6h_format.wBits);
-             delta = SIGN_EXTEND(bc6h_format.EC[1].B[ch], bc6h_format.tBits[ch]);
-             delta = int(delta + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);
-             bc6h_format.E[1].B[ch] = SIGN_EXTEND(delta, bc6h_format.wBits);
-         }
-         else if (bc6h_format.issigned)
-         {
-             // Signed, untransformed: plain sign extension of every endpoint.
-             bc6h_format.E[0].A[ch] = SIGN_EXTEND(bc6h_format.EC[0].A[ch], bc6h_format.wBits);
-             bc6h_format.E[0].B[ch] = SIGN_EXTEND(bc6h_format.EC[0].B[ch], bc6h_format.tBits[ch]);
-             bc6h_format.E[1].A[ch] = SIGN_EXTEND(bc6h_format.EC[1].A[ch], bc6h_format.tBits[ch]);
-             bc6h_format.E[1].B[ch] = SIGN_EXTEND(bc6h_format.EC[1].B[ch], bc6h_format.tBits[ch]);
-         }
-         else if (bc6h_format.istransformed)
-         {
-             // Unsigned, transformed: the base is verbatim, deltas wrap to wBits.
-             bc6h_format.E[0].A[ch] = bc6h_format.EC[0].A[ch];
-             delta = SIGN_EXTEND(bc6h_format.EC[0].B[ch], bc6h_format.tBits[ch]);
-             bc6h_format.E[0].B[ch] = int(delta + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);
-             delta = SIGN_EXTEND(bc6h_format.EC[1].A[ch], bc6h_format.tBits[ch]);
-             bc6h_format.E[1].A[ch] = int(delta + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);
-             delta = SIGN_EXTEND(bc6h_format.EC[1].B[ch], bc6h_format.tBits[ch]);
-             bc6h_format.E[1].B[ch] = int(delta + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);
-         }
-         else
-         {
-             // Unsigned, untransformed: straight copy.
-             bc6h_format.E[0].A[ch] = bc6h_format.EC[0].A[ch];
-             bc6h_format.E[0].B[ch] = bc6h_format.EC[0].B[ch];
-             bc6h_format.E[1].A[ch] = bc6h_format.EC[1].A[ch];
-             bc6h_format.E[1].B[ch] = bc6h_format.EC[1].B[ch];
-         }
-     }
- }
- // Decodes one 16-byte BC6H block into 16 texels of three CGU_UINT16 values
- // each (48 values, written row by row into rgbBlock).
- // BC6HEncode is accepted but not consulted: signed decoding is hard-wired
- // off here, so the palette is always built as UNSIGNED_F16.
- void DecompressBC6_Internal(CGU_UINT16 rgbBlock[48], const CGU_UINT8 compressedBlock[16], const BC6H_Encode* BC6HEncode)
- {
-     if (BC6HEncode)
-     {
-     }
-     CGU_BOOL m_bc6signed = false;
-     // Determine the mode and pull out the coded endpoints and indices.
-     AMD_BC6H_Format bc6h_format = extract_format(compressedBlock);
-     if (m_bc6signed)
-         bc6h_format.format = SIGNED_F16;
-     else
-         bc6h_format.format = UNSIGNED_F16;
-     const bool singleRegion = (bc6h_format.region == BC6_ONE);
-     if (singleRegion)
-     {
-         // One region: a single 16-entry palette.
-         extract_compressed_endpoints(bc6h_format);
-         generate_palette_quantized(16, bc6h_format, 0);
-     }
-     else
-     {
-         // Two regions: one 8-entry palette per region.
-         extract_compressed_endpoints2(bc6h_format);
-         generate_palette_quantized(8, bc6h_format, 0);
-         generate_palette_quantized(8, bc6h_format, 1);
-     }
-     BC6H_Vec3 data;
-     int       rgbPos = 0;
-     // Texels are visited left to right, then top to bottom; the same running
-     // position indexes the partition table.
-     for (int texel = 0; texel < 16; texel++)
-     {
-         const int region      = singleRegion ? 0 : BC6_PARTITIONS[bc6h_format.d_shape_index][texel];
-         const int paleteIndex = bc6h_format.indices[texel / 4][texel % 4];
-         data                  = bc6h_format.Palete[region][paleteIndex];
-         rgbBlock[rgbPos++]    = data.x;
-         rgbBlock[rgbPos++]    = data.y;
-         rgbBlock[rgbPos++]    = data.z;
-     }
- }
- //======================= END OF DECOMPRESS CODE =========================================
- // Allocates a BC6H_Encode options object, fills it with the defaults and
- // returns it through *options.
- //   options : receives the new object; must not be NULL.
- // Returns CGU_CORE_OK on success, CGU_CORE_ERR_INVALIDPTR when options is
- // NULL, and CGU_CORE_ERR_NEWMEM when the allocation yields a null pointer.
- int CMP_CDECL CreateOptionsBC6(void** options)
- {
-     // Validate the out-parameter before writing through it.
-     if (!options)
-         return CGU_CORE_ERR_INVALIDPTR;
-     BC6H_Encode* created = new BC6H_Encode;
-     // A plain new-expression normally throws on failure; this check only
-     // matters for builds where operator new can return NULL.
-     if (!created)
-     {
-         *options = NULL;
-         return CGU_CORE_ERR_NEWMEM;
-     }
-     SetDefaultBC6Options(created);
-     *options = created;
-     return CGU_CORE_OK;
- }
- // Releases an options object by deleting it as a BC6H_Encode.
- // Returns CGU_CORE_ERR_INVALIDPTR when options is NULL, CGU_CORE_OK otherwise.
- int CMP_CDECL DestroyOptionsBC6(void* options)
- {
-     if (options == NULL)
-         return CGU_CORE_ERR_INVALIDPTR;
-     delete static_cast<BC6H_Encode*>(options);
-     return CGU_CORE_OK;
- }
- // Stores the quality setting, clamped to [0, 1], and derives the partition
- // search size from it as (quality * 2) / qFAST_THRESHOLD, never below 1/16.
- // Returns CGU_CORE_ERR_INVALIDPTR when options is NULL, CGU_CORE_OK otherwise.
- int CMP_CDECL SetQualityBC6(void* options, CGU_FLOAT fquality)
- {
-     if (options == NULL)
-         return CGU_CORE_ERR_INVALIDPTR;
-     BC6H_Encode* encoder = static_cast<BC6H_Encode*>(options);
-     // Clamp to the supported range.
-     fquality = (fquality < 0.0f) ? 0.0f : ((fquality > 1.0f) ? 1.0f : fquality);
-     encoder->m_quality             = fquality;
-     encoder->m_partitionSearchSize = (encoder->m_quality * 2.0F) / qFAST_THRESHOLD;
-     // Enforce the lower bound on the search size.
-     if (encoder->m_partitionSearchSize < (1.0F / 16.0F))
-     {
-         encoder->m_partitionSearchSize = (1.0F / 16.0F);
-     }
-     return CGU_CORE_OK;
- }
- // Stores mask in the options object's m_validModeMask field.
- // Returns CGU_CORE_ERR_INVALIDPTR when options is NULL, CGU_CORE_OK otherwise.
- int CMP_CDECL SetMaskBC6(void* options, CGU_UINT32 mask)
- {
-     if (options == NULL)
-         return CGU_CORE_ERR_INVALIDPTR;
-     static_cast<BC6H_Encode*>(options)->m_validModeMask = mask;
-     return CGU_CORE_OK;
- }
- // Stores sf16 in the options object's m_isSigned field.
- // Returns CGU_CORE_ERR_INVALIDPTR when options is NULL, CGU_CORE_OK otherwise.
- int CMP_CDECL SetSignedBC6(void* options, CGU_BOOL sf16)
- {
-     if (options == NULL)
-         return CGU_CORE_ERR_INVALIDPTR;
-     static_cast<BC6H_Encode*>(options)->m_isSigned = sf16;
-     return CGU_CORE_OK;
- }
- // Compresses one 4x4 block of three-channel 16-bit texels into a 16-byte
- // BC6H block.
- //   srcBlock          : first texel of the block; each row holds 4 texels of
- //                       3 consecutive CGU_UINT16 values.
- //   srcStrideInShorts : distance between successive rows, in CGU_UINT16 units.
- //   cmpBlock          : receives the 16 compressed bytes.
- //   options           : BC6H_Encode settings, or NULL to use the defaults.
- // Returns CGU_CORE_ERR_INVALIDPTR when srcBlock or cmpBlock is NULL,
- // CGU_CORE_OK otherwise.
- int CMP_CDECL CompressBlockBC6(const CGU_UINT16* srcBlock,
-                                unsigned int srcStrideInShorts,
-                                CMP_GLOBAL CGU_UINT8 cmpBlock[16],
-                                const CMP_GLOBAL void* options = NULL)
- {
-     // Reject null block pointers, as the option setters do for their argument.
-     if (!srcBlock || !cmpBlock)
-         return CGU_CORE_ERR_INVALIDPTR;
-     BC6H_Encode* BC6HEncode = (BC6H_Encode*)options;
-     BC6H_Encode  BC6HEncodeDefault;
-     if (BC6HEncode == NULL)
-     {
-         BC6HEncode = &BC6HEncodeDefault;
-         SetDefaultBC6Options(BC6HEncode);
-     }
-     BC6H_Encode_local BC6HEncode_local;
-     memset((CGU_UINT8*)&BC6HEncode_local, 0, sizeof(BC6H_Encode_local));
-     // Copy the source texels straight into the encoder input; the fourth
-     // channel is unused and set to 0.
-     for (CGU_INT row = 0; row < 4; row++)
-     {
-         CGU_INT srcpos = row * srcStrideInShorts;
-         for (CGU_INT col = 0; col < 4; col++)
-         {
-             const CGU_INT texel = row * 4 + col;
-             BC6HEncode_local.din[texel][0] = srcBlock[srcpos++];  // R
-             BC6HEncode_local.din[texel][1] = srcBlock[srcpos++];  // G
-             BC6HEncode_local.din[texel][2] = srcBlock[srcpos++];  // B
-             BC6HEncode_local.din[texel][3] = 0;                   // A
-         }
-     }
-     CompressBlockBC6_Internal(cmpBlock, 0, &BC6HEncode_local, BC6HEncode);
-     return CGU_CORE_OK;
- }
- // Decompresses one 16-byte BC6H block into 48 CGU_UINT16 values
- // (16 texels of 3 channels each).
- //   cmpBlock : the compressed block.
- //   srcBlock : receives the decoded values.
- //   options  : BC6H_Encode settings, or NULL to use the defaults.
- // Returns CGU_CORE_ERR_INVALIDPTR when cmpBlock or srcBlock is NULL,
- // CGU_CORE_OK otherwise.
- int CMP_CDECL DecompressBlockBC6(const unsigned char cmpBlock[16], CGU_UINT16 srcBlock[48], const void* options = NULL)
- {
-     // Reject null block pointers, as the option setters do for their argument.
-     if (!cmpBlock || !srcBlock)
-         return CGU_CORE_ERR_INVALIDPTR;
-     BC6H_Encode* BC6HEncode = (BC6H_Encode*)options;
-     BC6H_Encode  BC6HEncodeDefault;
-     if (BC6HEncode == NULL)
-     {
-         BC6HEncode = &BC6HEncodeDefault;
-         SetDefaultBC6Options(BC6HEncode);
-     }
-     DecompressBC6_Internal(srcBlock, cmpBlock, BC6HEncode);
-     return CGU_CORE_OK;
- }
- #endif // !ASPM
- #endif // !ASPM_GPU
- //============================================== OpenCL USER INTERFACE ====================================================
- #ifdef ASPM_OPENCL
- // OpenCL kernel entry point: each work item (x, y) gathers one block of
- // source values into a local encoder input and compresses it into
- // p_encoded_blocks at that block's offset.
- // Each of the three channel values that is below 0.00001 or NaN is replaced:
- //   signed mode   : NaN becomes F16NEGPREC_LIMIT_VAL, anything else is negated
- //                   and then clamped to be no lower than F16NEGPREC_LIMIT_VAL;
- //   unsigned mode : the value becomes 0.
- // The fourth channel is always set to 0.
- CMP_STATIC CMP_KERNEL void CMP_GPUEncoder(CMP_GLOBAL CGU_UINT8* p_source_pixels,
-                                           CMP_GLOBAL CGU_UINT8* p_encoded_blocks,
-                                           CMP_GLOBAL Source_Info* SourceInfo,
-                                           CMP_GLOBAL BC6H_Encode* BC6HEncode)
- {
-     CGU_UINT32 x = get_global_id(0);
-     CGU_UINT32 y = get_global_id(1);
-     // Work items outside the processed area do nothing.
-     if (x >= (SourceInfo->m_src_width / BYTEPP))
-         return;
-     if (y >= (SourceInfo->m_src_height / BYTEPP))
-         return;
-     BC6H_Encode_local BC6HEncode_local;
-     memset((CGU_UINT8*)&BC6HEncode_local, 0, sizeof(BC6H_Encode_local));
-     CGU_UINT32 stride    = SourceInfo->m_src_width * BYTEPP;
-     CGU_UINT32 srcOffset = (x * BlockX * BYTEPP) + (y * stride * BYTEPP);
-     CGU_UINT32 destI     = (x * COMPRESSED_BLOCK_SIZE) + (y * (SourceInfo->m_src_width / BlockX) * COMPRESSED_BLOCK_SIZE);
-     CGU_UINT32 srcidx;
-     for (CGU_INT i = 0; i < BlockX; i++)
-     {
-         srcidx = i * stride;
-         for (CGU_INT j = 0; j < BlockY; j++)
-         {
-             const CGU_INT texel = i * BlockX + j;
-             // The three channels are read from consecutive source positions
-             // and sanitized identically.
-             for (CGU_INT c = 0; c < 3; c++)
-             {
-                 BC6HEncode_local.din[texel][c] = (CGU_UINT16)(p_source_pixels[srcOffset + srcidx++]);
-                 if (BC6HEncode_local.din[texel][c] < 0.00001 || cmp_isnan(BC6HEncode_local.din[texel][c]))
-                 {
-                     if (BC6HEncode->m_isSigned)
-                     {
-                         BC6HEncode_local.din[texel][c] =
-                             (cmp_isnan(BC6HEncode_local.din[texel][c])) ? F16NEGPREC_LIMIT_VAL : -BC6HEncode_local.din[texel][c];
-                         if (BC6HEncode_local.din[texel][c] < F16NEGPREC_LIMIT_VAL)
-                         {
-                             BC6HEncode_local.din[texel][c] = F16NEGPREC_LIMIT_VAL;
-                         }
-                     }
-                     else
-                     {
-                         BC6HEncode_local.din[texel][c] = 0.0;
-                     }
-                 }
-             }
-             BC6HEncode_local.din[texel][3] = 0.0f;
-         }
-     }
-     CompressBlockBC6_Internal(p_encoded_blocks, destI, &BC6HEncode_local, BC6HEncode);
- }
- #endif
|