BC6HBC7.cpp 136 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468
  1. //-------------------------------------------------------------------------------------
  2. // BC6HBC7.cpp
  3. //
  4. // Block-compression (BC) functionality for BC6H and BC7 (DirectX 11 texture compression)
  5. //
  6. // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  7. // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  8. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  9. // PARTICULAR PURPOSE.
  10. //
  11. // Copyright (c) Microsoft Corporation. All rights reserved.
  12. //
  13. // http://go.microsoft.com/fwlink/?LinkId=248926
  14. //-------------------------------------------------------------------------------------
  15. //#include "directxtexp.h" ESENTHEL
  16. //#include "BC.h" ESENTHEL
  17. using namespace DirectX;
  18. using namespace DirectX::PackedVector;
  19. //-------------------------------------------------------------------------------------
  20. // Macros
  21. //-------------------------------------------------------------------------------------
  // Sign-extends the low `nb` bits of the integer `x`: when bit (nb-1) of `x` is
  // set, every bit from `nb` upwards is set in the result; otherwise `x` is
  // returned unchanged.
  // The high-bit mask is built as (~0) ^ ((1 << nb) - 1) instead of (~0) << nb,
  // because left-shifting a negative signed value is undefined behaviour before
  // C++20 and is rejected in constant expressions. Valid for 1 <= nb <= 30.
  #define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?((~0)^((1<<(nb))-1)):0)|(x))
  // Because these are used in SAL annotations, they need to remain macros rather than const values
  #define BC6H_MAX_REGIONS 2
  #define BC6H_MAX_INDICES 16
  #define BC7_MAX_REGIONS 3
  #define BC7_MAX_INDICES 16
  28. namespace
  29. {
  30. //-------------------------------------------------------------------------------------
  31. // Constants
  32. //-------------------------------------------------------------------------------------
  // Bit patterns for 16-bit half-float values.
  constexpr uint16_t F16S_MASK = 0x8000; // f16 sign mask
  constexpr uint16_t F16EM_MASK = 0x7fff; // f16 exp & mantissa mask
  constexpr uint16_t F16MAX = 0x7bff; // MAXFLT bit pattern for XMHALF

  // Channel and shape counts for each format.
  constexpr size_t BC6H_NUM_CHANNELS = 3;
  constexpr size_t BC6H_MAX_SHAPES = 32;

  constexpr size_t BC7_NUM_CHANNELS = 4;
  constexpr size_t BC7_MAX_SHAPES = 64;

  // Fixed-point interpolation weights: weights range over 0..BC67_WEIGHT_MAX and
  // a weighted sum is rounded with BC67_WEIGHT_ROUND before being shifted right
  // by BC67_WEIGHT_SHIFT (64 == 1 << 6, 32 == 64 / 2).
  constexpr int32_t BC67_WEIGHT_MAX = 64;
  constexpr uint32_t BC67_WEIGHT_SHIFT = 6;
  constexpr int32_t BC67_WEIGHT_ROUND = 32;

  // Small squared tolerance: (1/256)^2.
  constexpr float fEpsilon = (0.25f / 64.0f) * (0.25f / 64.0f);

  // Evenly spaced fractions in [0, 1]: the pC* tables run from 1 down to 0 and
  // the pD* tables from 0 up to 1, in halves (3 entries) or thirds (4 entries).
  constexpr float pC3[] = { 2.0f / 2.0f, 1.0f / 2.0f, 0.0f / 2.0f };
  constexpr float pD3[] = { 0.0f / 2.0f, 1.0f / 2.0f, 2.0f / 2.0f };
  constexpr float pC4[] = { 3.0f / 3.0f, 2.0f / 3.0f, 1.0f / 3.0f, 0.0f / 3.0f };
  constexpr float pD4[] = { 0.0f / 3.0f, 1.0f / 3.0f, 2.0f / 3.0f, 3.0f / 3.0f };
  48. // Partition, Shape, Pixel (index into 4x4 block)
  49. const uint8_t g_aPartitionTable[3][64][16] =
  50. {
  51. { // 1 Region case has no subsets (all 0)
  52. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  53. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  54. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  55. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  56. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  57. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  58. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  59. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  60. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  61. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  62. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  63. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  64. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  65. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  66. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  67. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  68. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  69. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  70. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  71. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  72. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  73. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  74. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  75. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  76. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  77. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  78. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  79. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  80. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  81. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  82. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  83. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  84. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  85. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  86. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  87. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  88. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  89. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  90. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  91. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  92. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  93. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  94. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  95. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  96. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  97. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  98. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  99. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  100. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  101. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  102. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  103. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  104. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  105. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  106. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  107. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  108. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  109. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  110. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  111. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  112. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  113. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  114. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
  115. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
  116. },
  117. { // BC6H/BC7 Partition Set for 2 Subsets
  118. { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 0
  119. { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 }, // Shape 1
  120. { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, // Shape 2
  121. { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
  122. { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 4
  123. { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 5
  124. { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
  125. { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 7
  126. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 8
  127. { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 9
  128. { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 10
  129. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 }, // Shape 11
  130. { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 12
  131. { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 13
  132. { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 14
  133. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 15
  134. { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, // Shape 16
  135. { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 17
  136. { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 18
  137. { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 19
  138. { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 20
  139. { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 21
  140. { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 22
  141. { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 }, // Shape 23
  142. { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 24
  143. { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 25
  144. { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 }, // Shape 26
  145. { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 }, // Shape 27
  146. { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 }, // Shape 28
  147. { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 29
  148. { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 30
  149. { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 31
  150. // BC7 Partition Set for 2 Subsets (second-half)
  151. { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, // Shape 32
  152. { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 33
  153. { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 }, // Shape 34
  154. { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 }, // Shape 35
  155. { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 }, // Shape 36
  156. { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 }, // Shape 37
  157. { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, // Shape 38
  158. { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 }, // Shape 39
  159. { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 40
  160. { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, // Shape 41
  161. { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 }, // Shape 42
  162. { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 }, // Shape 43
  163. { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 }, // Shape 44
  164. { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 }, // Shape 45
  165. { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 }, // Shape 46
  166. { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, // Shape 47
  167. { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, // Shape 48
  168. { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, // Shape 49
  169. { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 }, // Shape 50
  170. { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 }, // Shape 51
  171. { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 }, // Shape 52
  172. { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 53
  173. { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 54
  174. { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 }, // Shape 55
  175. { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 56
  176. { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 }, // Shape 57
  177. { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, // Shape 58
  178. { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 }, // Shape 59
  179. { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 60
  180. { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 61
  181. { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 }, // Shape 62
  182. { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 } // Shape 63
  183. },
  184. { // BC7 Partition Set for 3 Subsets
  185. { 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 }, // Shape 0
  186. { 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 1
  187. { 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 2
  188. { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
  189. { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 4
  190. { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 }, // Shape 5
  191. { 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
  192. { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 7
  193. { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 8
  194. { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 9
  195. { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 10
  196. { 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 }, // Shape 11
  197. { 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 }, // Shape 12
  198. { 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 13
  199. { 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 14
  200. { 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 }, // Shape 15
  201. { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 }, // Shape 16
  202. { 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 }, // Shape 17
  203. { 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 18
  204. { 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 }, // Shape 19
  205. { 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 }, // Shape 20
  206. { 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 }, // Shape 21
  207. { 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 22
  208. { 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 }, // Shape 23
  209. { 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 }, // Shape 24
  210. { 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 }, // Shape 25
  211. { 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0 }, // Shape 26
  212. { 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 }, // Shape 27
  213. { 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 }, // Shape 28
  214. { 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 }, // Shape 29
  215. { 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 }, // Shape 30
  216. { 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 31
  217. { 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 32
  218. { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 }, // Shape 33
  219. { 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 }, // Shape 34
  220. { 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 }, // Shape 35
  221. { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 }, // Shape 36
  222. { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }, // Shape 37
  223. { 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 }, // Shape 38
  224. { 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 }, // Shape 39
  225. { 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1 }, // Shape 40
  226. { 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 41
  227. { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 42
  228. { 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 }, // Shape 43
  229. { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 }, // Shape 44
  230. { 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 }, // Shape 45
  231. { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 }, // Shape 46
  232. { 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 47
  233. { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 }, // Shape 48
  234. { 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 }, // Shape 49
  235. { 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 }, // Shape 50
  236. { 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 51
  237. { 0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 }, // Shape 52
  238. { 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 }, // Shape 53
  239. { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 }, // Shape 54
  240. { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 55
  241. { 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 56
  242. { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 }, // Shape 57
  243. { 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 }, // Shape 58
  244. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2 }, // Shape 59
  245. { 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1 }, // Shape 60
  246. { 0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2 }, // Shape 61
  247. { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 62
  248. { 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0 } // Shape 63
  249. }
  250. };
  251. // Partition, Shape, Fixup
  // Fix-up pixel indices, indexed as [partition][shape][subset].
  // Each entry is a pixel index (0..15) into the 4x4 block; the entry for the
  // first subset is always 0, and unused subset slots are 0 as well.
  // NOTE(review): presumably these are the per-subset anchor indices whose top
  // index bit is implied in the BC6H/BC7 bitstream - confirm against the format spec.
  const uint8_t g_aFixUp[3][64][3] =
  {
    // No fix-ups for 1st subset for BC6H or BC7: an empty initializer zero-fills all 64 shapes.
    {},

    { // BC6H/BC7 Partition Set Fixups for 2 Subsets
      { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 },
      { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 },
      { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 },
      { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 },
      { 0, 15, 0 }, { 0,  2, 0 }, { 0,  8, 0 }, { 0,  2, 0 },
      { 0,  2, 0 }, { 0,  8, 0 }, { 0,  8, 0 }, { 0, 15, 0 },
      { 0,  2, 0 }, { 0,  8, 0 }, { 0,  2, 0 }, { 0,  2, 0 },
      { 0,  8, 0 }, { 0,  8, 0 }, { 0,  2, 0 }, { 0,  2, 0 },

      // BC7 Partition Set Fixups for 2 Subsets (second-half)
      { 0, 15, 0 }, { 0, 15, 0 }, { 0,  6, 0 }, { 0,  8, 0 },
      { 0,  2, 0 }, { 0,  8, 0 }, { 0, 15, 0 }, { 0, 15, 0 },
      { 0,  2, 0 }, { 0,  8, 0 }, { 0,  2, 0 }, { 0,  2, 0 },
      { 0,  2, 0 }, { 0, 15, 0 }, { 0, 15, 0 }, { 0,  6, 0 },
      { 0,  6, 0 }, { 0,  2, 0 }, { 0,  6, 0 }, { 0,  8, 0 },
      { 0, 15, 0 }, { 0, 15, 0 }, { 0,  2, 0 }, { 0,  2, 0 },
      { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 }, { 0, 15, 0 },
      { 0, 15, 0 }, { 0,  2, 0 }, { 0,  2, 0 }, { 0, 15, 0 }
    },

    { // BC7 Partition Set Fixups for 3 Subsets
      { 0,  3, 15 }, { 0,  3,  8 }, { 0, 15,  8 }, { 0, 15,  3 },
      { 0,  8, 15 }, { 0,  3, 15 }, { 0, 15,  3 }, { 0, 15,  8 },
      { 0,  8, 15 }, { 0,  8, 15 }, { 0,  6, 15 }, { 0,  6, 15 },
      { 0,  6, 15 }, { 0,  5, 15 }, { 0,  3, 15 }, { 0,  3,  8 },
      { 0,  3, 15 }, { 0,  3,  8 }, { 0,  8, 15 }, { 0, 15,  3 },
      { 0,  3, 15 }, { 0,  3,  8 }, { 0,  6, 15 }, { 0, 10,  8 },
      { 0,  5,  3 }, { 0,  8, 15 }, { 0,  8,  6 }, { 0,  6, 10 },
      { 0,  8, 15 }, { 0,  5, 15 }, { 0, 15, 10 }, { 0, 15,  8 },
      { 0,  8, 15 }, { 0, 15,  3 }, { 0,  3, 15 }, { 0,  5, 10 },
      { 0,  6, 10 }, { 0, 10,  8 }, { 0,  8,  9 }, { 0, 15, 10 },
      { 0, 15,  6 }, { 0,  3, 15 }, { 0, 15,  8 }, { 0,  5, 15 },
      { 0, 15,  3 }, { 0, 15,  6 }, { 0, 15,  6 }, { 0, 15,  8 },
      { 0,  3, 15 }, { 0, 15,  3 }, { 0,  5, 15 }, { 0,  5, 15 },
      { 0,  5, 15 }, { 0,  8, 15 }, { 0,  5, 15 }, { 0, 10, 15 },
      { 0,  5, 15 }, { 0, 10, 15 }, { 0,  8, 15 }, { 0, 13, 15 },
      { 0, 15,  3 }, { 0, 12, 15 }, { 0,  3, 15 }, { 0,  3,  8 }
    }
  };
  // Interpolation weight tables for 2-, 3- and 4-bit indices (4, 8 and 16
  // entries). Every table runs from 0 up to 64.
  constexpr int g_aWeights2[4] = { 0, 21, 43, 64 };
  constexpr int g_aWeights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
  constexpr int g_aWeights4[16] =
  {
    0, 4, 9, 13, 17, 21, 26, 30,
    34, 38, 43, 47, 51, 55, 60, 64
  };
  313. }
  314. namespace DirectX
  315. {
  316. class LDRColorA
  317. {
  318. public:
  319. uint8_t r, g, b, a;
  320. LDRColorA() = default;
  321. LDRColorA(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a) : r(_r), g(_g), b(_b), a(_a) {}
  322. const uint8_t& operator [] (_In_range_(0, 3) size_t uElement) const
  323. {
  324. switch (uElement)
  325. {
  326. case 0: return r;
  327. case 1: return g;
  328. case 2: return b;
  329. case 3: return a;
  330. default: assert(false); return r;
  331. }
  332. }
  333. uint8_t& operator [] (_In_range_(0, 3) size_t uElement)
  334. {
  335. switch (uElement)
  336. {
  337. case 0: return r;
  338. case 1: return g;
  339. case 2: return b;
  340. case 3: return a;
  341. default: assert(false); return r;
  342. }
  343. }
  344. LDRColorA operator = (_In_ const HDRColorA& c)
  345. {
  346. LDRColorA ret;
  347. HDRColorA tmp(c);
  348. tmp = tmp.Clamp(0.0f, 1.0f) * 255.0f;
  349. ret.r = uint8_t(tmp.r + 0.001f);
  350. ret.g = uint8_t(tmp.g + 0.001f);
  351. ret.b = uint8_t(tmp.b + 0.001f);
  352. ret.a = uint8_t(tmp.a + 0.001f);
  353. return ret;
  354. }
  355. static void InterpolateRGB(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ _In_range_(2, 4) size_t wcprec, _Out_ LDRColorA& out)
  356. {
  357. const int* aWeights = nullptr;
  358. switch (wcprec)
  359. {
  360. case 2: aWeights = g_aWeights2; assert(wc < 4); _Analysis_assume_(wc < 4); break;
  361. case 3: aWeights = g_aWeights3; assert(wc < 8); _Analysis_assume_(wc < 8); break;
  362. case 4: aWeights = g_aWeights4; assert(wc < 16); _Analysis_assume_(wc < 16); break;
  363. default: assert(false); out.r = out.g = out.b = 0; return;
  364. }
  365. out.r = uint8_t((uint32_t(c0.r) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.r) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
  366. out.g = uint8_t((uint32_t(c0.g) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.g) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
  367. out.b = uint8_t((uint32_t(c0.b) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.b) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
  368. }
  369. static void InterpolateA(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wa, _In_range_(2, 4) _In_ size_t waprec, _Out_ LDRColorA& out)
  370. {
  371. const int* aWeights = nullptr;
  372. switch (waprec)
  373. {
  374. case 2: aWeights = g_aWeights2; assert(wa < 4); _Analysis_assume_(wa < 4); break;
  375. case 3: aWeights = g_aWeights3; assert(wa < 8); _Analysis_assume_(wa < 8); break;
  376. case 4: aWeights = g_aWeights4; assert(wa < 16); _Analysis_assume_(wa < 16); break;
  377. default: assert(false); out.a = 0; return;
  378. }
  379. out.a = uint8_t((uint32_t(c0.a) * uint32_t(BC67_WEIGHT_MAX - aWeights[wa]) + uint32_t(c1.a) * uint32_t(aWeights[wa]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
  380. }
  381. static void Interpolate(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wa, _In_ _In_range_(2, 4) size_t wcprec, _In_ _In_range_(2, 4) size_t waprec, _Out_ LDRColorA& out)
  382. {
  383. InterpolateRGB(c0, c1, wc, wcprec, out);
  384. InterpolateA(c0, c1, wa, waprec, out);
  385. }
  386. };
  387. static_assert(sizeof(LDRColorA) == 4, "Unexpected packing");
  388. struct LDREndPntPair
  389. {
  390. LDRColorA A;
  391. LDRColorA B;
  392. };
  393. inline HDRColorA::HDRColorA(const LDRColorA& c)
  394. {
  395. r = float(c.r) * (1.0f / 255.0f);
  396. g = float(c.g) * (1.0f / 255.0f);
  397. b = float(c.b) * (1.0f / 255.0f);
  398. a = float(c.a) * (1.0f / 255.0f);
  399. }
  400. inline HDRColorA& HDRColorA::operator = (const LDRColorA& c)
  401. {
  402. r = (float)c.r;
  403. g = (float)c.g;
  404. b = (float)c.b;
  405. a = (float)c.a;
  406. return *this;
  407. }
  408. inline LDRColorA HDRColorA::ToLDRColorA() const
  409. {
  410. return LDRColorA((uint8_t)(r + 0.01f), (uint8_t)(g + 0.01f), (uint8_t)(b + 0.01f), (uint8_t)(a + 0.01f));
  411. }
  412. }
  413. namespace
  414. {
  415. class INTColor
  416. {
  417. public:
  418. int r, g, b;
  419. int pad;
  420. public:
  421. INTColor() = default;
  422. INTColor(int nr, int ng, int nb) { r = nr; g = ng; b = nb; }
  423. INTColor(const INTColor& c) { r = c.r; g = c.g; b = c.b; }
  424. INTColor operator - (_In_ const INTColor& c) const
  425. {
  426. return INTColor(r - c.r, g - c.g, b - c.b);
  427. }
  428. INTColor& operator += (_In_ const INTColor& c)
  429. {
  430. r += c.r;
  431. g += c.g;
  432. b += c.b;
  433. return *this;
  434. }
  435. INTColor& operator -= (_In_ const INTColor& c)
  436. {
  437. r -= c.r;
  438. g -= c.g;
  439. b -= c.b;
  440. return *this;
  441. }
  442. INTColor& operator &= (_In_ const INTColor& c)
  443. {
  444. r &= c.r;
  445. g &= c.g;
  446. b &= c.b;
  447. return *this;
  448. }
  449. int& operator [] (_In_ uint8_t i)
  450. {
  451. assert(i < sizeof(INTColor) / sizeof(int));
  452. _Analysis_assume_(i < sizeof(INTColor) / sizeof(int));
  453. return ((int*) this)[i];
  454. }
  455. void Set(_In_ const HDRColorA& c, _In_ bool bSigned)
  456. {
  457. PackedVector::XMHALF4 aF16;
  458. XMVECTOR v = XMLoadFloat4((const XMFLOAT4*)& c);
  459. XMStoreHalf4(&aF16, v);
  460. r = F16ToINT(aF16.x, bSigned);
  461. g = F16ToINT(aF16.y, bSigned);
  462. b = F16ToINT(aF16.z, bSigned);
  463. }
  464. INTColor& Clamp(_In_ int iMin, _In_ int iMax)
  465. {
  466. r = std::min<int>(iMax, std::max<int>(iMin, r));
  467. g = std::min<int>(iMax, std::max<int>(iMin, g));
  468. b = std::min<int>(iMax, std::max<int>(iMin, b));
  469. return *this;
  470. }
  471. INTColor& SignExtend(_In_ const LDRColorA& Prec)
  472. {
  473. r = SIGN_EXTEND(r, Prec.r);
  474. g = SIGN_EXTEND(g, Prec.g);
  475. b = SIGN_EXTEND(b, Prec.b);
  476. return *this;
  477. }
  478. void ToF16(_Out_writes_(3) PackedVector::HALF aF16[3], _In_ bool bSigned) const
  479. {
  480. aF16[0] = INT2F16(r, bSigned);
  481. aF16[1] = INT2F16(g, bSigned);
  482. aF16[2] = INT2F16(b, bSigned);
  483. }
  484. private:
  485. static int F16ToINT(_In_ const PackedVector::HALF& f, _In_ bool bSigned)
  486. {
  487. uint16_t input = *((const uint16_t*)&f);
  488. int out, s;
  489. if (bSigned)
  490. {
  491. s = input & F16S_MASK;
  492. input &= F16EM_MASK;
  493. if (input > F16MAX) out = F16MAX;
  494. else out = input;
  495. out = s ? -out : out;
  496. }
  497. else
  498. {
  499. if (input & F16S_MASK) out = 0;
  500. else out = input;
  501. }
  502. return out;
  503. }
  504. static PackedVector::HALF INT2F16(_In_ int input, _In_ bool bSigned)
  505. {
  506. PackedVector::HALF h;
  507. uint16_t out;
  508. if (bSigned)
  509. {
  510. int s = 0;
  511. if (input < 0)
  512. {
  513. s = F16S_MASK;
  514. input = -input;
  515. }
  516. out = uint16_t(s | input);
  517. }
  518. else
  519. {
  520. assert(input >= 0 && input <= F16MAX);
  521. out = (uint16_t)input;
  522. }
  523. *((uint16_t*)&h) = out;
  524. return h;
  525. }
  526. };
  527. static_assert(sizeof(INTColor) == 16, "Unexpected packing");
  528. struct INTEndPntPair
  529. {
  530. INTColor A;
  531. INTColor B;
  532. };
  533. template< size_t SizeInBytes >
  534. class CBits
  535. {
  536. public:
  537. uint8_t GetBit(_Inout_ size_t& uStartBit) const
  538. {
  539. assert(uStartBit < 128);
  540. _Analysis_assume_(uStartBit < 128);
  541. size_t uIndex = uStartBit >> 3;
  542. uint8_t ret = (m_uBits[uIndex] >> (uStartBit - (uIndex << 3))) & 0x01;
  543. uStartBit++;
  544. return ret;
  545. }
  546. uint8_t GetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits) const
  547. {
  548. if (uNumBits == 0) return 0;
  549. assert(uStartBit + uNumBits <= 128 && uNumBits <= 8);
  550. _Analysis_assume_(uStartBit + uNumBits <= 128 && uNumBits <= 8);
  551. uint8_t ret;
  552. size_t uIndex = uStartBit >> 3;
  553. size_t uBase = uStartBit - (uIndex << 3);
  554. if (uBase + uNumBits > 8)
  555. {
  556. size_t uFirstIndexBits = 8 - uBase;
  557. size_t uNextIndexBits = uNumBits - uFirstIndexBits;
  558. ret = (m_uBits[uIndex] >> uBase) | ((m_uBits[uIndex + 1] & ((1 << uNextIndexBits) - 1)) << uFirstIndexBits);
  559. }
  560. else
  561. {
  562. ret = (m_uBits[uIndex] >> uBase) & ((1 << uNumBits) - 1);
  563. }
  564. assert(ret < (1 << uNumBits));
  565. uStartBit += uNumBits;
  566. return ret;
  567. }
  568. void SetBit(_Inout_ size_t& uStartBit, _In_ uint8_t uValue)
  569. {
  570. assert(uStartBit < 128 && uValue < 2);
  571. _Analysis_assume_(uStartBit < 128 && uValue < 2);
  572. size_t uIndex = uStartBit >> 3;
  573. size_t uBase = uStartBit - (uIndex << 3);
  574. m_uBits[uIndex] &= ~(1 << uBase);
  575. m_uBits[uIndex] |= uValue << uBase;
  576. uStartBit++;
  577. }
  578. void SetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits, _In_ uint8_t uValue)
  579. {
  580. if (uNumBits == 0)
  581. return;
  582. assert(uStartBit + uNumBits <= 128 && uNumBits <= 8);
  583. _Analysis_assume_(uStartBit + uNumBits <= 128 && uNumBits <= 8);
  584. assert(uValue < (1 << uNumBits));
  585. size_t uIndex = uStartBit >> 3;
  586. size_t uBase = uStartBit - (uIndex << 3);
  587. if (uBase + uNumBits > 8)
  588. {
  589. size_t uFirstIndexBits = 8 - uBase;
  590. size_t uNextIndexBits = uNumBits - uFirstIndexBits;
  591. m_uBits[uIndex] &= ~(((1 << uFirstIndexBits) - 1) << uBase);
  592. m_uBits[uIndex] |= uValue << uBase;
  593. m_uBits[uIndex + 1] &= ~((1 << uNextIndexBits) - 1);
  594. m_uBits[uIndex + 1] |= uValue >> uFirstIndexBits;
  595. }
  596. else
  597. {
  598. m_uBits[uIndex] &= ~(((1 << uNumBits) - 1) << uBase);
  599. m_uBits[uIndex] |= uValue << uBase;
  600. }
  601. uStartBit += uNumBits;
  602. }
  603. private:
  604. uint8_t m_uBits[SizeInBytes];
  605. };
  606. // BC6H compression (16 bits per texel)
  607. class D3DX_BC6H : private CBits< 16 >
  608. {
  609. public:
  610. void Decode(_In_ bool bSigned, _Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
  611. void Encode(_In_ bool bSigned, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
  612. private:
  613. #pragma warning(push)
  614. #pragma warning(disable : 4480)
  615. enum EField : uint8_t
  616. {
  617. NA, // N/A
  618. M, // Mode
  619. D, // Shape
  620. RW,
  621. RX,
  622. RY,
  623. RZ,
  624. GW,
  625. GX,
  626. GY,
  627. GZ,
  628. BW,
  629. BX,
  630. BY,
  631. BZ,
  632. };
  633. #pragma warning(pop)
  634. struct ModeDescriptor
  635. {
  636. EField m_eField;
  637. uint8_t m_uBit;
  638. };
  639. struct ModeInfo
  640. {
  641. uint8_t uMode;
  642. uint8_t uPartitions;
  643. bool bTransformed;
  644. uint8_t uIndexPrec;
  645. LDRColorA RGBAPrec[BC6H_MAX_REGIONS][2];
  646. };
  647. #pragma warning(push)
  648. #pragma warning(disable : 4512)
  649. struct EncodeParams
  650. {
  651. float fBestErr;
  652. const bool bSigned;
  653. uint8_t uMode;
  654. uint8_t uShape;
  655. const HDRColorA* const aHDRPixels;
  656. INTEndPntPair aUnqEndPts[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS];
  657. INTColor aIPixels[NUM_PIXELS_PER_BLOCK];
  658. EncodeParams(const HDRColorA* const aOriginal, bool bSignedFormat) :
  659. fBestErr(FLT_MAX), bSigned(bSignedFormat), aHDRPixels(aOriginal)
  660. {
  661. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  662. {
  663. aIPixels[i].Set(aOriginal[i], bSigned);
  664. }
  665. }
  666. };
  667. #pragma warning(pop)
  668. static int Quantize(_In_ int iValue, _In_ int prec, _In_ bool bSigned);
  669. static int Unquantize(_In_ int comp, _In_ uint8_t uBitsPerComp, _In_ bool bSigned);
  670. static int FinishUnquantize(_In_ int comp, _In_ bool bSigned);
  671. static bool EndPointsFit(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[]);
  672. void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ const INTEndPntPair& endPts,
  673. _Out_writes_(BC6H_MAX_INDICES) INTColor aPalette[]) const;
  674. float MapColorsQuantized(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ const INTEndPntPair &endPts) const;
  675. float PerturbOne(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ uint8_t ch,
  676. _In_ const INTEndPntPair& oldEndPts, _Out_ INTEndPntPair& newEndPts, _In_ float fOldErr, _In_ int do_b) const;
  677. void OptimizeOne(_In_ const EncodeParams* pEP, _In_reads_(np) const INTColor aColors[], _In_ size_t np, _In_ float aOrgErr,
  678. _In_ const INTEndPntPair &aOrgEndPts, _Out_ INTEndPntPair &aOptEndPts) const;
  679. void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const float aOrgErr[],
  680. _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aOrgEndPts[],
  681. _Out_writes_all_(BC6H_MAX_REGIONS) INTEndPntPair aOptEndPts[]) const;
  682. static void SwapIndices(_In_ const EncodeParams* pEP, _Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[],
  683. _In_reads_(NUM_PIXELS_PER_BLOCK) size_t aIndices[]);
  684. void AssignIndices(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
  685. _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices[],
  686. _Out_writes_(BC6H_MAX_REGIONS) float aTotErr[]) const;
  687. void QuantizeEndPts(_In_ const EncodeParams* pEP, _Out_writes_(BC6H_MAX_REGIONS) INTEndPntPair* qQntEndPts) const;
  688. void EmitBlock(_In_ const EncodeParams* pEP, _In_reads_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
  689. _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndices[]);
  690. void Refine(_Inout_ EncodeParams* pEP);
  691. static void GeneratePaletteUnquantized(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _Out_writes_(BC6H_MAX_INDICES) INTColor aPalette[]);
  692. float MapColors(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _In_ size_t np, _In_reads_(np) const size_t* auIndex) const;
  693. float RoughMSE(_Inout_ EncodeParams* pEP) const;
  694. private:
  695. const static ModeDescriptor ms_aDesc[][82];
  696. const static ModeInfo ms_aInfo[];
  697. const static int ms_aModeToInfo[];
  698. };
  699. // BC67 compression (16b bits per texel)
  700. class D3DX_BC7 : private CBits< 16 >
  701. {
  702. public:
  703. void Decode(Color (&pOut)[4][4]) const; // ESENTHEL CHANGED
  704. void Encode(DWORD flags, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
  705. private:
  706. struct ModeInfo
  707. {
  708. uint8_t uPartitions;
  709. uint8_t uPartitionBits;
  710. uint8_t uPBits;
  711. uint8_t uRotationBits;
  712. uint8_t uIndexModeBits;
  713. uint8_t uIndexPrec;
  714. uint8_t uIndexPrec2;
  715. LDRColorA RGBAPrec;
  716. LDRColorA RGBAPrecWithP;
  717. };
  718. #pragma warning(push)
  719. #pragma warning(disable : 4512)
  720. struct EncodeParams
  721. {
  722. uint8_t uMode;
  723. LDREndPntPair aEndPts[BC7_MAX_SHAPES][BC7_MAX_REGIONS];
  724. LDRColorA aLDRPixels[NUM_PIXELS_PER_BLOCK];
  725. const HDRColorA* const aHDRPixels;
  726. EncodeParams(const HDRColorA* const aOriginal) : aHDRPixels(aOriginal) {}
  727. };
  728. #pragma warning(pop)
  729. static uint8_t Quantize(_In_ uint8_t comp, _In_ uint8_t uPrec)
  730. {
  731. assert(0 < uPrec && uPrec <= 8);
  732. uint8_t rnd = (uint8_t)std::min<uint16_t>(255, uint16_t(comp) + (1 << (7 - uPrec)));
  733. return rnd >> (8 - uPrec);
  734. }
  735. static LDRColorA Quantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
  736. {
  737. LDRColorA q;
  738. q.r = Quantize(c.r, RGBAPrec.r);
  739. q.g = Quantize(c.g, RGBAPrec.g);
  740. q.b = Quantize(c.b, RGBAPrec.b);
  741. if (RGBAPrec.a)
  742. q.a = Quantize(c.a, RGBAPrec.a);
  743. else
  744. q.a = 255;
  745. return q;
  746. }
  747. static uint8_t Unquantize(_In_ uint8_t comp, _In_ size_t uPrec)
  748. {
  749. assert(0 < uPrec && uPrec <= 8);
  750. comp = comp << (8 - uPrec);
  751. return comp | (comp >> uPrec);
  752. }
  753. static LDRColorA Unquantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
  754. {
  755. LDRColorA q;
  756. q.r = Unquantize(c.r, RGBAPrec.r);
  757. q.g = Unquantize(c.g, RGBAPrec.g);
  758. q.b = Unquantize(c.b, RGBAPrec.b);
  759. q.a = RGBAPrec.a > 0 ? Unquantize(c.a, RGBAPrec.a) : 255;
  760. return q;
  761. }
  762. void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ size_t uIndexMode, _In_ const LDREndPntPair& endpts,
  763. _Out_writes_(BC7_MAX_INDICES) LDRColorA aPalette[]) const;
  764. float PerturbOne(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
  765. _In_ size_t ch, _In_ const LDREndPntPair &old_endpts,
  766. _Out_ LDREndPntPair &new_endpts, _In_ float old_err, _In_ uint8_t do_b) const;
  767. void Exhaustive(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
  768. _In_ size_t ch, _Inout_ float& fOrgErr, _Inout_ LDREndPntPair& optEndPt) const;
  769. void OptimizeOne(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
  770. _In_ float orig_err, _In_ const LDREndPntPair &orig_endpts, _Out_ LDREndPntPair &opt_endpts) const;
  771. void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
  772. _In_reads_(BC7_MAX_REGIONS) const float orig_err[],
  773. _In_reads_(BC7_MAX_REGIONS) const LDREndPntPair orig_endpts[],
  774. _Out_writes_(BC7_MAX_REGIONS) LDREndPntPair opt_endpts[]) const;
  775. void AssignIndices(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
  776. _In_reads_(BC7_MAX_REGIONS) LDREndPntPair endpts[],
  777. _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], _Out_writes_(NUM_PIXELS_PER_BLOCK) size_t aIndices2[],
  778. _Out_writes_(BC7_MAX_REGIONS) float afTotErr[]) const;
  779. void EmitBlock(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode,
  780. _In_reads_(BC7_MAX_REGIONS) const LDREndPntPair aEndPts[],
  781. _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndex[],
  782. _In_reads_(NUM_PIXELS_PER_BLOCK) const size_t aIndex2[]);
  783. float Refine(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode);
  784. float MapColors(_In_ const EncodeParams* pEP, _In_reads_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
  785. _In_ const LDREndPntPair& endPts, _In_ float fMinErr) const;
  786. static float RoughMSE(_Inout_ EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode);
  787. private:
  788. const static ModeInfo ms_aInfo[];
  789. };
  790. }
  791. // BC6H Compression
  792. const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] =
  793. {
  794. { // Mode 1 (0x00) - 10 5 5 5
  795. { M, 0}, { M, 1}, {GY, 4}, {BY, 4}, {BZ, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  796. {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  797. {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  798. {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  799. {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  800. {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  801. {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
  802. {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
  803. { D, 3}, { D, 4},
  804. },
  805. { // Mode 2 (0x01) - 7 6 6 6
  806. { M, 0}, { M, 1}, {GY, 5}, {GZ, 4}, {GZ, 5}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  807. {RW, 5}, {RW, 6}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  808. {GW, 5}, {GW, 6}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  809. {BW, 5}, {BW, 6}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  810. {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  811. {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  812. {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
  813. {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
  814. { D, 3}, { D, 4},
  815. },
  816. { // Mode 3 (0x02) - 11 5 4 4
  817. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  818. {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  819. {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  820. {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  821. {RW,10}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
  822. {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
  823. {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
  824. {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
  825. { D, 3}, { D, 4},
  826. },
  827. { // Mode 4 (0x06) - 11 4 5 4
  828. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  829. {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  830. {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  831. {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
  832. {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  833. {GW,10}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
  834. {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 0},
  835. {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {GY, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
  836. { D, 3}, { D, 4},
  837. },
  838. { // Mode 5 (0x0a) - 11 4 4 5
  839. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  840. {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  841. {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  842. {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
  843. {BY, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
  844. {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  845. {BW,10}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 1},
  846. {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {BZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
  847. { D, 3}, { D, 4},
  848. },
  849. { // Mode 6 (0x0e) - 9 5 5 5
  850. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  851. {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  852. {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  853. {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  854. {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  855. {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  856. {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
  857. {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
  858. { D, 3}, { D, 4},
  859. },
  860. { // Mode 7 (0x12) - 8 6 5 5
  861. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  862. {RW, 5}, {RW, 6}, {RW, 7}, {GZ, 4}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  863. {GW, 5}, {GW, 6}, {GW, 7}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  864. {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 3}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  865. {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  866. {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  867. {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
  868. {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
  869. { D, 3}, { D, 4},
  870. },
  871. { // Mode 8 (0x16) - 8 5 6 5
  872. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  873. {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 0}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  874. {GW, 5}, {GW, 6}, {GW, 7}, {GY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  875. {BW, 5}, {BW, 6}, {BW, 7}, {GZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  876. {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  877. {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  878. {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
  879. {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
  880. { D, 3}, { D, 4},
  881. },
  882. { // Mode 9 (0x1a) - 8 5 5 6
  883. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  884. {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  885. {GW, 5}, {GW, 6}, {GW, 7}, {BY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  886. {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  887. {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  888. {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  889. {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
  890. {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
  891. { D, 3}, { D, 4},
  892. },
  893. { // Mode 10 (0x1e) - 6 6 6 6
  894. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  895. {RW, 5}, {GZ, 4}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  896. {GW, 5}, {GY, 5}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  897. {BW, 5}, {GZ, 5}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  898. {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  899. {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  900. {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
  901. {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
  902. { D, 3}, { D, 4},
  903. },
  904. { // Mode 11 (0x03) - 10 10
  905. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  906. {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  907. {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  908. {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  909. {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RX, 9}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  910. {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GX, 9}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  911. {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BX, 9}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
  912. {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
  913. {NA, 0}, {NA, 0},
  914. },
  915. { // Mode 12 (0x07) - 11 9
  916. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  917. {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  918. {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  919. {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  920. {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  921. {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  922. {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
  923. {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
  924. {NA, 0}, {NA, 0},
  925. },
  926. { // Mode 13 (0x0b) - 12 8
  927. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  928. {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  929. {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  930. {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
  931. {RX, 5}, {RX, 6}, {RX, 7}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
  932. {GX, 5}, {GX, 6}, {GX, 7}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
  933. {BX, 5}, {BX, 6}, {BX, 7}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
  934. {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
  935. {NA, 0}, {NA, 0},
  936. },
  937. { // Mode 14 (0x0f) - 16 4
  938. { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
  939. {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
  940. {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
  941. {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,15},
  942. {RW,14}, {RW,13}, {RW,12}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,15},
  943. {GW,14}, {GW,13}, {GW,12}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,15},
  944. {BW,14}, {BW,13}, {BW,12}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
  945. {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
  946. {NA, 0}, {NA, 0},
  947. },
  948. };
  949. // Mode, Partitions, Transformed, IndexPrec, RGBAPrec
  950. const D3DX_BC6H::ModeInfo D3DX_BC6H::ms_aInfo[] =
  951. {
  952. {0x00, 1, true, 3, LDRColorA(10,10,10,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 1
  953. {0x01, 1, true, 3, LDRColorA( 7, 7, 7,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 2
  954. {0x02, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 5, 4, 4,0), LDRColorA(5,4,4,0), LDRColorA(5,4,4,0)}, // Mode 3
  955. {0x06, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 5, 4,0), LDRColorA(4,5,4,0), LDRColorA(4,5,4,0)}, // Mode 4
  956. {0x0a, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 4, 5,0), LDRColorA(4,4,5,0), LDRColorA(4,4,5,0)}, // Mode 5
  957. {0x0e, 1, true, 3, LDRColorA( 9, 9, 9,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 6
  958. {0x12, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 6, 5, 5,0), LDRColorA(6,5,5,0), LDRColorA(6,5,5,0)}, // Mode 7
  959. {0x16, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 6, 5,0), LDRColorA(5,6,5,0), LDRColorA(5,6,5,0)}, // Mode 8
  960. {0x1a, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 5, 6,0), LDRColorA(5,5,6,0), LDRColorA(5,5,6,0)}, // Mode 9
  961. {0x1e, 1, false, 3, LDRColorA( 6, 6, 6,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 10
  962. {0x03, 0, false, 4, LDRColorA(10,10,10,0), LDRColorA(10,10,10,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 11
  963. {0x07, 0, true, 4, LDRColorA(11,11,11,0), LDRColorA( 9, 9, 9,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 12
  964. {0x0b, 0, true, 4, LDRColorA(12,12,12,0), LDRColorA( 8, 8, 8,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 13
  965. {0x0f, 0, true, 4, LDRColorA(16,16,16,0), LDRColorA( 4, 4, 4,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 14
  966. };
  967. const int D3DX_BC6H::ms_aModeToInfo[] =
  968. {
  969. 0, // Mode 1 - 0x00
  970. 1, // Mode 2 - 0x01
  971. 2, // Mode 3 - 0x02
  972. 10, // Mode 11 - 0x03
  973. -1, // Invalid - 0x04
  974. -1, // Invalid - 0x05
  975. 3, // Mode 4 - 0x06
  976. 11, // Mode 12 - 0x07
  977. -1, // Invalid - 0x08
  978. -1, // Invalid - 0x09
  979. 4, // Mode 5 - 0x0a
  980. 12, // Mode 13 - 0x0b
  981. -1, // Invalid - 0x0c
  982. -1, // Invalid - 0x0d
  983. 5, // Mode 6 - 0x0e
  984. 13, // Mode 14 - 0x0f
  985. -1, // Invalid - 0x10
  986. -1, // Invalid - 0x11
  987. 6, // Mode 7 - 0x12
  988. -1, // Reserved - 0x13
  989. -1, // Invalid - 0x14
  990. -1, // Invalid - 0x15
  991. 7, // Mode 8 - 0x16
  992. -1, // Reserved - 0x17
  993. -1, // Invalid - 0x18
  994. -1, // Invalid - 0x19
  995. 8, // Mode 9 - 0x1a
  996. -1, // Reserved - 0x1b
  997. -1, // Invalid - 0x1c
  998. -1, // Invalid - 0x1d
  999. 9, // Mode 10 - 0x1e
  1000. -1, // Resreved - 0x1f
  1001. };
  1002. // BC7 compression: uPartitions, uPartitionBits, uPBits, uRotationBits, uIndexModeBits, uIndexPrec, uIndexPrec2, RGBAPrec, RGBAPrecWithP
  1003. const D3DX_BC7::ModeInfo D3DX_BC7::ms_aInfo[] =
  1004. {
  1005. {2, 4, 6, 0, 0, 3, 0, LDRColorA(4,4,4,0), LDRColorA(5,5,5,0)},
  1006. // Mode 0: Color only, 3 Subsets, RGBP 4441 (unique P-bit), 3-bit indecies, 16 partitions
  1007. {1, 6, 2, 0, 0, 3, 0, LDRColorA(6,6,6,0), LDRColorA(7,7,7,0)},
  1008. // Mode 1: Color only, 2 Subsets, RGBP 6661 (shared P-bit), 3-bit indecies, 64 partitions
  1009. {2, 6, 0, 0, 0, 2, 0, LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)},
  1010. // Mode 2: Color only, 3 Subsets, RGB 555, 2-bit indecies, 64 partitions
  1011. {1, 6, 4, 0, 0, 2, 0, LDRColorA(7,7,7,0), LDRColorA(8,8,8,0)},
  1012. // Mode 3: Color only, 2 Subsets, RGBP 7771 (unique P-bit), 2-bits indecies, 64 partitions
  1013. {0, 0, 0, 2, 1, 2, 3, LDRColorA(5,5,5,6), LDRColorA(5,5,5,6)},
  1014. // Mode 4: Color w/ Separate Alpha, 1 Subset, RGB 555, A6, 16x2/16x3-bit indices, 2-bit rotation, 1-bit index selector
  1015. {0, 0, 0, 2, 0, 2, 2, LDRColorA(7,7,7,8), LDRColorA(7,7,7,8)},
  1016. // Mode 5: Color w/ Separate Alpha, 1 Subset, RGB 777, A8, 16x2/16x2-bit indices, 2-bit rotation
  1017. {0, 0, 2, 0, 0, 4, 0, LDRColorA(7,7,7,7), LDRColorA(8,8,8,8)},
  1018. // Mode 6: Color+Alpha, 1 Subset, RGBAP 77771 (unique P-bit), 16x4-bit indecies
  1019. {1, 6, 4, 0, 0, 2, 0, LDRColorA(5,5,5,5), LDRColorA(6,6,6,6)}
  1020. // Mode 7: Color+Alpha, 2 Subsets, RGBAP 55551 (unique P-bit), 2-bit indices, 64 partitions
  1021. };
  1022. namespace
  1023. {
  1024. //-------------------------------------------------------------------------------------
  1025. // Helper functions
  1026. //-------------------------------------------------------------------------------------
  1027. inline bool IsFixUpOffset(_In_range_(0, 2) size_t uPartitions, _In_range_(0, 63) size_t uShape, _In_range_(0, 15) size_t uOffset)
  1028. {
  1029. assert(uPartitions < 3 && uShape < 64 && uOffset < 16);
  1030. _Analysis_assume_(uPartitions < 3 && uShape < 64 && uOffset < 16);
  1031. for (size_t p = 0; p <= uPartitions; p++)
  1032. {
  1033. if (uOffset == g_aFixUp[uPartitions][uShape][p])
  1034. {
  1035. return true;
  1036. }
  1037. }
  1038. return false;
  1039. }
  1040. inline void TransformForward(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[])
  1041. {
  1042. aEndPts[0].B -= aEndPts[0].A;
  1043. aEndPts[1].A -= aEndPts[0].A;
  1044. aEndPts[1].B -= aEndPts[0].A;
  1045. }
  1046. inline void TransformInverse(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], _In_ const LDRColorA& Prec, _In_ bool bSigned)
  1047. {
  1048. INTColor WrapMask((1 << Prec.r) - 1, (1 << Prec.g) - 1, (1 << Prec.b) - 1);
  1049. aEndPts[0].B += aEndPts[0].A; aEndPts[0].B &= WrapMask;
  1050. aEndPts[1].A += aEndPts[0].A; aEndPts[1].A &= WrapMask;
  1051. aEndPts[1].B += aEndPts[0].A; aEndPts[1].B &= WrapMask;
  1052. if (bSigned)
  1053. {
  1054. aEndPts[0].B.SignExtend(Prec);
  1055. aEndPts[1].A.SignExtend(Prec);
  1056. aEndPts[1].B.SignExtend(Prec);
  1057. }
  1058. }
  1059. inline float Norm(_In_ const INTColor& a, _In_ const INTColor& b)
  1060. {
  1061. float dr = float(a.r) - float(b.r);
  1062. float dg = float(a.g) - float(b.g);
  1063. float db = float(a.b) - float(b.b);
  1064. return dr * dr + dg * dg + db * db;
  1065. }
  // Number of bits needed to store n.
  //   n == 0 : 0 bits, signed or not.
  //   n  > 0 : position of the highest set bit, plus one sign bit when bIsSigned.
  //   n  < 0 : only meaningful when bIsSigned (asserted); counts the shifts needed to
  //            reduce n to -1, plus one.
  inline int NBits(_In_ int n, _In_ bool bIsSigned)
  {
      if (n == 0)
      {
          return 0;
      }

      int cBits = 0;
      if (n < 0)
      {
          assert(bIsSigned);
          while (n < -1)
          {
              n >>= 1;
              ++cBits;
          }
          return cBits + 1;
      }

      while (n != 0)
      {
          n >>= 1;
          ++cBits;
      }
      return bIsSigned ? cBits + 1 : cBits;
  }
  1086. //-------------------------------------------------------------------------------------
  1087. float OptimizeRGB(
  1088. _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
  1089. _Out_ HDRColorA* pX,
  1090. _Out_ HDRColorA* pY,
  1091. _In_range_(3, 4) size_t cSteps,
  1092. size_t cPixels,
  1093. _In_reads_(cPixels) const size_t* pIndex)
  1094. {
  1095. float fError = FLT_MAX;
  1096. const float *pC = (3 == cSteps) ? pC3 : pC4;
  1097. const float *pD = (3 == cSteps) ? pD3 : pD4;
  1098. // Find Min and Max points, as starting point
  1099. HDRColorA X(1.0f, 1.0f, 1.0f, 0.0f);
  1100. HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
  1101. for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
  1102. {
  1103. if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
  1104. if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
  1105. if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
  1106. if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
  1107. if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
  1108. if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
  1109. }
  1110. // Diagonal axis
  1111. HDRColorA AB;
  1112. AB.r = Y.r - X.r;
  1113. AB.g = Y.g - X.g;
  1114. AB.b = Y.b - X.b;
  1115. float fAB = AB.r * AB.r + AB.g * AB.g + AB.b * AB.b;
  1116. // Single color block.. no need to root-find
  1117. if(fAB < FLT_MIN)
  1118. {
  1119. pX->r = X.r; pX->g = X.g; pX->b = X.b;
  1120. pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
  1121. return 0.0f;
  1122. }
  1123. // Try all four axis directions, to determine which diagonal best fits data
  1124. float fABInv = 1.0f / fAB;
  1125. HDRColorA Dir;
  1126. Dir.r = AB.r * fABInv;
  1127. Dir.g = AB.g * fABInv;
  1128. Dir.b = AB.b * fABInv;
  1129. HDRColorA Mid;
  1130. Mid.r = (X.r + Y.r) * 0.5f;
  1131. Mid.g = (X.g + Y.g) * 0.5f;
  1132. Mid.b = (X.b + Y.b) * 0.5f;
  1133. float fDir[4];
  1134. fDir[0] = fDir[1] = fDir[2] = fDir[3] = 0.0f;
  1135. for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
  1136. {
  1137. HDRColorA Pt;
  1138. Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
  1139. Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
  1140. Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
  1141. float f;
  1142. f = Pt.r + Pt.g + Pt.b; fDir[0] += f * f;
  1143. f = Pt.r + Pt.g - Pt.b; fDir[1] += f * f;
  1144. f = Pt.r - Pt.g + Pt.b; fDir[2] += f * f;
  1145. f = Pt.r - Pt.g - Pt.b; fDir[3] += f * f;
  1146. }
  1147. float fDirMax = fDir[0];
  1148. size_t iDirMax = 0;
  1149. for(size_t iDir = 1; iDir < 4; iDir++)
  1150. {
  1151. if(fDir[iDir] > fDirMax)
  1152. {
  1153. fDirMax = fDir[iDir];
  1154. iDirMax = iDir;
  1155. }
  1156. }
  1157. if (iDirMax & 2) std::swap(X.g, Y.g);
  1158. if (iDirMax & 1) std::swap(X.b, Y.b);
  1159. // Two color block.. no need to root-find
  1160. if (fAB < 1.0f / 4096.0f)
  1161. {
  1162. pX->r = X.r; pX->g = X.g; pX->b = X.b;
  1163. pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
  1164. return 0.0f;
  1165. }
  1166. // Use Newton's Method to find local minima of sum-of-squares error.
  1167. float fSteps = (float) (cSteps - 1);
  1168. for(size_t iIteration = 0; iIteration < 8; iIteration++)
  1169. {
  1170. // Calculate new steps
  1171. HDRColorA pSteps[4] = {};
  1172. for (size_t iStep = 0; iStep < cSteps; iStep++)
  1173. {
  1174. pSteps[iStep].r = X.r * pC[iStep] + Y.r * pD[iStep];
  1175. pSteps[iStep].g = X.g * pC[iStep] + Y.g * pD[iStep];
  1176. pSteps[iStep].b = X.b * pC[iStep] + Y.b * pD[iStep];
  1177. }
  1178. // Calculate color direction
  1179. Dir.r = Y.r - X.r;
  1180. Dir.g = Y.g - X.g;
  1181. Dir.b = Y.b - X.b;
  1182. float fLen = (Dir.r * Dir.r + Dir.g * Dir.g + Dir.b * Dir.b);
  1183. if(fLen < (1.0f / 4096.0f))
  1184. break;
  1185. float fScale = fSteps / fLen;
  1186. Dir.r *= fScale;
  1187. Dir.g *= fScale;
  1188. Dir.b *= fScale;
  1189. // Evaluate function, and derivatives
  1190. float d2X = 0.0f, d2Y = 0.0f;
  1191. HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
  1192. for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
  1193. {
  1194. float fDot = (pPoints[pIndex[iPoint]].r - X.r) * Dir.r +
  1195. (pPoints[pIndex[iPoint]].g - X.g) * Dir.g +
  1196. (pPoints[pIndex[iPoint]].b - X.b) * Dir.b;
  1197. size_t iStep;
  1198. if(fDot <= 0.0f)
  1199. iStep = 0;
  1200. if(fDot >= fSteps)
  1201. iStep = cSteps - 1;
  1202. else
  1203. iStep = size_t(fDot + 0.5f);
  1204. HDRColorA Diff;
  1205. Diff.r = pSteps[iStep].r - pPoints[pIndex[iPoint]].r;
  1206. Diff.g = pSteps[iStep].g - pPoints[pIndex[iPoint]].g;
  1207. Diff.b = pSteps[iStep].b - pPoints[pIndex[iPoint]].b;
  1208. float fC = pC[iStep] * (1.0f / 8.0f);
  1209. float fD = pD[iStep] * (1.0f / 8.0f);
  1210. d2X += fC * pC[iStep];
  1211. dX.r += fC * Diff.r;
  1212. dX.g += fC * Diff.g;
  1213. dX.b += fC * Diff.b;
  1214. d2Y += fD * pD[iStep];
  1215. dY.r += fD * Diff.r;
  1216. dY.g += fD * Diff.g;
  1217. dY.b += fD * Diff.b;
  1218. }
  1219. // Move endpoints
  1220. if(d2X > 0.0f)
  1221. {
  1222. float f = -1.0f / d2X;
  1223. X.r += dX.r * f;
  1224. X.g += dX.g * f;
  1225. X.b += dX.b * f;
  1226. }
  1227. if(d2Y > 0.0f)
  1228. {
  1229. float f = -1.0f / d2Y;
  1230. Y.r += dY.r * f;
  1231. Y.g += dY.g * f;
  1232. Y.b += dY.b * f;
  1233. }
  1234. if((dX.r * dX.r < fEpsilon) && (dX.g * dX.g < fEpsilon) && (dX.b * dX.b < fEpsilon) &&
  1235. (dY.r * dY.r < fEpsilon) && (dY.g * dY.g < fEpsilon) && (dY.b * dY.b < fEpsilon))
  1236. {
  1237. break;
  1238. }
  1239. }
  1240. pX->r = X.r; pX->g = X.g; pX->b = X.b;
  1241. pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
  1242. return fError;
  1243. }
  1244. //-------------------------------------------------------------------------------------
  1245. float OptimizeRGBA(
  1246. _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
  1247. _Out_ HDRColorA* pX,
  1248. _Out_ HDRColorA* pY,
  1249. _In_range_(3, 4) size_t cSteps,
  1250. size_t cPixels,
  1251. _In_reads_(cPixels) const size_t* pIndex)
  1252. {
  1253. float fError = FLT_MAX;
  1254. const float *pC = (3 == cSteps) ? pC3 : pC4;
  1255. const float *pD = (3 == cSteps) ? pD3 : pD4;
  1256. // Find Min and Max points, as starting point
  1257. HDRColorA X(1.0f, 1.0f, 1.0f, 1.0f);
  1258. HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
  1259. for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
  1260. {
  1261. if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
  1262. if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
  1263. if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
  1264. if(pPoints[pIndex[iPoint]].a < X.a) X.a = pPoints[pIndex[iPoint]].a;
  1265. if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
  1266. if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
  1267. if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
  1268. if(pPoints[pIndex[iPoint]].a > Y.a) Y.a = pPoints[pIndex[iPoint]].a;
  1269. }
  1270. // Diagonal axis
  1271. HDRColorA AB = Y - X;
  1272. float fAB = AB * AB;
  1273. // Single color block.. no need to root-find
  1274. if(fAB < FLT_MIN)
  1275. {
  1276. *pX = X;
  1277. *pY = Y;
  1278. return 0.0f;
  1279. }
  1280. // Try all four axis directions, to determine which diagonal best fits data
  1281. float fABInv = 1.0f / fAB;
  1282. HDRColorA Dir = AB * fABInv;
  1283. HDRColorA Mid = (X + Y) * 0.5f;
  1284. float fDir[8];
  1285. fDir[0] = fDir[1] = fDir[2] = fDir[3] = fDir[4] = fDir[5] = fDir[6] = fDir[7] = 0.0f;
  1286. for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
  1287. {
  1288. HDRColorA Pt;
  1289. Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
  1290. Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
  1291. Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
  1292. Pt.a = (pPoints[pIndex[iPoint]].a - Mid.a) * Dir.a;
  1293. float f;
  1294. f = Pt.r + Pt.g + Pt.b + Pt.a; fDir[0] += f * f;
  1295. f = Pt.r + Pt.g + Pt.b - Pt.a; fDir[1] += f * f;
  1296. f = Pt.r + Pt.g - Pt.b + Pt.a; fDir[2] += f * f;
  1297. f = Pt.r + Pt.g - Pt.b - Pt.a; fDir[3] += f * f;
  1298. f = Pt.r - Pt.g + Pt.b + Pt.a; fDir[4] += f * f;
  1299. f = Pt.r - Pt.g + Pt.b - Pt.a; fDir[5] += f * f;
  1300. f = Pt.r - Pt.g - Pt.b + Pt.a; fDir[6] += f * f;
  1301. f = Pt.r - Pt.g - Pt.b - Pt.a; fDir[7] += f * f;
  1302. }
  1303. float fDirMax = fDir[0];
  1304. size_t iDirMax = 0;
  1305. for(size_t iDir = 1; iDir < 8; iDir++)
  1306. {
  1307. if(fDir[iDir] > fDirMax)
  1308. {
  1309. fDirMax = fDir[iDir];
  1310. iDirMax = iDir;
  1311. }
  1312. }
  1313. if (iDirMax & 4) std::swap(X.g, Y.g);
  1314. if (iDirMax & 2) std::swap(X.b, Y.b);
  1315. if (iDirMax & 1) std::swap(X.a, Y.a);
  1316. // Two color block.. no need to root-find
  1317. if(fAB < 1.0f / 4096.0f)
  1318. {
  1319. *pX = X;
  1320. *pY = Y;
  1321. return 0.0f;
  1322. }
  1323. // Use Newton's Method to find local minima of sum-of-squares error.
  1324. float fSteps = (float) (cSteps - 1);
  1325. for(size_t iIteration = 0; iIteration < 8 && fError > 0.0f; iIteration++)
  1326. {
  1327. // Calculate new steps
  1328. HDRColorA pSteps[BC7_MAX_INDICES];
  1329. LDRColorA lX, lY;
  1330. lX = (X * 255.0f).ToLDRColorA();
  1331. lY = (Y * 255.0f).ToLDRColorA();
  1332. for(size_t iStep = 0; iStep < cSteps; iStep++)
  1333. {
  1334. pSteps[iStep] = X * pC[iStep] + Y * pD[iStep];
  1335. //LDRColorA::Interpolate(lX, lY, i, i, wcprec, waprec, aSteps[i]);
  1336. }
  1337. // Calculate color direction
  1338. Dir = Y - X;
  1339. float fLen = Dir * Dir;
  1340. if(fLen < (1.0f / 4096.0f))
  1341. break;
  1342. float fScale = fSteps / fLen;
  1343. Dir *= fScale;
  1344. // Evaluate function, and derivatives
  1345. float d2X = 0.0f, d2Y = 0.0f;
  1346. HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
  1347. for(size_t iPoint = 0; iPoint < cPixels; ++iPoint)
  1348. {
  1349. float fDot = (pPoints[pIndex[iPoint]] - X) * Dir;
  1350. size_t iStep;
  1351. if(fDot <= 0.0f)
  1352. iStep = 0;
  1353. if(fDot >= fSteps)
  1354. iStep = cSteps - 1;
  1355. else
  1356. iStep = size_t(fDot + 0.5f);
  1357. HDRColorA Diff = pSteps[iStep] - pPoints[pIndex[iPoint]];
  1358. float fC = pC[iStep] * (1.0f / 8.0f);
  1359. float fD = pD[iStep] * (1.0f / 8.0f);
  1360. d2X += fC * pC[iStep];
  1361. dX += Diff * fC;
  1362. d2Y += fD * pD[iStep];
  1363. dY += Diff * fD;
  1364. }
  1365. // Move endpoints
  1366. if(d2X > 0.0f)
  1367. {
  1368. float f = -1.0f / d2X;
  1369. X += dX * f;
  1370. }
  1371. if(d2Y > 0.0f)
  1372. {
  1373. float f = -1.0f / d2Y;
  1374. Y += dY * f;
  1375. }
  1376. if((dX * dX < fEpsilon) && (dY * dY < fEpsilon))
  1377. break;
  1378. }
  1379. *pX = X;
  1380. *pY = Y;
  1381. return fError;
  1382. }
  1383. //-------------------------------------------------------------------------------------
  1384. float ComputeError(
  1385. _Inout_ const LDRColorA& pixel,
  1386. _In_reads_(1 << uIndexPrec) const LDRColorA aPalette[],
  1387. uint8_t uIndexPrec,
  1388. uint8_t uIndexPrec2,
  1389. _Out_opt_ size_t* pBestIndex = nullptr,
  1390. _Out_opt_ size_t* pBestIndex2 = nullptr)
  1391. {
  1392. const size_t uNumIndices = size_t(1) << uIndexPrec;
  1393. const size_t uNumIndices2 = size_t(1) << uIndexPrec2;
  1394. float fTotalErr = 0;
  1395. float fBestErr = FLT_MAX;
  1396. if(pBestIndex)
  1397. *pBestIndex = 0;
  1398. if(pBestIndex2)
  1399. *pBestIndex2 = 0;
  1400. XMVECTOR vpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &pixel ) );
  1401. if(uIndexPrec2 == 0)
  1402. {
  1403. for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
  1404. {
  1405. XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) );
  1406. // Compute ErrorMetric
  1407. tpixel = XMVectorSubtract( vpixel, tpixel );
  1408. float fErr = XMVectorGetX( XMVector4Dot( tpixel, tpixel ) );
  1409. if(fErr > fBestErr) // error increased, so we're done searching
  1410. break;
  1411. if(fErr < fBestErr)
  1412. {
  1413. fBestErr = fErr;
  1414. if(pBestIndex)
  1415. *pBestIndex = i;
  1416. }
  1417. }
  1418. fTotalErr += fBestErr;
  1419. }
  1420. else
  1421. {
  1422. for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
  1423. {
  1424. XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) );
  1425. // Compute ErrorMetricRGB
  1426. tpixel = XMVectorSubtract( vpixel, tpixel );
  1427. float fErr = XMVectorGetX( XMVector3Dot( tpixel, tpixel ) );
  1428. if(fErr > fBestErr) // error increased, so we're done searching
  1429. break;
  1430. if(fErr < fBestErr)
  1431. {
  1432. fBestErr = fErr;
  1433. if(pBestIndex)
  1434. *pBestIndex = i;
  1435. }
  1436. }
  1437. fTotalErr += fBestErr;
  1438. fBestErr = FLT_MAX;
  1439. for(register size_t i = 0; i < uNumIndices2 && fBestErr > 0; i++)
  1440. {
  1441. // Compute ErrorMetricAlpha
  1442. float ea = float(pixel.a) - float(aPalette[i].a);
  1443. float fErr = ea*ea;
  1444. if(fErr > fBestErr) // error increased, so we're done searching
  1445. break;
  1446. if(fErr < fBestErr)
  1447. {
  1448. fBestErr = fErr;
  1449. if(pBestIndex2)
  1450. *pBestIndex2 = i;
  1451. }
  1452. }
  1453. fTotalErr += fBestErr;
  1454. }
  1455. return fTotalErr;
  1456. }
  1457. void FillWithErrorColors(Color (&color)[4][4]) // ESENTHEL ADDED
  1458. {
  1459. REP(16)color[0][i]=(SET_DEBUG ? PURPLE : BLACK);
  1460. }
  1461. void FillWithErrorColors(_Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut)
  1462. {
  1463. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  1464. {
  1465. #ifdef _DEBUG
  1466. // Use Magenta in debug as a highly-visible error color
  1467. pOut[i] = HDRColorA(1.0f, 0.0f, 1.0f, 1.0f);
  1468. #else
  1469. // In production use, default to black
  1470. pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
  1471. #endif
  1472. }
  1473. }
  1474. }
  1475. //-------------------------------------------------------------------------------------
  1476. // BC6H Compression
  1477. //-------------------------------------------------------------------------------------
  1478. _Use_decl_annotations_
  1479. void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const
  1480. {
  1481. assert(pOut );
  1482. size_t uStartBit = 0;
  1483. uint8_t uMode = GetBits(uStartBit, 2);
  1484. if(uMode != 0x00 && uMode != 0x01)
  1485. {
  1486. uMode = (GetBits(uStartBit, 3) << 2) | uMode;
  1487. }
  1488. assert( uMode < 32 );
  1489. _Analysis_assume_( uMode < 32 );
  1490. if ( ms_aModeToInfo[uMode] >= 0 )
  1491. {
  1492. assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
  1493. _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
  1494. const ModeDescriptor* desc = ms_aDesc[ms_aModeToInfo[uMode]];
  1495. assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
  1496. _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
  1497. const ModeInfo& info = ms_aInfo[ms_aModeToInfo[uMode]];
  1498. INTEndPntPair aEndPts[BC6H_MAX_REGIONS];
  1499. memset(aEndPts, 0, BC6H_MAX_REGIONS * 2 * sizeof(INTColor));
  1500. uint32_t uShape = 0;
  1501. // Read header
  1502. const size_t uHeaderBits = info.uPartitions > 0 ? 82 : 65;
  1503. while(uStartBit < uHeaderBits)
  1504. {
  1505. size_t uCurBit = uStartBit;
  1506. if(GetBit(uStartBit))
  1507. {
  1508. switch(desc[uCurBit].m_eField)
  1509. {
  1510. case D: uShape |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1511. case RW: aEndPts[0].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1512. case RX: aEndPts[0].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1513. case RY: aEndPts[1].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1514. case RZ: aEndPts[1].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1515. case GW: aEndPts[0].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1516. case GX: aEndPts[0].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1517. case GY: aEndPts[1].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1518. case GZ: aEndPts[1].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1519. case BW: aEndPts[0].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1520. case BX: aEndPts[0].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1521. case BY: aEndPts[1].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1522. case BZ: aEndPts[1].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
  1523. default:
  1524. {
  1525. #ifdef _DEBUG
  1526. OutputDebugStringA( "BC6H: Invalid header bits encountered during decoding\n" );
  1527. #endif
  1528. FillWithErrorColors( pOut );
  1529. return;
  1530. }
  1531. }
  1532. }
  1533. }
  1534. assert( uShape < 64 );
  1535. _Analysis_assume_( uShape < 64 );
  1536. // Sign extend necessary end points
  1537. if(bSigned)
  1538. {
  1539. aEndPts[0].A.SignExtend(info.RGBAPrec[0][0]);
  1540. }
  1541. if(bSigned || info.bTransformed)
  1542. {
  1543. assert( info.uPartitions < BC6H_MAX_REGIONS );
  1544. _Analysis_assume_( info.uPartitions < BC6H_MAX_REGIONS );
  1545. for(size_t p = 0; p <= info.uPartitions; ++p)
  1546. {
  1547. if(p != 0)
  1548. {
  1549. aEndPts[p].A.SignExtend(info.RGBAPrec[p][0]);
  1550. }
  1551. aEndPts[p].B.SignExtend(info.RGBAPrec[p][1]);
  1552. }
  1553. }
  1554. // Inverse transform the end points
  1555. if(info.bTransformed)
  1556. {
  1557. TransformInverse(aEndPts, info.RGBAPrec[0][0], bSigned);
  1558. }
  1559. // Read indices
  1560. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  1561. {
  1562. size_t uNumBits = IsFixUpOffset(info.uPartitions, uShape, i) ? info.uIndexPrec-1 : info.uIndexPrec;
  1563. if ( uStartBit + uNumBits > 128 )
  1564. {
  1565. #ifdef _DEBUG
  1566. OutputDebugStringA( "BC6H: Invalid block encountered during decoding\n" );
  1567. #endif
  1568. FillWithErrorColors( pOut );
  1569. return;
  1570. }
  1571. uint8_t uIndex = GetBits(uStartBit, uNumBits);
  1572. if ( uIndex >= ((info.uPartitions > 0) ? 8 : 16) )
  1573. {
  1574. #ifdef _DEBUG
  1575. OutputDebugStringA( "BC6H: Invalid index encountered during decoding\n" );
  1576. #endif
  1577. FillWithErrorColors( pOut );
  1578. return;
  1579. }
  1580. size_t uRegion = g_aPartitionTable[info.uPartitions][uShape][i];
  1581. assert( uRegion < BC6H_MAX_REGIONS );
  1582. _Analysis_assume_( uRegion < BC6H_MAX_REGIONS );
  1583. // Unquantize endpoints and interpolate
  1584. int r1 = Unquantize(aEndPts[uRegion].A.r, info.RGBAPrec[0][0].r, bSigned);
  1585. int g1 = Unquantize(aEndPts[uRegion].A.g, info.RGBAPrec[0][0].g, bSigned);
  1586. int b1 = Unquantize(aEndPts[uRegion].A.b, info.RGBAPrec[0][0].b, bSigned);
  1587. int r2 = Unquantize(aEndPts[uRegion].B.r, info.RGBAPrec[0][0].r, bSigned);
  1588. int g2 = Unquantize(aEndPts[uRegion].B.g, info.RGBAPrec[0][0].g, bSigned);
  1589. int b2 = Unquantize(aEndPts[uRegion].B.b, info.RGBAPrec[0][0].b, bSigned);
  1590. const int* aWeights = info.uPartitions > 0 ? g_aWeights3 : g_aWeights4;
  1591. INTColor fc;
  1592. fc.r = FinishUnquantize((r1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + r2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
  1593. fc.g = FinishUnquantize((g1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + g2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
  1594. fc.b = FinishUnquantize((b1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + b2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
  1595. HALF rgb[3];
  1596. fc.ToF16(rgb, bSigned);
  1597. pOut[i].r = XMConvertHalfToFloat( rgb[0] );
  1598. pOut[i].g = XMConvertHalfToFloat( rgb[1] );
  1599. pOut[i].b = XMConvertHalfToFloat( rgb[2] );
  1600. pOut[i].a = 1.0f;
  1601. }
  1602. }
  1603. else
  1604. {
  1605. #ifdef _DEBUG
  1606. const char* warnstr = "BC6H: Invalid mode encountered during decoding\n";
  1607. switch( uMode )
  1608. {
  1609. case 0x13: warnstr = "BC6H: Reserved mode 10011 encountered during decoding\n"; break;
  1610. case 0x17: warnstr = "BC6H: Reserved mode 10111 encountered during decoding\n"; break;
  1611. case 0x1B: warnstr = "BC6H: Reserved mode 11011 encountered during decoding\n"; break;
  1612. case 0x1F: warnstr = "BC6H: Reserved mode 11111 encountered during decoding\n"; break;
  1613. }
  1614. OutputDebugStringA( warnstr );
  1615. #endif
  1616. // Per the BC6H format spec, we must return opaque black
  1617. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  1618. {
  1619. pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
  1620. }
  1621. }
  1622. }
  1623. _Use_decl_annotations_
  1624. void D3DX_BC6H::Encode(bool bSigned, const HDRColorA* const pIn)
  1625. {
  1626. assert( pIn );
  1627. EncodeParams EP(pIn, bSigned);
  1628. for(EP.uMode = 0; EP.uMode < ARRAYSIZE(ms_aInfo) && EP.fBestErr > 0; ++EP.uMode)
  1629. {
  1630. const uint8_t uShapes = ms_aInfo[EP.uMode].uPartitions ? 32 : 1;
  1631. // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
  1632. // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
  1633. const size_t uItems = std::max<size_t>(1, uShapes >> 2);
  1634. float afRoughMSE[BC6H_MAX_SHAPES];
  1635. uint8_t auShape[BC6H_MAX_SHAPES];
  1636. // pick the best uItems shapes and refine these.
  1637. for(EP.uShape = 0; EP.uShape < uShapes; ++EP.uShape)
  1638. {
  1639. size_t uShape = EP.uShape;
  1640. afRoughMSE[uShape] = RoughMSE(&EP);
  1641. auShape[uShape] = static_cast<uint8_t>(uShape);
  1642. }
  1643. // Bubble up the first uItems items
  1644. for(register size_t i = 0; i < uItems; i++)
  1645. {
  1646. for (register size_t j = i + 1; j < uShapes; j++)
  1647. {
  1648. if (afRoughMSE[i] > afRoughMSE[j])
  1649. {
  1650. std::swap(afRoughMSE[i], afRoughMSE[j]);
  1651. std::swap(auShape[i], auShape[j]);
  1652. }
  1653. }
  1654. }
  1655. for(size_t i = 0; i < uItems && EP.fBestErr > 0; i++)
  1656. {
  1657. EP.uShape = auShape[i];
  1658. Refine(&EP);
  1659. }
  1660. }
  1661. }
  1662. //-------------------------------------------------------------------------------------
  1663. _Use_decl_annotations_
  1664. int D3DX_BC6H::Quantize(int iValue, int prec, bool bSigned)
  1665. {
  1666. assert(prec > 1); // didn't bother to make it work for 1
  1667. int q, s = 0;
  1668. if(bSigned)
  1669. {
  1670. assert(iValue >= -F16MAX && iValue <= F16MAX);
  1671. if(iValue < 0)
  1672. {
  1673. s = 1;
  1674. iValue = -iValue;
  1675. }
  1676. q = (prec >= 16) ? iValue : (iValue << (prec-1)) / (F16MAX+1);
  1677. if(s)
  1678. q = -q;
  1679. assert (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
  1680. }
  1681. else
  1682. {
  1683. assert(iValue >= 0 && iValue <= F16MAX);
  1684. q = (prec >= 15) ? iValue : (iValue << prec) / (F16MAX+1);
  1685. assert (q >= 0 && q < (1 << prec));
  1686. }
  1687. return q;
  1688. }
  1689. _Use_decl_annotations_
  1690. int D3DX_BC6H::Unquantize(int comp, uint8_t uBitsPerComp, bool bSigned)
  1691. {
  1692. int unq = 0, s = 0;
  1693. if(bSigned)
  1694. {
  1695. if(uBitsPerComp >= 16)
  1696. {
  1697. unq = comp;
  1698. }
  1699. else
  1700. {
  1701. if(comp < 0)
  1702. {
  1703. s = 1;
  1704. comp = -comp;
  1705. }
  1706. if(comp == 0) unq = 0;
  1707. else if(comp >= ((1 << (uBitsPerComp - 1)) - 1)) unq = 0x7FFF;
  1708. else unq = ((comp << 15) + 0x4000) >> (uBitsPerComp-1);
  1709. if(s) unq = -unq;
  1710. }
  1711. }
  1712. else
  1713. {
  1714. if(uBitsPerComp >= 15) unq = comp;
  1715. else if(comp == 0) unq = 0;
  1716. else if(comp == ((1 << uBitsPerComp) - 1)) unq = 0xFFFF;
  1717. else unq = ((comp << 16) + 0x8000) >> uBitsPerComp;
  1718. }
  1719. return unq;
  1720. }
  1721. _Use_decl_annotations_
  1722. int D3DX_BC6H::FinishUnquantize(int comp, bool bSigned)
  1723. {
  1724. if (bSigned)
  1725. {
  1726. return (comp < 0) ? -(((-comp) * 31) >> 5) : (comp * 31) >> 5; // scale the magnitude by 31/32
  1727. }
  1728. else
  1729. {
  1730. return (comp * 31) >> 6; // scale the magnitude by 31/64
  1731. }
  1732. }
  1733. //-------------------------------------------------------------------------------------
  1734. _Use_decl_annotations_
  1735. bool D3DX_BC6H::EndPointsFit(const EncodeParams* pEP, const INTEndPntPair aEndPts[])
  1736. {
  1737. assert( pEP );
  1738. const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
  1739. const bool bIsSigned = pEP->bSigned;
  1740. const LDRColorA& Prec0 = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
  1741. const LDRColorA& Prec1 = ms_aInfo[pEP->uMode].RGBAPrec[0][1];
  1742. const LDRColorA& Prec2 = ms_aInfo[pEP->uMode].RGBAPrec[1][0];
  1743. const LDRColorA& Prec3 = ms_aInfo[pEP->uMode].RGBAPrec[1][1];
  1744. INTColor aBits[4];
  1745. aBits[0].r = NBits(aEndPts[0].A.r, bIsSigned);
  1746. aBits[0].g = NBits(aEndPts[0].A.g, bIsSigned);
  1747. aBits[0].b = NBits(aEndPts[0].A.b, bIsSigned);
  1748. aBits[1].r = NBits(aEndPts[0].B.r, bTransformed || bIsSigned);
  1749. aBits[1].g = NBits(aEndPts[0].B.g, bTransformed || bIsSigned);
  1750. aBits[1].b = NBits(aEndPts[0].B.b, bTransformed || bIsSigned);
  1751. if(aBits[0].r > Prec0.r || aBits[1].r > Prec1.r ||
  1752. aBits[0].g > Prec0.g || aBits[1].g > Prec1.g ||
  1753. aBits[0].b > Prec0.b || aBits[1].b > Prec1.b)
  1754. return false;
  1755. if(ms_aInfo[pEP->uMode].uPartitions)
  1756. {
  1757. aBits[2].r = NBits(aEndPts[1].A.r, bTransformed || bIsSigned);
  1758. aBits[2].g = NBits(aEndPts[1].A.g, bTransformed || bIsSigned);
  1759. aBits[2].b = NBits(aEndPts[1].A.b, bTransformed || bIsSigned);
  1760. aBits[3].r = NBits(aEndPts[1].B.r, bTransformed || bIsSigned);
  1761. aBits[3].g = NBits(aEndPts[1].B.g, bTransformed || bIsSigned);
  1762. aBits[3].b = NBits(aEndPts[1].B.b, bTransformed || bIsSigned);
  1763. if(aBits[2].r > Prec2.r || aBits[3].r > Prec3.r ||
  1764. aBits[2].g > Prec2.g || aBits[3].g > Prec3.g ||
  1765. aBits[2].b > Prec2.b || aBits[3].b > Prec3.b)
  1766. return false;
  1767. }
  1768. return true;
  1769. }
  1770. _Use_decl_annotations_
  1771. void D3DX_BC6H::GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPntPair& endPts, INTColor aPalette[]) const
  1772. {
  1773. assert( pEP );
  1774. const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
  1775. const size_t uNumIndices = size_t(1) << uIndexPrec;
  1776. assert( uNumIndices > 0 );
  1777. _Analysis_assume_( uNumIndices > 0 );
  1778. const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
  1779. // scale endpoints
  1780. INTEndPntPair unqEndPts;
  1781. unqEndPts.A.r = Unquantize(endPts.A.r, Prec.r, pEP->bSigned);
  1782. unqEndPts.A.g = Unquantize(endPts.A.g, Prec.g, pEP->bSigned);
  1783. unqEndPts.A.b = Unquantize(endPts.A.b, Prec.b, pEP->bSigned);
  1784. unqEndPts.B.r = Unquantize(endPts.B.r, Prec.r, pEP->bSigned);
  1785. unqEndPts.B.g = Unquantize(endPts.B.g, Prec.g, pEP->bSigned);
  1786. unqEndPts.B.b = Unquantize(endPts.B.b, Prec.b, pEP->bSigned);
  1787. // interpolate
  1788. const int* aWeights = nullptr;
  1789. switch(uIndexPrec)
  1790. {
  1791. case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break;
  1792. case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break;
  1793. default:
  1794. assert(false);
  1795. for(size_t i = 0; i < uNumIndices; ++i)
  1796. {
  1797. #pragma prefast(suppress:22102 22103, "writing blocks in two halves confuses tool")
  1798. aPalette[i] = INTColor(0,0,0);
  1799. }
  1800. return;
  1801. }
  1802. for (size_t i = 0; i < uNumIndices; ++i)
  1803. {
  1804. aPalette[i].r = FinishUnquantize(
  1805. (unqEndPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
  1806. pEP->bSigned);
  1807. aPalette[i].g = FinishUnquantize(
  1808. (unqEndPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
  1809. pEP->bSigned);
  1810. aPalette[i].b = FinishUnquantize(
  1811. (unqEndPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
  1812. pEP->bSigned);
  1813. }
  1814. }
  1815. // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
  1816. _Use_decl_annotations_
  1817. float D3DX_BC6H::MapColorsQuantized(const EncodeParams* pEP, const INTColor aColors[], size_t np, const INTEndPntPair &endPts) const
  1818. {
  1819. assert( pEP );
  1820. const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
  1821. const uint8_t uNumIndices = 1 << uIndexPrec;
  1822. INTColor aPalette[BC6H_MAX_INDICES];
  1823. GeneratePaletteQuantized(pEP, endPts, aPalette);
  1824. float fTotErr = 0;
  1825. for(size_t i = 0; i < np; ++i)
  1826. {
  1827. XMVECTOR vcolors = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aColors[i] ) );
  1828. // Compute ErrorMetricRGB
  1829. XMVECTOR tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[0] ) );
  1830. tpal = XMVectorSubtract( vcolors, tpal );
  1831. float fBestErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) );
  1832. for(int j = 1; j < uNumIndices && fBestErr > 0; ++j)
  1833. {
  1834. // Compute ErrorMetricRGB
  1835. tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[j] ) );
  1836. tpal = XMVectorSubtract( vcolors, tpal );
  1837. float fErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) );
  1838. if(fErr > fBestErr) break; // error increased, so we're done searching
  1839. if(fErr < fBestErr) fBestErr = fErr;
  1840. }
  1841. fTotErr += fBestErr;
  1842. }
  1843. return fTotErr;
  1844. }
  1845. _Use_decl_annotations_
  1846. float D3DX_BC6H::PerturbOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, uint8_t ch,
  1847. const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts, float fOldErr, int do_b) const
  1848. {
  1849. assert( pEP );
  1850. uint8_t uPrec;
  1851. switch(ch)
  1852. {
  1853. case 0: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].r; break;
  1854. case 1: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].g; break;
  1855. case 2: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].b; break;
  1856. default: assert(false); newEndPts = oldEndPts; return FLT_MAX;
  1857. }
  1858. INTEndPntPair tmpEndPts;
  1859. float fMinErr = fOldErr;
  1860. int beststep = 0;
  1861. // copy real endpoints so we can perturb them
  1862. tmpEndPts = newEndPts = oldEndPts;
  1863. // do a logarithmic search for the best error for this endpoint (which)
  1864. for(int step = 1 << (uPrec-1); step; step >>= 1)
  1865. {
  1866. bool bImproved = false;
  1867. for(int sign = -1; sign <= 1; sign += 2)
  1868. {
  1869. if(do_b == 0)
  1870. {
  1871. tmpEndPts.A[ch] = newEndPts.A[ch] + sign * step;
  1872. if(tmpEndPts.A[ch] < 0 || tmpEndPts.A[ch] >= (1 << uPrec))
  1873. continue;
  1874. }
  1875. else
  1876. {
  1877. tmpEndPts.B[ch] = newEndPts.B[ch] + sign * step;
  1878. if(tmpEndPts.B[ch] < 0 || tmpEndPts.B[ch] >= (1 << uPrec))
  1879. continue;
  1880. }
  1881. float fErr = MapColorsQuantized(pEP, aColors, np, tmpEndPts);
  1882. if(fErr < fMinErr)
  1883. {
  1884. bImproved = true;
  1885. fMinErr = fErr;
  1886. beststep = sign * step;
  1887. }
  1888. }
  1889. // if this was an improvement, move the endpoint and continue search from there
  1890. if(bImproved)
  1891. {
  1892. if(do_b == 0)
  1893. newEndPts.A[ch] += beststep;
  1894. else
  1895. newEndPts.B[ch] += beststep;
  1896. }
  1897. }
  1898. return fMinErr;
  1899. }
  1900. _Use_decl_annotations_
  1901. void D3DX_BC6H::OptimizeOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, float aOrgErr,
  1902. const INTEndPntPair &aOrgEndPts, INTEndPntPair &aOptEndPts) const
  1903. {
  1904. assert( pEP );
  1905. float aOptErr = aOrgErr;
  1906. aOptEndPts.A = aOrgEndPts.A;
  1907. aOptEndPts.B = aOrgEndPts.B;
  1908. INTEndPntPair new_a, new_b;
  1909. INTEndPntPair newEndPts;
  1910. int do_b;
  1911. // now optimize each channel separately
  1912. for(uint8_t ch = 0; ch < 3; ++ch)
  1913. {
  1914. // figure out which endpoint when perturbed gives the most improvement and start there
  1915. // if we just alternate, we can easily end up in a local minima
  1916. float fErr0 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_a, aOptErr, 0); // perturb endpt A
  1917. float fErr1 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_b, aOptErr, 1); // perturb endpt B
  1918. if(fErr0 < fErr1)
  1919. {
  1920. if(fErr0 >= aOptErr) continue;
  1921. aOptEndPts.A[ch] = new_a.A[ch];
  1922. aOptErr = fErr0;
  1923. do_b = 1; // do B next
  1924. }
  1925. else
  1926. {
  1927. if(fErr1 >= aOptErr) continue;
  1928. aOptEndPts.B[ch] = new_b.B[ch];
  1929. aOptErr = fErr1;
  1930. do_b = 0; // do A next
  1931. }
  1932. // now alternate endpoints and keep trying until there is no improvement
  1933. for(;;)
  1934. {
  1935. float fErr = PerturbOne(pEP, aColors, np, ch, aOptEndPts, newEndPts, aOptErr, do_b);
  1936. if(fErr >= aOptErr)
  1937. break;
  1938. if(do_b == 0)
  1939. aOptEndPts.A[ch] = newEndPts.A[ch];
  1940. else
  1941. aOptEndPts.B[ch] = newEndPts.B[ch];
  1942. aOptErr = fErr;
  1943. do_b = 1 - do_b; // now move the other endpoint
  1944. }
  1945. }
  1946. }
  1947. _Use_decl_annotations_
  1948. void D3DX_BC6H::OptimizeEndPoints(const EncodeParams* pEP, const float aOrgErr[], const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[]) const
  1949. {
  1950. assert( pEP );
  1951. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  1952. assert( uPartitions < BC6H_MAX_REGIONS );
  1953. _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
  1954. INTColor aPixels[NUM_PIXELS_PER_BLOCK];
  1955. for(size_t p = 0; p <= uPartitions; ++p)
  1956. {
  1957. // collect the pixels in the region
  1958. size_t np = 0;
  1959. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  1960. {
  1961. if(g_aPartitionTable[p][pEP->uShape][i] == p)
  1962. {
  1963. aPixels[np++] = pEP->aIPixels[i];
  1964. }
  1965. }
  1966. OptimizeOne(pEP, aPixels, np, aOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
  1967. }
  1968. }
  1969. // Swap endpoints as needed to ensure that the indices at fix up have a 0 high-order bit
  1970. _Use_decl_annotations_
  1971. void D3DX_BC6H::SwapIndices(const EncodeParams* pEP, INTEndPntPair aEndPts[], size_t aIndices[])
  1972. {
  1973. assert( pEP );
  1974. const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  1975. const size_t uNumIndices = size_t(1) << ms_aInfo[pEP->uMode].uIndexPrec;
  1976. const size_t uHighIndexBit = uNumIndices >> 1;
  1977. assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
  1978. _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
  1979. for(size_t p = 0; p <= uPartitions; ++p)
  1980. {
  1981. size_t i = g_aFixUp[uPartitions][pEP->uShape][p];
  1982. assert(g_aPartitionTable[uPartitions][pEP->uShape][i] == p);
  1983. if(aIndices[i] & uHighIndexBit)
  1984. {
  1985. // high bit is set, swap the aEndPts and indices for this region
  1986. std::swap(aEndPts[p].A, aEndPts[p].B);
  1987. for(size_t j = 0; j < NUM_PIXELS_PER_BLOCK; ++j)
  1988. if(g_aPartitionTable[uPartitions][pEP->uShape][j] == p)
  1989. aIndices[j] = uNumIndices - 1 - aIndices[j];
  1990. }
  1991. }
  1992. }
  1993. // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
  1994. _Use_decl_annotations_
  1995. void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndPts[], size_t aIndices[], float aTotErr[]) const
  1996. {
  1997. assert( pEP );
  1998. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  1999. const uint8_t uNumIndices = 1 << ms_aInfo[pEP->uMode].uIndexPrec;
  2000. assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
  2001. _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
  2002. // build list of possibles
  2003. INTColor aPalette[BC6H_MAX_REGIONS][BC6H_MAX_INDICES];
  2004. for(size_t p = 0; p <= uPartitions; ++p)
  2005. {
  2006. GeneratePaletteQuantized(pEP, aEndPts[p], aPalette[p]);
  2007. aTotErr[p] = 0;
  2008. }
  2009. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  2010. {
  2011. const uint8_t uRegion = g_aPartitionTable[uPartitions][pEP->uShape][i];
  2012. assert( uRegion < BC6H_MAX_REGIONS );
  2013. _Analysis_assume_( uRegion < BC6H_MAX_REGIONS );
  2014. float fBestErr = Norm(pEP->aIPixels[i], aPalette[uRegion][0]);
  2015. aIndices[i] = 0;
  2016. for(uint8_t j = 1; j < uNumIndices && fBestErr > 0; ++j)
  2017. {
  2018. float fErr = Norm(pEP->aIPixels[i], aPalette[uRegion][j]);
  2019. if(fErr > fBestErr) break; // error increased, so we're done searching
  2020. if(fErr < fBestErr)
  2021. {
  2022. fBestErr = fErr;
  2023. aIndices[i] = j;
  2024. }
  2025. }
  2026. aTotErr[uRegion] += fBestErr;
  2027. }
  2028. }
  2029. _Use_decl_annotations_
  2030. void D3DX_BC6H::QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* aQntEndPts) const
  2031. {
  2032. assert( pEP && aQntEndPts );
  2033. const INTEndPntPair* aUnqEndPts = pEP->aUnqEndPts[pEP->uShape];
  2034. const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
  2035. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  2036. assert( uPartitions < BC6H_MAX_REGIONS );
  2037. _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
  2038. for(size_t p = 0; p <= uPartitions; ++p)
  2039. {
  2040. aQntEndPts[p].A.r = Quantize(aUnqEndPts[p].A.r, Prec.r, pEP->bSigned);
  2041. aQntEndPts[p].A.g = Quantize(aUnqEndPts[p].A.g, Prec.g, pEP->bSigned);
  2042. aQntEndPts[p].A.b = Quantize(aUnqEndPts[p].A.b, Prec.b, pEP->bSigned);
  2043. aQntEndPts[p].B.r = Quantize(aUnqEndPts[p].B.r, Prec.r, pEP->bSigned);
  2044. aQntEndPts[p].B.g = Quantize(aUnqEndPts[p].B.g, Prec.g, pEP->bSigned);
  2045. aQntEndPts[p].B.b = Quantize(aUnqEndPts[p].B.b, Prec.b, pEP->bSigned);
  2046. }
  2047. }
  2048. _Use_decl_annotations_
  2049. void D3DX_BC6H::EmitBlock(const EncodeParams* pEP, const INTEndPntPair aEndPts[], const size_t aIndices[])
  2050. {
  2051. assert( pEP );
  2052. const uint8_t uRealMode = ms_aInfo[pEP->uMode].uMode;
  2053. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  2054. const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
  2055. const size_t uHeaderBits = uPartitions > 0 ? 82 : 65;
  2056. const ModeDescriptor* desc = ms_aDesc[pEP->uMode];
  2057. size_t uStartBit = 0;
  2058. while(uStartBit < uHeaderBits)
  2059. {
  2060. switch(desc[uStartBit].m_eField)
  2061. {
  2062. case M: SetBit(uStartBit, uint8_t(uRealMode >> desc[uStartBit].m_uBit) & 0x01); break;
  2063. case D: SetBit(uStartBit, uint8_t(pEP->uShape >> desc[uStartBit].m_uBit) & 0x01); break;
  2064. case RW: SetBit(uStartBit, uint8_t(aEndPts[0].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
  2065. case RX: SetBit(uStartBit, uint8_t(aEndPts[0].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
  2066. case RY: SetBit(uStartBit, uint8_t(aEndPts[1].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
  2067. case RZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
  2068. case GW: SetBit(uStartBit, uint8_t(aEndPts[0].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
  2069. case GX: SetBit(uStartBit, uint8_t(aEndPts[0].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
  2070. case GY: SetBit(uStartBit, uint8_t(aEndPts[1].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
  2071. case GZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
  2072. case BW: SetBit(uStartBit, uint8_t(aEndPts[0].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
  2073. case BX: SetBit(uStartBit, uint8_t(aEndPts[0].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
  2074. case BY: SetBit(uStartBit, uint8_t(aEndPts[1].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
  2075. case BZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
  2076. default: assert(false);
  2077. }
  2078. }
  2079. for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  2080. {
  2081. if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, pEP->uShape, i))
  2082. SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aIndices[i] ));
  2083. else
  2084. SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aIndices[i] ));
  2085. }
  2086. assert(uStartBit == 128);
  2087. }
  2088. _Use_decl_annotations_
  2089. void D3DX_BC6H::Refine(EncodeParams* pEP)
  2090. {
  2091. assert( pEP );
  2092. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  2093. assert( uPartitions < BC6H_MAX_REGIONS );
  2094. _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
  2095. const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
  2096. float aOrgErr[BC6H_MAX_REGIONS], aOptErr[BC6H_MAX_REGIONS];
  2097. INTEndPntPair aOrgEndPts[BC6H_MAX_REGIONS], aOptEndPts[BC6H_MAX_REGIONS];
  2098. size_t aOrgIdx[NUM_PIXELS_PER_BLOCK], aOptIdx[NUM_PIXELS_PER_BLOCK];
  2099. QuantizeEndPts(pEP, aOrgEndPts);
  2100. AssignIndices(pEP, aOrgEndPts, aOrgIdx, aOrgErr);
  2101. SwapIndices(pEP, aOrgEndPts, aOrgIdx);
  2102. if(bTransformed) TransformForward(aOrgEndPts);
  2103. if(EndPointsFit(pEP, aOrgEndPts))
  2104. {
  2105. if(bTransformed) TransformInverse(aOrgEndPts, ms_aInfo[pEP->uMode].RGBAPrec[0][0], pEP->bSigned);
  2106. OptimizeEndPoints(pEP, aOrgErr, aOrgEndPts, aOptEndPts);
  2107. AssignIndices(pEP, aOptEndPts, aOptIdx, aOptErr);
  2108. SwapIndices(pEP, aOptEndPts, aOptIdx);
  2109. float fOrgTotErr = 0.0f, fOptTotErr = 0.0f;
  2110. for(size_t p = 0; p <= uPartitions; ++p)
  2111. {
  2112. fOrgTotErr += aOrgErr[p];
  2113. fOptTotErr += aOptErr[p];
  2114. }
  2115. if(bTransformed) TransformForward(aOptEndPts);
  2116. if(EndPointsFit(pEP, aOptEndPts) && fOptTotErr < fOrgTotErr && fOptTotErr < pEP->fBestErr)
  2117. {
  2118. pEP->fBestErr = fOptTotErr;
  2119. EmitBlock(pEP, aOptEndPts, aOptIdx);
  2120. }
  2121. else if(fOrgTotErr < pEP->fBestErr)
  2122. {
  2123. // either it stopped fitting when we optimized it, or there was no improvement
  2124. // so go back to the unoptimized endpoints which we know will fit
  2125. if(bTransformed) TransformForward(aOrgEndPts);
  2126. pEP->fBestErr = fOrgTotErr;
  2127. EmitBlock(pEP, aOrgEndPts, aOrgIdx);
  2128. }
  2129. }
  2130. }
  2131. _Use_decl_annotations_
  2132. void D3DX_BC6H::GeneratePaletteUnquantized(const EncodeParams* pEP, size_t uRegion, INTColor aPalette[])
  2133. {
  2134. assert( pEP );
  2135. assert( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
  2136. _Analysis_assume_( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
  2137. const INTEndPntPair& endPts = pEP->aUnqEndPts[pEP->uShape][uRegion];
  2138. const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
  2139. const uint8_t uNumIndices = 1 << uIndexPrec;
  2140. assert(uNumIndices > 0);
  2141. _Analysis_assume_(uNumIndices > 0);
  2142. const int* aWeights = nullptr;
  2143. switch(uIndexPrec)
  2144. {
  2145. case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break;
  2146. case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break;
  2147. default:
  2148. assert(false);
  2149. for(size_t i = 0; i < uNumIndices; ++i)
  2150. {
  2151. #pragma prefast(suppress:22102 22103, "writing blocks in two halves confuses tool")
  2152. aPalette[i] = INTColor(0,0,0);
  2153. }
  2154. return;
  2155. }
  2156. for(register size_t i = 0; i < uNumIndices; ++i)
  2157. {
  2158. aPalette[i].r = (endPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
  2159. aPalette[i].g = (endPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
  2160. aPalette[i].b = (endPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
  2161. }
  2162. }
  2163. _Use_decl_annotations_
  2164. float D3DX_BC6H::MapColors(const EncodeParams* pEP, size_t uRegion, size_t np, const size_t* auIndex) const
  2165. {
  2166. assert( pEP );
  2167. const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
  2168. const uint8_t uNumIndices = 1 << uIndexPrec;
  2169. INTColor aPalette[BC6H_MAX_INDICES];
  2170. GeneratePaletteUnquantized(pEP, uRegion, aPalette);
  2171. float fTotalErr = 0.0f;
  2172. for(size_t i = 0; i < np; ++i)
  2173. {
  2174. float fBestErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[0]);
  2175. for(uint8_t j = 1; j < uNumIndices && fBestErr > 0.0f; ++j)
  2176. {
  2177. float fErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[j]);
  2178. if(fErr > fBestErr) break; // error increased, so we're done searching
  2179. if(fErr < fBestErr) fBestErr = fErr;
  2180. }
  2181. fTotalErr += fBestErr;
  2182. }
  2183. return fTotalErr;
  2184. }
  2185. _Use_decl_annotations_
  2186. float D3DX_BC6H::RoughMSE(EncodeParams* pEP) const
  2187. {
  2188. assert( pEP );
  2189. assert( pEP->uShape < BC6H_MAX_SHAPES);
  2190. _Analysis_assume_( pEP->uShape < BC6H_MAX_SHAPES);
  2191. INTEndPntPair* aEndPts = pEP->aUnqEndPts[pEP->uShape];
  2192. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  2193. assert( uPartitions < BC6H_MAX_REGIONS );
  2194. _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
  2195. size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
  2196. float fError = 0.0f;
  2197. for(size_t p = 0; p <= uPartitions; ++p)
  2198. {
  2199. size_t np = 0;
  2200. for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  2201. {
  2202. if(g_aPartitionTable[uPartitions][pEP->uShape][i] == p)
  2203. {
  2204. auPixIdx[np++] = i;
  2205. }
  2206. }
  2207. // handle simple cases
  2208. assert(np > 0);
  2209. if(np == 1)
  2210. {
  2211. aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
  2212. aEndPts[p].B = pEP->aIPixels[auPixIdx[0]];
  2213. continue;
  2214. }
  2215. else if(np == 2)
  2216. {
  2217. aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
  2218. aEndPts[p].B = pEP->aIPixels[auPixIdx[1]];
  2219. continue;
  2220. }
  2221. HDRColorA epA, epB;
  2222. OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
  2223. aEndPts[p].A.Set(epA, pEP->bSigned);
  2224. aEndPts[p].B.Set(epB, pEP->bSigned);
  2225. if(pEP->bSigned)
  2226. {
  2227. aEndPts[p].A.Clamp(-F16MAX, F16MAX);
  2228. aEndPts[p].B.Clamp(-F16MAX, F16MAX);
  2229. }
  2230. else
  2231. {
  2232. aEndPts[p].A.Clamp(0, F16MAX);
  2233. aEndPts[p].B.Clamp(0, F16MAX);
  2234. }
  2235. fError += MapColors(pEP, p, np, auPixIdx);
  2236. }
  2237. return fError;
  2238. }
  2239. //-------------------------------------------------------------------------------------
  2240. // BC7 Compression
  2241. //-------------------------------------------------------------------------------------
  2242. _Use_decl_annotations_
  2243. void D3DX_BC7::Decode(Color (&pOut)[4][4]) const // !! this must be a reference because we're calling 'Zero' !!
  2244. {
  2245. size_t uFirst = 0;
  2246. while(uFirst < 128 && !GetBit(uFirst)) {}
  2247. uint8_t uMode = uint8_t(uFirst - 1);
  2248. if(uMode < 8)
  2249. {
  2250. const uint8_t uPartitions = ms_aInfo[uMode].uPartitions;
  2251. assert( uPartitions < BC7_MAX_REGIONS );
  2252. _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
  2253. const uint8_t uNumEndPts = (uPartitions + 1) << 1;
  2254. const uint8_t uIndexPrec = ms_aInfo[uMode].uIndexPrec;
  2255. const uint8_t uIndexPrec2 = ms_aInfo[uMode].uIndexPrec2;
  2256. register size_t i;
  2257. size_t uStartBit = uMode + 1;
  2258. uint8_t P[6];
  2259. uint8_t uShape = GetBits(uStartBit, ms_aInfo[uMode].uPartitionBits);
  2260. assert( uShape < BC7_MAX_SHAPES );
  2261. _Analysis_assume_( uShape < BC7_MAX_SHAPES );
  2262. uint8_t uRotation = GetBits(uStartBit, ms_aInfo[uMode].uRotationBits);
  2263. assert( uRotation < 4 );
  2264. uint8_t uIndexMode = GetBits(uStartBit, ms_aInfo[uMode].uIndexModeBits);
  2265. assert( uIndexMode < 2 );
  2266. LDRColorA c[BC7_MAX_REGIONS << 1];
  2267. const LDRColorA RGBAPrec = ms_aInfo[uMode].RGBAPrec;
  2268. const LDRColorA RGBAPrecWithP = ms_aInfo[uMode].RGBAPrecWithP;
  2269. assert( uNumEndPts <= (BC7_MAX_REGIONS << 1) );
  2270. // Red channel
  2271. for(i = 0; i < uNumEndPts; i++)
  2272. {
  2273. if ( uStartBit + RGBAPrec.r > 128 )
  2274. {
  2275. #ifdef _DEBUG
  2276. OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
  2277. #endif
  2278. FillWithErrorColors(pOut);
  2279. return;
  2280. }
  2281. c[i].r = GetBits(uStartBit, RGBAPrec.r);
  2282. }
  2283. // Green channel
  2284. for(i = 0; i < uNumEndPts; i++)
  2285. {
  2286. if ( uStartBit + RGBAPrec.g > 128 )
  2287. {
  2288. #ifdef _DEBUG
  2289. OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
  2290. #endif
  2291. FillWithErrorColors(pOut);
  2292. return;
  2293. }
  2294. c[i].g = GetBits(uStartBit, RGBAPrec.g);
  2295. }
  2296. // Blue channel
  2297. for(i = 0; i < uNumEndPts; i++)
  2298. {
  2299. if ( uStartBit + RGBAPrec.b > 128 )
  2300. {
  2301. #ifdef _DEBUG
  2302. OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
  2303. #endif
  2304. FillWithErrorColors(pOut);
  2305. return;
  2306. }
  2307. c[i].b = GetBits(uStartBit, RGBAPrec.b);
  2308. }
  2309. // Alpha channel
  2310. for(i = 0; i < uNumEndPts; i++)
  2311. {
  2312. if ( uStartBit + RGBAPrec.a > 128 )
  2313. {
  2314. #ifdef _DEBUG
  2315. OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
  2316. #endif
  2317. FillWithErrorColors(pOut);
  2318. return;
  2319. }
  2320. c[i].a = RGBAPrec.a ? GetBits(uStartBit, RGBAPrec.a) : 255;
  2321. }
  2322. // P-bits
  2323. assert( ms_aInfo[uMode].uPBits <= 6 );
  2324. _Analysis_assume_( ms_aInfo[uMode].uPBits <= 6 );
  2325. for(i = 0; i < ms_aInfo[uMode].uPBits; i++)
  2326. {
  2327. if ( uStartBit > 127 )
  2328. {
  2329. #ifdef _DEBUG
  2330. OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
  2331. #endif
  2332. FillWithErrorColors(pOut);
  2333. return;
  2334. }
  2335. P[i] = GetBit(uStartBit);
  2336. }
  2337. if(ms_aInfo[uMode].uPBits)
  2338. {
  2339. for(i = 0; i < uNumEndPts; i++)
  2340. {
  2341. size_t pi = i * ms_aInfo[uMode].uPBits / uNumEndPts;
  2342. for (register uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
  2343. {
  2344. if(RGBAPrec[ch] != RGBAPrecWithP[ch])
  2345. {
  2346. c[i][ch] = (c[i][ch] << 1) | P[pi];
  2347. }
  2348. }
  2349. }
  2350. }
  2351. for(i = 0; i < uNumEndPts; i++)
  2352. {
  2353. c[i] = Unquantize(c[i], RGBAPrecWithP);
  2354. }
  2355. uint8_t w1[NUM_PIXELS_PER_BLOCK], w2[NUM_PIXELS_PER_BLOCK];
  2356. // read color indices
  2357. for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  2358. {
  2359. size_t uNumBits = IsFixUpOffset(ms_aInfo[uMode].uPartitions, uShape, i) ? uIndexPrec - 1 : uIndexPrec;
  2360. if ( uStartBit + uNumBits > 128 )
  2361. {
  2362. #ifdef _DEBUG
  2363. OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
  2364. #endif
  2365. FillWithErrorColors(pOut);
  2366. return;
  2367. }
  2368. w1[i] = GetBits(uStartBit, uNumBits);
  2369. }
  2370. // read alpha indices
  2371. if(uIndexPrec2)
  2372. {
  2373. for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  2374. {
  2375. size_t uNumBits = i ? uIndexPrec2 : uIndexPrec2 - 1;
  2376. if ( uStartBit + uNumBits > 128 )
  2377. {
  2378. #ifdef _DEBUG
  2379. OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
  2380. #endif
  2381. FillWithErrorColors(pOut);
  2382. return;
  2383. }
  2384. w2[i] = GetBits(uStartBit, uNumBits );
  2385. }
  2386. }
  2387. for(i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  2388. {
  2389. uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
  2390. LDRColorA &outPixel = (LDRColorA&)pOut[0][i]; ASSERT(SIZE(LDRColorA)==SIZE(pOut[0][0])); // ESENTHEL CHANGED
  2391. if(uIndexPrec2 == 0)
  2392. {
  2393. LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w1[i], uIndexPrec, uIndexPrec, outPixel);
  2394. }
  2395. else
  2396. {
  2397. if (uIndexMode == 0)
  2398. {
  2399. LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w2[i], uIndexPrec, uIndexPrec2, outPixel);
  2400. }
  2401. else
  2402. {
  2403. LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w2[i], w1[i], uIndexPrec2, uIndexPrec, outPixel);
  2404. }
  2405. }
  2406. switch (uRotation)
  2407. {
  2408. case 1: std::swap(outPixel.r, outPixel.a); break;
  2409. case 2: std::swap(outPixel.g, outPixel.a); break;
  2410. case 3: std::swap(outPixel.b, outPixel.a); break;
  2411. }
  2412. }
  2413. }
  2414. else
  2415. {
  2416. #ifdef _DEBUG
  2417. OutputDebugStringA( "BC7: Reserved mode 8 encountered during decoding\n" );
  2418. #endif
  2419. // Per the BC7 format spec, we must return transparent black
  2420. Zero(pOut); // ESENTHEL CHANGED
  2421. }
  2422. }
  2423. _Use_decl_annotations_
  2424. void D3DX_BC7::Encode(DWORD flags, const HDRColorA* const pIn)
  2425. {
  2426. assert(pIn);
  2427. D3DX_BC7 final = *this;
  2428. EncodeParams EP(pIn);
  2429. float fMSEBest = FLT_MAX;
  2430. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  2431. {
  2432. EP.aLDRPixels[i].r = uint8_t(std::max<float>(0.0f, std::min<float>(255.0f, pIn[i].r * 255.0f + 0.01f)));
  2433. EP.aLDRPixels[i].g = uint8_t(std::max<float>(0.0f, std::min<float>(255.0f, pIn[i].g * 255.0f + 0.01f)));
  2434. EP.aLDRPixels[i].b = uint8_t(std::max<float>(0.0f, std::min<float>(255.0f, pIn[i].b * 255.0f + 0.01f)));
  2435. EP.aLDRPixels[i].a = uint8_t(std::max<float>(0.0f, std::min<float>(255.0f, pIn[i].a * 255.0f + 0.01f)));
  2436. }
  2437. for (EP.uMode = 0; EP.uMode < 8 && fMSEBest > 0; ++EP.uMode)
  2438. {
  2439. if (!(flags & BC_FLAGS_USE_3SUBSETS) && (EP.uMode == 0 || EP.uMode == 2))
  2440. {
  2441. // 3 subset modes tend to be used rarely and add significant compression time
  2442. continue;
  2443. }
  2444. if ((flags & BC_FLAGS_FORCE_BC7_MODE6) && (EP.uMode != 6))
  2445. {
  2446. // Use only mode 6
  2447. continue;
  2448. }
  2449. const size_t uShapes = size_t(1) << ms_aInfo[EP.uMode].uPartitionBits;
  2450. assert(uShapes <= BC7_MAX_SHAPES);
  2451. _Analysis_assume_(uShapes <= BC7_MAX_SHAPES);
  2452. const size_t uNumRots = size_t(1) << ms_aInfo[EP.uMode].uRotationBits;
  2453. const size_t uNumIdxMode = size_t(1) << ms_aInfo[EP.uMode].uIndexModeBits;
  2454. // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
  2455. // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
  2456. const size_t uItems = std::max<size_t>(1, uShapes >> 2);
  2457. float afRoughMSE[BC7_MAX_SHAPES];
  2458. size_t auShape[BC7_MAX_SHAPES];
  2459. for (size_t r = 0; r < uNumRots && fMSEBest > 0; ++r)
  2460. {
  2461. switch (r)
  2462. {
  2463. case 1: for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
  2464. case 2: for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
  2465. case 3: for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
  2466. }
  2467. for (size_t im = 0; im < uNumIdxMode && fMSEBest > 0; ++im)
  2468. {
  2469. // pick the best uItems shapes and refine these.
  2470. for (size_t s = 0; s < uShapes; s++)
  2471. {
  2472. afRoughMSE[s] = RoughMSE(&EP, s, im);
  2473. auShape[s] = s;
  2474. }
  2475. // Bubble up the first uItems items
  2476. for (size_t i = 0; i < uItems; i++)
  2477. {
  2478. for (size_t j = i + 1; j < uShapes; j++)
  2479. {
  2480. if (afRoughMSE[i] > afRoughMSE[j])
  2481. {
  2482. std::swap(afRoughMSE[i], afRoughMSE[j]);
  2483. std::swap(auShape[i], auShape[j]);
  2484. }
  2485. }
  2486. }
  2487. for (size_t i = 0; i < uItems && fMSEBest > 0; i++)
  2488. {
  2489. float fMSE = Refine(&EP, auShape[i], r, im);
  2490. if (fMSE < fMSEBest)
  2491. {
  2492. final = *this;
  2493. fMSEBest = fMSE;
  2494. }
  2495. }
  2496. }
  2497. switch (r)
  2498. {
  2499. case 1: for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
  2500. case 2: for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
  2501. case 3: for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
  2502. }
  2503. }
  2504. }
  2505. *this = final;
  2506. }
  2507. //-------------------------------------------------------------------------------------
  2508. _Use_decl_annotations_
  2509. void D3DX_BC7::GeneratePaletteQuantized(const EncodeParams* pEP, size_t uIndexMode, const LDREndPntPair& endPts, LDRColorA aPalette[]) const
  2510. {
  2511. assert(pEP);
  2512. const size_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
  2513. const size_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
  2514. const size_t uNumIndices = size_t(1) << uIndexPrec;
  2515. const size_t uNumIndices2 = size_t(1) << uIndexPrec2;
  2516. assert(uNumIndices > 0 && uNumIndices2 > 0);
  2517. _Analysis_assume_(uNumIndices > 0 && uNumIndices2 > 0);
  2518. assert((uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES));
  2519. _Analysis_assume_((uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES));
  2520. LDRColorA a = Unquantize(endPts.A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
  2521. LDRColorA b = Unquantize(endPts.B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
  2522. if(uIndexPrec2 == 0)
  2523. {
  2524. for(register size_t i = 0; i < uNumIndices; i++)
  2525. LDRColorA::Interpolate(a, b, i, i, uIndexPrec, uIndexPrec, aPalette[i]);
  2526. }
  2527. else
  2528. {
  2529. for(register size_t i = 0; i < uNumIndices; i++)
  2530. LDRColorA::InterpolateRGB(a, b, i, uIndexPrec, aPalette[i]);
  2531. for(register size_t i = 0; i < uNumIndices2; i++)
  2532. LDRColorA::InterpolateA(a, b, i, uIndexPrec2, aPalette[i]);
  2533. }
  2534. }
  2535. _Use_decl_annotations_
  2536. float D3DX_BC7::PerturbOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
  2537. const LDREndPntPair &oldEndPts, LDREndPntPair &newEndPts, float fOldErr, uint8_t do_b) const
  2538. {
  2539. assert( pEP );
  2540. const int prec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
  2541. LDREndPntPair tmp_endPts = newEndPts = oldEndPts;
  2542. float fMinErr = fOldErr;
  2543. uint8_t* pnew_c = (do_b ? &newEndPts.B[ch] : &newEndPts.A[ch]);
  2544. uint8_t* ptmp_c = (do_b ? &tmp_endPts.B[ch] : &tmp_endPts.A[ch]);
  2545. // do a logarithmic search for the best error for this endpoint (which)
  2546. for(int step = 1 << (prec-1); step; step >>= 1)
  2547. {
  2548. bool bImproved = false;
  2549. int beststep = 0;
  2550. for(int sign = -1; sign <= 1; sign += 2)
  2551. {
  2552. int tmp = int(*pnew_c) + sign * step;
  2553. if(tmp < 0 || tmp >= (1 << prec))
  2554. continue;
  2555. else
  2556. *ptmp_c = (uint8_t) tmp;
  2557. float fTotalErr = MapColors(pEP, aColors, np, uIndexMode, tmp_endPts, fMinErr);
  2558. if(fTotalErr < fMinErr)
  2559. {
  2560. bImproved = true;
  2561. fMinErr = fTotalErr;
  2562. beststep = sign * step;
  2563. }
  2564. }
  2565. // if this was an improvement, move the endpoint and continue search from there
  2566. if(bImproved)
  2567. *pnew_c = uint8_t(int(*pnew_c) + beststep);
  2568. }
  2569. return fMinErr;
  2570. }
  2571. // perturb the endpoints at least -3 to 3.
  2572. // always ensure endpoint ordering is preserved (no need to overlap the scan)
  2573. _Use_decl_annotations_
  2574. void D3DX_BC7::Exhaustive(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
  2575. float& fOrgErr, LDREndPntPair& optEndPt) const
  2576. {
  2577. assert( pEP );
  2578. const uint8_t uPrec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
  2579. LDREndPntPair tmpEndPt;
  2580. if(fOrgErr == 0)
  2581. return;
  2582. int delta = 5;
  2583. // ok figure out the range of A and B
  2584. tmpEndPt = optEndPt;
  2585. int alow = std::max<int>(0, int(optEndPt.A[ch]) - delta);
  2586. int ahigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.A[ch]) + delta);
  2587. int blow = std::max<int>(0, int(optEndPt.B[ch]) - delta);
  2588. int bhigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.B[ch]) + delta);
  2589. int amin = 0;
  2590. int bmin = 0;
  2591. float fBestErr = fOrgErr;
  2592. if (optEndPt.A[ch] <= optEndPt.B[ch])
  2593. {
  2594. // keep a <= b
  2595. for (int a = alow; a <= ahigh; ++a)
  2596. {
  2597. for (int b = std::max<int>(a, blow); b < bhigh; ++b)
  2598. {
  2599. tmpEndPt.A[ch] = (uint8_t)a;
  2600. tmpEndPt.B[ch] = (uint8_t)b;
  2601. float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
  2602. if (fErr < fBestErr)
  2603. {
  2604. amin = a;
  2605. bmin = b;
  2606. fBestErr = fErr;
  2607. }
  2608. }
  2609. }
  2610. }
  2611. else
  2612. {
  2613. // keep b <= a
  2614. for (int b = blow; b < bhigh; ++b)
  2615. {
  2616. for (int a = std::max<int>(b, alow); a <= ahigh; ++a)
  2617. {
  2618. tmpEndPt.A[ch] = (uint8_t)a;
  2619. tmpEndPt.B[ch] = (uint8_t)b;
  2620. float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
  2621. if (fErr < fBestErr)
  2622. {
  2623. amin = a;
  2624. bmin = b;
  2625. fBestErr = fErr;
  2626. }
  2627. }
  2628. }
  2629. }
  2630. if (fBestErr < fOrgErr)
  2631. {
  2632. optEndPt.A[ch] = (uint8_t)amin;
  2633. optEndPt.B[ch] = (uint8_t)bmin;
  2634. fOrgErr = fBestErr;
  2635. }
  2636. }
  2637. _Use_decl_annotations_
  2638. void D3DX_BC7::OptimizeOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode,
  2639. float fOrgErr, const LDREndPntPair& org, LDREndPntPair& opt) const
  2640. {
  2641. assert(pEP);
  2642. float fOptErr = fOrgErr;
  2643. opt = org;
  2644. LDREndPntPair new_a, new_b;
  2645. LDREndPntPair newEndPts;
  2646. uint8_t do_b;
  2647. // now optimize each channel separately
  2648. for (size_t ch = 0; ch < BC7_NUM_CHANNELS; ++ch)
  2649. {
  2650. if (ms_aInfo[pEP->uMode].RGBAPrecWithP[ch] == 0)
  2651. continue;
  2652. // figure out which endpoint when perturbed gives the most improvement and start there
  2653. // if we just alternate, we can easily end up in a local minima
  2654. float fErr0 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_a, fOptErr, 0); // perturb endpt A
  2655. float fErr1 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_b, fOptErr, 1); // perturb endpt B
  2656. uint8_t& copt_a = opt.A[ch];
  2657. uint8_t& copt_b = opt.B[ch];
  2658. uint8_t& cnew_a = new_a.A[ch];
  2659. uint8_t& cnew_b = new_a.B[ch];
  2660. if(fErr0 < fErr1)
  2661. {
  2662. if(fErr0 >= fOptErr)
  2663. continue;
  2664. copt_a = cnew_a;
  2665. fOptErr = fErr0;
  2666. do_b = 1; // do B next
  2667. }
  2668. else
  2669. {
  2670. if(fErr1 >= fOptErr)
  2671. continue;
  2672. copt_b = cnew_b;
  2673. fOptErr = fErr1;
  2674. do_b = 0; // do A next
  2675. }
  2676. // now alternate endpoints and keep trying until there is no improvement
  2677. for( ; ; )
  2678. {
  2679. float fErr = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, newEndPts, fOptErr, do_b);
  2680. if(fErr >= fOptErr)
  2681. break;
  2682. if(do_b == 0)
  2683. copt_a = cnew_a;
  2684. else
  2685. copt_b = cnew_b;
  2686. fOptErr = fErr;
  2687. do_b = 1 - do_b; // now move the other endpoint
  2688. }
  2689. }
  2690. // finally, do a small exhaustive search around what we think is the global minima to be sure
  2691. for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
  2692. Exhaustive(pEP, aColors, np, uIndexMode, ch, fOptErr, opt);
  2693. }
  2694. _Use_decl_annotations_
  2695. void D3DX_BC7::OptimizeEndPoints(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, const float afOrgErr[],
  2696. const LDREndPntPair aOrgEndPts[], LDREndPntPair aOptEndPts[]) const
  2697. {
  2698. assert( pEP );
  2699. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  2700. assert( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
  2701. _Analysis_assume_( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
  2702. LDRColorA aPixels[NUM_PIXELS_PER_BLOCK];
  2703. for(size_t p = 0; p <= uPartitions; ++p)
  2704. {
  2705. // collect the pixels in the region
  2706. size_t np = 0;
  2707. for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  2708. if(g_aPartitionTable[uPartitions][uShape][i] == p)
  2709. aPixels[np++] = pEP->aLDRPixels[i];
  2710. OptimizeOne(pEP, aPixels, np, uIndexMode, afOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
  2711. }
  2712. }
  2713. _Use_decl_annotations_
  2714. void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, LDREndPntPair endPts[], size_t aIndices[], size_t aIndices2[],
  2715. float afTotErr[]) const
  2716. {
  2717. assert( pEP );
  2718. assert( uShape < BC7_MAX_SHAPES );
  2719. _Analysis_assume_( uShape < BC7_MAX_SHAPES );
  2720. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  2721. assert( uPartitions < BC7_MAX_REGIONS );
  2722. _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
  2723. const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
  2724. const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
  2725. const uint8_t uNumIndices = 1 << uIndexPrec;
  2726. const uint8_t uNumIndices2 = 1 << uIndexPrec2;
  2727. assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
  2728. _Analysis_assume_( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
  2729. const uint8_t uHighestIndexBit = uNumIndices >> 1;
  2730. const uint8_t uHighestIndexBit2 = uNumIndices2 >> 1;
  2731. LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
  2732. // build list of possibles
  2733. for(size_t p = 0; p <= uPartitions; p++)
  2734. {
  2735. GeneratePaletteQuantized(pEP, uIndexMode, endPts[p], aPalette[p]);
  2736. afTotErr[p] = 0;
  2737. }
  2738. for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  2739. {
  2740. uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
  2741. assert( uRegion < BC7_MAX_REGIONS );
  2742. _Analysis_assume_( uRegion < BC7_MAX_REGIONS );
  2743. afTotErr[uRegion] += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2, &(aIndices[i]), &(aIndices2[i]));
  2744. }
  2745. // swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
  2746. if(uIndexPrec2 == 0)
  2747. {
  2748. for (register size_t p = 0; p <= uPartitions; p++)
  2749. {
  2750. if (aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
  2751. {
  2752. std::swap(endPts[p].A, endPts[p].B);
  2753. for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  2754. if (g_aPartitionTable[uPartitions][uShape][i] == p)
  2755. aIndices[i] = uNumIndices - 1 - aIndices[i];
  2756. }
  2757. assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
  2758. }
  2759. }
  2760. else
  2761. {
  2762. for (register size_t p = 0; p <= uPartitions; p++)
  2763. {
  2764. if (aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
  2765. {
  2766. std::swap(endPts[p].A.r, endPts[p].B.r);
  2767. std::swap(endPts[p].A.g, endPts[p].B.g);
  2768. std::swap(endPts[p].A.b, endPts[p].B.b);
  2769. for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  2770. if(g_aPartitionTable[uPartitions][uShape][i] == p)
  2771. aIndices[i] = uNumIndices - 1 - aIndices[i];
  2772. }
  2773. assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
  2774. if (aIndices2[0] & uHighestIndexBit2)
  2775. {
  2776. std::swap(endPts[p].A.a, endPts[p].B.a);
  2777. for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  2778. aIndices2[i] = uNumIndices2 - 1 - aIndices2[i];
  2779. }
  2780. assert((aIndices2[0] & uHighestIndexBit2) == 0);
  2781. }
  2782. }
  2783. }
  2784. _Use_decl_annotations_
  2785. void D3DX_BC7::EmitBlock(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode, const LDREndPntPair aEndPts[], const size_t aIndex[], const size_t aIndex2[])
  2786. {
  2787. assert( pEP );
  2788. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  2789. assert( uPartitions < BC7_MAX_REGIONS );
  2790. _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
  2791. const size_t uPBits = ms_aInfo[pEP->uMode].uPBits;
  2792. const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
  2793. const size_t uIndexPrec2 = ms_aInfo[pEP->uMode].uIndexPrec2;
  2794. const LDRColorA RGBAPrec = ms_aInfo[pEP->uMode].RGBAPrec;
  2795. const LDRColorA RGBAPrecWithP = ms_aInfo[pEP->uMode].RGBAPrecWithP;
  2796. register size_t i;
  2797. size_t uStartBit = 0;
  2798. SetBits(uStartBit, pEP->uMode, 0);
  2799. SetBits(uStartBit, 1, 1);
  2800. SetBits(uStartBit, ms_aInfo[pEP->uMode].uRotationBits, static_cast<uint8_t>( uRotation ));
  2801. SetBits(uStartBit, ms_aInfo[pEP->uMode].uIndexModeBits, static_cast<uint8_t>( uIndexMode ));
  2802. SetBits(uStartBit, ms_aInfo[pEP->uMode].uPartitionBits, static_cast<uint8_t>( uShape ));
  2803. if(uPBits)
  2804. {
  2805. const size_t uNumEP = size_t(1 + uPartitions) << 1;
  2806. uint8_t aPVote[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
  2807. uint8_t aCount[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
  2808. for(uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
  2809. {
  2810. uint8_t ep = 0;
  2811. for(i = 0; i <= uPartitions; i++)
  2812. {
  2813. if(RGBAPrec[ch] == RGBAPrecWithP[ch])
  2814. {
  2815. SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch]);
  2816. SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch]);
  2817. }
  2818. else
  2819. {
  2820. SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] >> 1);
  2821. SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] >> 1);
  2822. size_t idx = ep++ * uPBits / uNumEP;
  2823. assert(idx < (BC7_MAX_REGIONS << 1));
  2824. _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1));
  2825. aPVote[idx] += aEndPts[i].A[ch] & 0x01;
  2826. aCount[idx]++;
  2827. idx = ep++ * uPBits / uNumEP;
  2828. assert(idx < (BC7_MAX_REGIONS << 1));
  2829. _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1));
  2830. aPVote[idx] += aEndPts[i].B[ch] & 0x01;
  2831. aCount[idx]++;
  2832. }
  2833. }
  2834. }
  2835. for(i = 0; i < uPBits; i++)
  2836. {
  2837. SetBits(uStartBit, 1, aPVote[i] > (aCount[i] >> 1) ? 1 : 0);
  2838. }
  2839. }
  2840. else
  2841. {
  2842. for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
  2843. {
  2844. for(i = 0; i <= uPartitions; i++)
  2845. {
  2846. SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] );
  2847. SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] );
  2848. }
  2849. }
  2850. }
  2851. const size_t* aI1 = uIndexMode ? aIndex2 : aIndex;
  2852. const size_t* aI2 = uIndexMode ? aIndex : aIndex2;
  2853. for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  2854. {
  2855. if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, uShape, i))
  2856. SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aI1[i] ));
  2857. else
  2858. SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aI1[i] ));
  2859. }
  2860. if(uIndexPrec2)
  2861. for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  2862. SetBits(uStartBit, i ? uIndexPrec2 : uIndexPrec2 - 1, static_cast<uint8_t>( aI2[i] ));
  2863. assert(uStartBit == 128);
  2864. }
  2865. _Use_decl_annotations_
  2866. float D3DX_BC7::Refine(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode)
  2867. {
  2868. assert( pEP );
  2869. assert( uShape < BC7_MAX_SHAPES );
  2870. _Analysis_assume_( uShape < BC7_MAX_SHAPES );
  2871. const LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
  2872. const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  2873. assert( uPartitions < BC7_MAX_REGIONS );
  2874. _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
  2875. LDREndPntPair aOrgEndPts[BC7_MAX_REGIONS];
  2876. LDREndPntPair aOptEndPts[BC7_MAX_REGIONS];
  2877. size_t aOrgIdx[NUM_PIXELS_PER_BLOCK];
  2878. size_t aOrgIdx2[NUM_PIXELS_PER_BLOCK];
  2879. size_t aOptIdx[NUM_PIXELS_PER_BLOCK];
  2880. size_t aOptIdx2[NUM_PIXELS_PER_BLOCK];
  2881. float aOrgErr[BC7_MAX_REGIONS];
  2882. float aOptErr[BC7_MAX_REGIONS];
  2883. for(register size_t p = 0; p <= uPartitions; p++)
  2884. {
  2885. aOrgEndPts[p].A = Quantize(aEndPts[p].A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
  2886. aOrgEndPts[p].B = Quantize(aEndPts[p].B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
  2887. }
  2888. AssignIndices(pEP, uShape, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2, aOrgErr);
  2889. OptimizeEndPoints(pEP, uShape, uIndexMode, aOrgErr, aOrgEndPts, aOptEndPts);
  2890. AssignIndices(pEP, uShape, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2, aOptErr);
  2891. float fOrgTotErr = 0, fOptTotErr = 0;
  2892. for(register size_t p = 0; p <= uPartitions; p++)
  2893. {
  2894. fOrgTotErr += aOrgErr[p];
  2895. fOptTotErr += aOptErr[p];
  2896. }
  2897. if(fOptTotErr < fOrgTotErr)
  2898. {
  2899. EmitBlock(pEP, uShape, uRotation, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2);
  2900. return fOptTotErr;
  2901. }
  2902. else
  2903. {
  2904. EmitBlock(pEP, uShape, uRotation, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2);
  2905. return fOrgTotErr;
  2906. }
  2907. }
  2908. _Use_decl_annotations_
  2909. float D3DX_BC7::MapColors(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, const LDREndPntPair& endPts, float fMinErr) const
  2910. {
  2911. assert( pEP );
  2912. const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
  2913. const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
  2914. LDRColorA aPalette[BC7_MAX_INDICES];
  2915. float fTotalErr = 0;
  2916. GeneratePaletteQuantized(pEP, uIndexMode, endPts, aPalette);
  2917. for(register size_t i = 0; i < np; ++i)
  2918. {
  2919. fTotalErr += ComputeError(aColors[i], aPalette, uIndexPrec, uIndexPrec2);
  2920. if(fTotalErr > fMinErr) // check for early exit
  2921. {
  2922. fTotalErr = FLT_MAX;
  2923. break;
  2924. }
  2925. }
  2926. return fTotalErr;
  2927. }
  2928. _Use_decl_annotations_
  2929. float D3DX_BC7::RoughMSE(EncodeParams* pEP, size_t uShape, size_t uIndexMode)
  2930. {
  2931. assert( pEP );
  2932. assert( uShape < BC7_MAX_SHAPES );
  2933. _Analysis_assume_( uShape < BC7_MAX_SHAPES );
  2934. LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
  2935. const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
  2936. assert( uPartitions < BC7_MAX_REGIONS );
  2937. _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
  2938. const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
  2939. const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
  2940. const uint8_t uNumIndices = 1 << uIndexPrec;
  2941. const uint8_t uNumIndices2 = 1 << uIndexPrec2;
  2942. size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
  2943. LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
  2944. for(size_t p = 0; p <= uPartitions; p++)
  2945. {
  2946. size_t np = 0;
  2947. for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  2948. {
  2949. if (g_aPartitionTable[uPartitions][uShape][i] == p)
  2950. {
  2951. auPixIdx[np++] = i;
  2952. }
  2953. }
  2954. // handle simple cases
  2955. assert(np > 0);
  2956. if(np == 1)
  2957. {
  2958. aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
  2959. aEndPts[p].B = pEP->aLDRPixels[auPixIdx[0]];
  2960. continue;
  2961. }
  2962. else if(np == 2)
  2963. {
  2964. aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
  2965. aEndPts[p].B = pEP->aLDRPixels[auPixIdx[1]];
  2966. continue;
  2967. }
  2968. if(uIndexPrec2 == 0)
  2969. {
  2970. HDRColorA epA, epB;
  2971. OptimizeRGBA(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
  2972. epA.Clamp(0.0f, 1.0f);
  2973. epB.Clamp(0.0f, 1.0f);
  2974. epA *= 255.0f;
  2975. epB *= 255.0f;
  2976. aEndPts[p].A = epA.ToLDRColorA();
  2977. aEndPts[p].B = epB.ToLDRColorA();
  2978. }
  2979. else
  2980. {
  2981. uint8_t uMinAlpha = 255, uMaxAlpha = 0;
  2982. for (register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  2983. {
  2984. uMinAlpha = std::min<uint8_t>(uMinAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
  2985. uMaxAlpha = std::max<uint8_t>(uMaxAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
  2986. }
  2987. HDRColorA epA, epB;
  2988. OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
  2989. epA.Clamp(0.0f, 1.0f);
  2990. epB.Clamp(0.0f, 1.0f);
  2991. epA *= 255.0f;
  2992. epB *= 255.0f;
  2993. aEndPts[p].A = epA.ToLDRColorA();
  2994. aEndPts[p].B = epB.ToLDRColorA();
  2995. aEndPts[p].A.a = uMinAlpha;
  2996. aEndPts[p].B.a = uMaxAlpha;
  2997. }
  2998. }
  2999. if(uIndexPrec2 == 0)
  3000. {
  3001. for(size_t p = 0; p <= uPartitions; p++)
  3002. for(register size_t i = 0; i < uNumIndices; i++)
  3003. LDRColorA::Interpolate(aEndPts[p].A, aEndPts[p].B, i, i, uIndexPrec, uIndexPrec, aPalette[p][i]);
  3004. }
  3005. else
  3006. {
  3007. for(size_t p = 0; p <= uPartitions; p++)
  3008. {
  3009. for(register size_t i = 0; i < uNumIndices; i++)
  3010. LDRColorA::InterpolateRGB(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec, aPalette[p][i]);
  3011. for(register size_t i = 0; i < uNumIndices2; i++)
  3012. LDRColorA::InterpolateA(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec2, aPalette[p][i]);
  3013. }
  3014. }
  3015. float fTotalErr = 0;
  3016. for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
  3017. {
  3018. uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
  3019. fTotalErr += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2);
  3020. }
  3021. return fTotalErr;
  3022. }
  3023. //=====================================================================================
  3024. // Entry points
  3025. //=====================================================================================
  3026. //-------------------------------------------------------------------------------------
  3027. // BC6H Compression
  3028. //-------------------------------------------------------------------------------------
  3029. _Use_decl_annotations_
  3030. void DirectX::D3DXDecodeBC6HU(XMVECTOR *pColor, const uint8_t *pBC)
  3031. {
  3032. assert(pColor && pBC);
  3033. static_assert(sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes");
  3034. reinterpret_cast<const D3DX_BC6H*>(pBC)->Decode(false, reinterpret_cast<HDRColorA*>(pColor));
  3035. }
  3036. _Use_decl_annotations_
  3037. void DirectX::D3DXDecodeBC6HS(XMVECTOR *pColor, const uint8_t *pBC)
  3038. {
  3039. assert( pColor && pBC );
  3040. static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
  3041. reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(true, reinterpret_cast<HDRColorA*>(pColor));
  3042. }
  3043. _Use_decl_annotations_
  3044. void DirectX::D3DXEncodeBC6HU(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
  3045. {
  3046. UNREFERENCED_PARAMETER(flags);
  3047. assert(pBC && pColor);
  3048. static_assert(sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes");
  3049. reinterpret_cast<D3DX_BC6H*>(pBC)->Encode(false, reinterpret_cast<const HDRColorA*>(pColor));
  3050. }
  3051. _Use_decl_annotations_
  3052. void DirectX::D3DXEncodeBC6HS(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
  3053. {
  3054. UNREFERENCED_PARAMETER(flags);
  3055. assert(pBC && pColor);
  3056. static_assert(sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes");
  3057. reinterpret_cast<D3DX_BC6H*>(pBC)->Encode(true, reinterpret_cast<const HDRColorA*>(pColor));
  3058. }
  3059. //-------------------------------------------------------------------------------------
  3060. // BC7 Compression
  3061. //-------------------------------------------------------------------------------------
  3062. _Use_decl_annotations_
  3063. void DirectX::D3DXDecodeBC7(Color (&color)[4][4], const uint8_t *pBC) // ESENTHEL CHANGED
  3064. {
  3065. static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
  3066. reinterpret_cast< const D3DX_BC7* >( pBC )->Decode(color); // ESENTHEL CHANGED
  3067. }
  3068. _Use_decl_annotations_
  3069. void DirectX::D3DXEncodeBC7(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
  3070. {
  3071. assert(pBC && pColor);
  3072. static_assert(sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes");
  3073. reinterpret_cast<D3DX_BC7*>(pBC)->Encode(flags, reinterpret_cast<const HDRColorA*>(pColor));
  3074. }