basisu_enc.h 104 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029
  1. // basisu_enc.h
  2. // Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. #pragma once
  16. #include "../transcoder/basisu.h"
  17. #include "../transcoder/basisu_transcoder_internal.h"
  18. #include <mutex>
  19. #include <atomic>
  20. #include <condition_variable>
  21. #include <functional>
  22. #include <thread>
  23. #include <unordered_map>
  24. #include <ostream>
  25. #if !defined(_WIN32) || defined(__MINGW32__)
  26. #include <libgen.h>
  27. #endif
  28. // This module is really just a huge grab bag of classes and helper functions needed by the encoder.
  29. // If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time.
  30. #define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0)
  31. #if BASISU_SUPPORT_SSE
  32. // Declared in basisu_kernels_imp.h, but we can't include that here otherwise it would lead to circular type errors.
  33. extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16);
  34. #endif
  35. namespace basisu
  36. {
// Per-byte lookup table — presumably Hamming weight/distance per byte value; defined in the encoder .cpp (TODO confirm).
extern uint8_t g_hamming_dist[256];
// 8x8 bitmap glyphs covering ASCII codes 32..127, used for debug text rendering.
extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8];
// true if basisu_encoder_init() has been called and returned.
extern bool g_library_initialized;
// Encoder library initialization.
// This function MUST be called before encoding anything!
// Returns false if library initialization fails.
bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
void basisu_encoder_deinit();
// basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1
extern void detect_sse41();
#if BASISU_SUPPORT_SSE
extern bool g_cpu_supports_sse41;
#else
const bool g_cpu_supports_sse41 = false;
#endif
// printf-style error reporting helpers (va_list and variadic forms).
void error_vprintf(const char* pFmt, va_list args);
void error_printf(const char *pFmt, ...);
  55. // Helpers
  56. inline uint8_t clamp255(int32_t i)
  57. {
  58. return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);
  59. }
  60. inline int left_shift32(int val, int shift)
  61. {
  62. assert((shift >= 0) && (shift < 32));
  63. return static_cast<int>(static_cast<uint32_t>(val) << shift);
  64. }
  65. inline uint32_t left_shift32(uint32_t val, int shift)
  66. {
  67. assert((shift >= 0) && (shift < 32));
  68. return val << shift;
  69. }
  70. inline int32_t clampi(int32_t value, int32_t low, int32_t high)
  71. {
  72. if (value < low)
  73. value = low;
  74. else if (value > high)
  75. value = high;
  76. return value;
  77. }
  78. inline uint8_t mul_8(uint32_t v, uint32_t a)
  79. {
  80. v = v * a + 128;
  81. return (uint8_t)((v + (v >> 8)) >> 8);
  82. }
  83. inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
  84. {
  85. assert(codesize <= 64);
  86. uint64_t bits = 0;
  87. uint32_t total_bits = 0;
  88. while (total_bits < codesize)
  89. {
  90. uint32_t byte_bit_offset = bit_offset & 7;
  91. uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);
  92. uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;
  93. byte_bits &= ((1 << bits_to_read) - 1);
  94. bits |= ((uint64_t)(byte_bits) << total_bits);
  95. total_bits += bits_to_read;
  96. bit_offset += bits_to_read;
  97. }
  98. return bits;
  99. }
  100. inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
  101. {
  102. assert(codesize <= 32);
  103. uint32_t bits = 0;
  104. uint32_t total_bits = 0;
  105. while (total_bits < codesize)
  106. {
  107. uint32_t byte_bit_offset = bit_offset & 7;
  108. uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);
  109. uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;
  110. byte_bits &= ((1 << bits_to_read) - 1);
  111. bits |= (byte_bits << total_bits);
  112. total_bits += bits_to_read;
  113. bit_offset += bits_to_read;
  114. }
  115. return bits;
  116. }
  117. // Open interval
  118. inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
  119. inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
  120. // Closed interval
  121. inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
  122. inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
  123. inline uint32_t clz(uint32_t x)
  124. {
  125. if (!x)
  126. return 32;
  127. uint32_t n = 0;
  128. while ((x & 0x80000000) == 0)
  129. {
  130. x <<= 1u;
  131. n++;
  132. }
  133. return n;
  134. }
  135. bool string_begins_with(const std::string& str, const char* pPhrase);
  136. // Hashing
// 32-bit integer mixing function (Bob Jenkins-style "6-shift" mix).
// Scrambles the input so that small input changes flip bits throughout the
// output (avalanche). The constants are part of the published design — do not alter.
inline uint32_t bitmix32c(uint32_t v)
{
    v = (v + 0x7ed55d16) + (v << 12);
    v = (v ^ 0xc761c23c) ^ (v >> 19);
    v = (v + 0x165667b1) + (v << 5);
    v = (v + 0xd3a2646c) ^ (v << 9);
    v = (v + 0xfd7046c5) + (v << 3);
    v = (v ^ 0xb55a4f09) ^ (v >> 16);
    return v;
}
// Alternate 32-bit integer mixing function (Bob Jenkins' "7-shift" variant).
// Like bitmix32c, this is an order-sensitive scramble; the exact sequence of
// subtract/xor/shift steps is what provides the avalanche behavior.
inline uint32_t bitmix32(uint32_t v)
{
    v -= (v << 6);
    v ^= (v >> 17);
    v -= (v << 9);
    v ^= (v << 4);
    v -= (v << 3);
    v ^= (v << 10);
    v ^= (v >> 15);
    return v;
}
  158. inline uint32_t wang_hash(uint32_t seed)
  159. {
  160. seed = (seed ^ 61) ^ (seed >> 16);
  161. seed *= 9;
  162. seed = seed ^ (seed >> 4);
  163. seed *= 0x27d4eb2d;
  164. seed = seed ^ (seed >> 15);
  165. return seed;
  166. }
  167. uint32_t hash_hsieh(const uint8_t* pBuf, size_t len);
// Hash functor for std::unordered_map/set that hashes a key's raw bytes
// via hash_hsieh (declared above in this header).
// NOTE(review): assumes Key is trivially copyable with deterministic padding —
// verify before using with non-POD key types.
template <typename Key>
struct bit_hasher
{
    std::size_t operator()(const Key& k) const
    {
        return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k));
    }
};
  176. class running_stat
  177. {
  178. public:
  179. running_stat() { clear(); }
  180. void clear()
  181. {
  182. m_n = 0;
  183. m_total = 0;
  184. m_old_m = 0;
  185. m_new_m = 0;
  186. m_old_s = 0;
  187. m_new_s = 0;
  188. m_min = 0;
  189. m_max = 0;
  190. }
  191. void push(double x)
  192. {
  193. m_n++;
  194. m_total += x;
  195. if (m_n == 1)
  196. {
  197. m_old_m = m_new_m = x;
  198. m_old_s = 0.0;
  199. m_min = x;
  200. m_max = x;
  201. }
  202. else
  203. {
  204. // See Knuth TAOCP vol 2, 3rd edition, page 232
  205. m_new_m = m_old_m + (x - m_old_m) / m_n;
  206. m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m);
  207. m_old_m = m_new_m;
  208. m_old_s = m_new_s;
  209. m_min = basisu::minimum(x, m_min);
  210. m_max = basisu::maximum(x, m_max);
  211. }
  212. }
  213. uint32_t get_num() const
  214. {
  215. return m_n;
  216. }
  217. double get_total() const
  218. {
  219. return m_total;
  220. }
  221. double get_mean() const
  222. {
  223. return (m_n > 0) ? m_new_m : 0.0;
  224. }
  225. // Returns sample variance
  226. double get_variance() const
  227. {
  228. return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0);
  229. }
  230. double get_std_dev() const
  231. {
  232. return sqrt(get_variance());
  233. }
  234. double get_min() const
  235. {
  236. return m_min;
  237. }
  238. double get_max() const
  239. {
  240. return m_max;
  241. }
  242. private:
  243. uint32_t m_n;
  244. double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max;
  245. };
  246. // Linear algebra
  247. template <uint32_t N, typename T>
  248. class vec
  249. {
  250. protected:
  251. T m_v[N];
  252. public:
  253. enum { num_elements = N };
  254. typedef T scalar_type;
  255. inline vec() { }
  256. inline vec(eZero) { set_zero(); }
  257. explicit inline vec(T val) { set(val); }
  258. inline vec(T v0, T v1) { set(v0, v1); }
  259. inline vec(T v0, T v1, T v2) { set(v0, v1, v2); }
  260. inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); }
  261. inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; }
  262. template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); }
  263. inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; }
  264. inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; }
  265. inline T getX() const { return m_v[0]; }
  266. inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; }
  267. inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; }
  268. inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; }
  269. inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; }
  270. inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }
  271. inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
  272. inline void clear() { set_zero(); }
  273. template <uint32_t OtherN, typename OtherT>
  274. inline vec &set(const vec<OtherN, OtherT> &other)
  275. {
  276. uint32_t i;
  277. if ((const void *)(&other) == (const void *)(this))
  278. return *this;
  279. const uint32_t m = minimum(OtherN, N);
  280. for (i = 0; i < m; i++)
  281. m_v[i] = static_cast<T>(other[i]);
  282. for (; i < N; i++)
  283. m_v[i] = 0;
  284. return *this;
  285. }
  286. inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; }
  287. inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; }
  288. inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; }
  289. inline vec &set(T v0, T v1)
  290. {
  291. m_v[0] = v0;
  292. if (N >= 2)
  293. {
  294. m_v[1] = v1;
  295. clear_elements(2, N);
  296. }
  297. return *this;
  298. }
  299. inline vec &set(T v0, T v1, T v2)
  300. {
  301. m_v[0] = v0;
  302. if (N >= 2)
  303. {
  304. m_v[1] = v1;
  305. if (N >= 3)
  306. {
  307. m_v[2] = v2;
  308. clear_elements(3, N);
  309. }
  310. }
  311. return *this;
  312. }
  313. inline vec &set(T v0, T v1, T v2, T v3)
  314. {
  315. m_v[0] = v0;
  316. if (N >= 2)
  317. {
  318. m_v[1] = v1;
  319. if (N >= 3)
  320. {
  321. m_v[2] = v2;
  322. if (N >= 4)
  323. {
  324. m_v[3] = v3;
  325. clear_elements(5, N);
  326. }
  327. }
  328. }
  329. return *this;
  330. }
  331. inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; }
  332. template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; }
  333. inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); }
  334. inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); }
  335. inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; }
  336. inline vec operator+ () const { return *this; }
  337. inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; }
  338. inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; }
  339. inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; }
  340. inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; }
  341. inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; }
  342. inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; }
  343. friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; }
  344. friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; }
  345. friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; }
  346. friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; }
  347. friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; }
  348. friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; }
  349. static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; }
  350. inline T dot(const vec &rhs) const { return dot_product(*this, rhs); }
  351. inline T norm() const { return dot_product(*this, *this); }
  352. inline T length() const { return sqrt(norm()); }
  353. inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; }
  354. inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; }
  355. inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
  356. inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }
  357. inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
  358. inline vec &clamp(T l, T h)
  359. {
  360. for (uint32_t i = 0; i < N; i++)
  361. m_v[i] = basisu::clamp(m_v[i], l, h);
  362. return *this;
  363. }
  364. static vec component_min(const vec& a, const vec& b)
  365. {
  366. vec res;
  367. for (uint32_t i = 0; i < N; i++)
  368. res[i] = minimum(a[i], b[i]);
  369. return res;
  370. }
  371. static vec component_max(const vec& a, const vec& b)
  372. {
  373. vec res;
  374. for (uint32_t i = 0; i < N; i++)
  375. res[i] = maximum(a[i], b[i]);
  376. return res;
  377. }
  378. };
// Convenience typedefs for the common vector instantiations
// (double and float variants; vec16F is used for 16-dim covariance work).
typedef vec<4, double> vec4D;
typedef vec<3, double> vec3D;
typedef vec<2, double> vec2D;
typedef vec<1, double> vec1D;
typedef vec<4, float> vec4F;
typedef vec<3, float> vec3F;
typedef vec<2, float> vec2F;
typedef vec<1, float> vec1F;
typedef vec<16, float> vec16F;
  388. template <uint32_t Rows, uint32_t Cols, typename T>
  389. class matrix
  390. {
  391. public:
  392. typedef vec<Rows, T> col_vec;
  393. typedef vec<Cols, T> row_vec;
  394. typedef T scalar_type;
  395. enum { rows = Rows, cols = Cols };
  396. protected:
  397. row_vec m_r[Rows];
  398. public:
  399. inline matrix() {}
  400. inline matrix(eZero) { set_zero(); }
  401. inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; }
  402. inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; }
  403. inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
  404. inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
  405. inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; }
  406. inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; }
  407. inline matrix &set_zero()
  408. {
  409. for (uint32_t i = 0; i < Rows; i++)
  410. m_r[i].set_zero();
  411. return *this;
  412. }
  413. inline matrix &set_identity()
  414. {
  415. for (uint32_t i = 0; i < Rows; i++)
  416. {
  417. m_r[i].set_zero();
  418. if (i < Cols)
  419. m_r[i][i] = 1.0f;
  420. }
  421. return *this;
  422. }
  423. };
// Estimates the principal eigenvector of an NxN covariance matrix via power
// iteration (at most 8 iterations). Returns the dominant axis normalized to
// unit length — an approximation, which is sufficient for PCA-based encoding.
template<uint32_t N, typename VectorType>
inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix)
{
    VectorType axis;
    if (N == 1)
        axis.set(1.0f);
    else
    {
        // Seed with a mildly asymmetric vector (ramp from .75 to 1.25) so the
        // starting guess is unlikely to be orthogonal to the principal axis.
        for (uint32_t i = 0; i < N; i++)
            axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum<int>(N - 1, 1)));
    }
    VectorType prev_axis(axis);
    // Power iterations
    for (uint32_t power_iter = 0; power_iter < 8; power_iter++)
    {
        // trial_axis = cmatrix * axis, tracking the largest-magnitude component.
        VectorType trial_axis;
        double max_sum = 0;
        for (uint32_t i = 0; i < N; i++)
        {
            double sum = 0;
            for (uint32_t j = 0; j < N; j++)
                sum += cmatrix[i][j] * axis[j];
            trial_axis[i] = static_cast<float>(sum);
            max_sum = maximum(fabs(sum), max_sum);
        }
        // Rescale by the max component each pass so repeated multiplication
        // can't overflow/underflow.
        if (max_sum != 0.0f)
            trial_axis *= static_cast<float>(1.0f / max_sum);
        VectorType delta_axis(prev_axis - trial_axis);
        prev_axis = axis;
        axis = trial_axis;
        // Early out once the axis has (approximately) stopped moving.
        if (delta_axis.norm() < .0024f)
            break;
    }
    return axis.normalize_in_place();
}
  459. template<typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys)
  460. {
  461. for (uint32_t i = 0; i < num_indices; i++)
  462. pIndices[i] = i;
  463. std::sort(
  464. pIndices,
  465. pIndices + num_indices,
  466. [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; }
  467. );
  468. }
// 1-4 byte direct Radix sort.
// Sorts num_vals records ascending by an unsigned key: the uint32_t read at key_ofs
// bytes into each record, using only its low key_size bytes. pBuf0 holds the input;
// pBuf1 is scratch of equal size. Returns whichever buffer ends up holding the sorted
// data (pBuf0 when key_size is even, pBuf1 when odd).
// NOTE(review): BASISU_GET_KEY always reads a full, possibly unaligned uint32_t, even
// when key_size < 4 — assumes that over-read is safe for T's allocation; confirm.
template <typename T>
T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size)
{
	assert(key_ofs < sizeof(T));
	assert((key_size >= 1) && (key_size <= 4));

	// One 256-entry histogram per key byte (pass); only key_size of them are cleared/used.
	uint32_t hist[256 * 4];
	memset(hist, 0, sizeof(hist[0]) * 256 * key_size);

#define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs))

	// Histogramming: count the byte values for all passes in a single scan of the input.
	if (key_size == 4)
	{
		T* p = pBuf0;
		T* q = pBuf0 + num_vals;
		for (; p != q; p++)
		{
			const uint32_t key = BASISU_GET_KEY(p);

			hist[key & 0xFF]++;
			hist[256 + ((key >> 8) & 0xFF)]++;
			hist[512 + ((key >> 16) & 0xFF)]++;
			hist[768 + ((key >> 24) & 0xFF)]++;
		}
	}
	else if (key_size == 3)
	{
		T* p = pBuf0;
		T* q = pBuf0 + num_vals;
		for (; p != q; p++)
		{
			const uint32_t key = BASISU_GET_KEY(p);

			hist[key & 0xFF]++;
			hist[256 + ((key >> 8) & 0xFF)]++;
			hist[512 + ((key >> 16) & 0xFF)]++;
		}
	}
	else if (key_size == 2)
	{
		// Two records per iteration to reduce loop overhead.
		T* p = pBuf0;
		T* q = pBuf0 + (num_vals >> 1) * 2;

		for (; p != q; p += 2)
		{
			const uint32_t key0 = BASISU_GET_KEY(p);
			const uint32_t key1 = BASISU_GET_KEY(p + 1);

			hist[key0 & 0xFF]++;
			hist[256 + ((key0 >> 8) & 0xFF)]++;

			hist[key1 & 0xFF]++;
			hist[256 + ((key1 >> 8) & 0xFF)]++;
		}

		// Leftover record when num_vals is odd.
		if (num_vals & 1)
		{
			const uint32_t key = BASISU_GET_KEY(p);

			hist[key & 0xFF]++;
			hist[256 + ((key >> 8) & 0xFF)]++;
		}
	}
	else
	{
		assert(key_size == 1);
		if (key_size != 1)
			return NULL;

		T* p = pBuf0;
		T* q = pBuf0 + (num_vals >> 1) * 2;

		for (; p != q; p += 2)
		{
			const uint32_t key0 = BASISU_GET_KEY(p);
			const uint32_t key1 = BASISU_GET_KEY(p + 1);

			hist[key0 & 0xFF]++;
			hist[key1 & 0xFF]++;
		}

		if (num_vals & 1)
		{
			const uint32_t key = BASISU_GET_KEY(p);

			hist[key & 0xFF]++;
		}
	}

	// One counting-sort pass per key byte, least-significant byte first,
	// ping-ponging the records between the two buffers.
	T* pCur = pBuf0;
	T* pNew = pBuf1;

	for (uint32_t pass = 0; pass < key_size; pass++)
	{
		const uint32_t* pHist = &hist[pass << 8];

		// Exclusive prefix-sum of this pass's histogram -> starting offset per bucket.
		uint32_t offsets[256];

		uint32_t cur_ofs = 0;
		for (uint32_t i = 0; i < 256; i += 2)
		{
			offsets[i] = cur_ofs;
			cur_ofs += pHist[i];

			offsets[i + 1] = cur_ofs;
			cur_ofs += pHist[i + 1];
		}

		const uint32_t pass_shift = pass << 3;

		// Stable scatter into pNew, two records at a time.
		T* p = pCur;
		T* q = pCur + (num_vals >> 1) * 2;

		for (; p != q; p += 2)
		{
			uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;
			uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF;

			if (c0 == c1)
			{
				// Both records land in the same bucket: bump its offset once by 2.
				uint32_t dst_offset0 = offsets[c0];

				offsets[c0] = dst_offset0 + 2;

				pNew[dst_offset0] = p[0];
				pNew[dst_offset0 + 1] = p[1];
			}
			else
			{
				uint32_t dst_offset0 = offsets[c0]++;
				uint32_t dst_offset1 = offsets[c1]++;

				pNew[dst_offset0] = p[0];
				pNew[dst_offset1] = p[1];
			}
		}

		// Leftover record when num_vals is odd.
		if (num_vals & 1)
		{
			uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;

			uint32_t dst_offset = offsets[c];
			offsets[c] = dst_offset + 1;

			pNew[dst_offset] = *p;
		}

		// Swap buffer roles for the next pass.
		T* t = pCur;
		pCur = pNew;
		pNew = t;
	}

	return pCur;
}

#undef BASISU_GET_KEY
// Very simple job pool with no dependencies.
class job_pool
{
	BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool);

public:
	// num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc.
	job_pool(uint32_t num_threads);
	~job_pool();

	// Queues a job (any void() callable) for execution.
	void add_job(const std::function<void()>& job);
	void add_job(std::function<void()>&& job);

	// Blocks until all queued jobs have completed.
	void wait_for_all();

	// Total threads = the calling thread plus the spawned workers.
	size_t get_total_threads() const { return 1 + m_threads.size(); }

private:
	std::vector<std::thread> m_threads;           // spawned worker threads (num_threads - 1 of them)
	std::vector<std::function<void()> > m_queue;  // pending jobs
	std::mutex m_mutex;                           // NOTE(review): presumably guards m_queue/m_num_active_jobs — confirm in the .cpp
	std::condition_variable m_has_work;
	std::condition_variable m_no_more_jobs;
	uint32_t m_num_active_jobs;

	std::atomic<bool> m_kill_flag;                // NOTE(review): presumably signals workers to exit — confirm in the .cpp

	void job_thread(uint32_t index);              // worker thread entry point
};
  615. // Simple 64-bit color class
  616. class color_rgba_i16
  617. {
  618. public:
  619. union
  620. {
  621. int16_t m_comps[4];
  622. struct
  623. {
  624. int16_t r;
  625. int16_t g;
  626. int16_t b;
  627. int16_t a;
  628. };
  629. };
  630. inline color_rgba_i16()
  631. {
  632. static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4");
  633. }
  634. inline color_rgba_i16(int sr, int sg, int sb, int sa)
  635. {
  636. set(sr, sg, sb, sa);
  637. }
  638. inline color_rgba_i16 &set(int sr, int sg, int sb, int sa)
  639. {
  640. m_comps[0] = (int16_t)clamp<int>(sr, INT16_MIN, INT16_MAX);
  641. m_comps[1] = (int16_t)clamp<int>(sg, INT16_MIN, INT16_MAX);
  642. m_comps[2] = (int16_t)clamp<int>(sb, INT16_MIN, INT16_MAX);
  643. m_comps[3] = (int16_t)clamp<int>(sa, INT16_MIN, INT16_MAX);
  644. return *this;
  645. }
  646. };
  647. class color_rgba
  648. {
  649. public:
  650. union
  651. {
  652. uint8_t m_comps[4];
  653. struct
  654. {
  655. uint8_t r;
  656. uint8_t g;
  657. uint8_t b;
  658. uint8_t a;
  659. };
  660. };
  661. inline color_rgba()
  662. {
  663. static_assert(sizeof(*this) == 4, "sizeof(*this) != 4");
  664. static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)");
  665. }
  666. // Not too hot about this idea.
  667. inline color_rgba(const basist::color32& other) :
  668. r(other.r),
  669. g(other.g),
  670. b(other.b),
  671. a(other.a)
  672. {
  673. }
  674. color_rgba& operator= (const basist::color32& rhs)
  675. {
  676. r = rhs.r;
  677. g = rhs.g;
  678. b = rhs.b;
  679. a = rhs.a;
  680. return *this;
  681. }
  682. inline color_rgba(int y)
  683. {
  684. set(y);
  685. }
  686. inline color_rgba(int y, int na)
  687. {
  688. set(y, na);
  689. }
  690. inline color_rgba(int sr, int sg, int sb, int sa)
  691. {
  692. set(sr, sg, sb, sa);
  693. }
  694. inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa)
  695. {
  696. set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa);
  697. }
  698. inline color_rgba& set_noclamp_y(int y)
  699. {
  700. m_comps[0] = (uint8_t)y;
  701. m_comps[1] = (uint8_t)y;
  702. m_comps[2] = (uint8_t)y;
  703. m_comps[3] = (uint8_t)255;
  704. return *this;
  705. }
  706. inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa)
  707. {
  708. m_comps[0] = (uint8_t)sr;
  709. m_comps[1] = (uint8_t)sg;
  710. m_comps[2] = (uint8_t)sb;
  711. m_comps[3] = (uint8_t)sa;
  712. return *this;
  713. }
  714. inline color_rgba &set(int y)
  715. {
  716. m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
  717. m_comps[1] = m_comps[0];
  718. m_comps[2] = m_comps[0];
  719. m_comps[3] = 255;
  720. return *this;
  721. }
  722. inline color_rgba &set(int y, int na)
  723. {
  724. m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
  725. m_comps[1] = m_comps[0];
  726. m_comps[2] = m_comps[0];
  727. m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255));
  728. return *this;
  729. }
  730. inline color_rgba &set(int sr, int sg, int sb, int sa)
  731. {
  732. m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
  733. m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
  734. m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
  735. m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255));
  736. return *this;
  737. }
  738. inline color_rgba &set_rgb(int sr, int sg, int sb)
  739. {
  740. m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
  741. m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
  742. m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
  743. return *this;
  744. }
  745. inline color_rgba &set_rgb(const color_rgba &other)
  746. {
  747. r = other.r;
  748. g = other.g;
  749. b = other.b;
  750. return *this;
  751. }
  752. inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; }
  753. inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; }
  754. inline void clear()
  755. {
  756. m_comps[0] = 0;
  757. m_comps[1] = 0;
  758. m_comps[2] = 0;
  759. m_comps[3] = 0;
  760. }
  761. inline bool operator== (const color_rgba &rhs) const
  762. {
  763. if (m_comps[0] != rhs.m_comps[0]) return false;
  764. if (m_comps[1] != rhs.m_comps[1]) return false;
  765. if (m_comps[2] != rhs.m_comps[2]) return false;
  766. if (m_comps[3] != rhs.m_comps[3]) return false;
  767. return true;
  768. }
  769. inline bool operator!= (const color_rgba &rhs) const
  770. {
  771. return !(*this == rhs);
  772. }
  773. inline bool operator<(const color_rgba &rhs) const
  774. {
  775. for (int i = 0; i < 4; i++)
  776. {
  777. if (m_comps[i] < rhs.m_comps[i])
  778. return true;
  779. else if (m_comps[i] != rhs.m_comps[i])
  780. return false;
  781. }
  782. return false;
  783. }
  784. inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; }
  785. inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; }
  786. inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); }
  787. inline basist::color32 get_color32() const
  788. {
  789. return basist::color32(r, g, b, a);
  790. }
  791. static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }
  792. static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }
  793. };
typedef basisu::vector<color_rgba> color_rgba_vec;

// Commonly used constant colors. (const at namespace scope has internal linkage,
// so each translation unit including this header gets its own copy.)
const color_rgba g_black_color(0, 0, 0, 255);
const color_rgba g_black_trans_color(0, 0, 0, 0);
const color_rgba g_white_color(255, 255, 255, 255);
  798. inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1)
  799. {
  800. int dr = r0 - r1, dg = g0 - g1, db = b0 - b1;
  801. return dr * dr + dg * dg + db * db;
  802. }
  803. inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1)
  804. {
  805. int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1;
  806. return dr * dr + dg * dg + db * db + da * da;
  807. }
  808. inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha)
  809. {
  810. if (alpha)
  811. return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a);
  812. else
  813. return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b);
  814. }
  815. // TODO: Allow user to control channel weightings.
  816. inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha)
  817. {
  818. if (perceptual)
  819. {
  820. #if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE
  821. const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f;
  822. const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f;
  823. const float cr1 = e1.r - l1;
  824. const float cr2 = e2.r - l2;
  825. const float cb1 = e1.b - l1;
  826. const float cb2 = e2.b - l2;
  827. const float dl = l1 - l2;
  828. const float dcr = cr1 - cr2;
  829. const float dcb = cb1 - cb2;
  830. uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb);
  831. if (alpha)
  832. {
  833. int da = static_cast<int>(e1.a) - static_cast<int>(e2.a);
  834. d += static_cast<uint32_t>(128.0f*da*da);
  835. }
  836. return d;
  837. #elif 1
  838. int dr = e1.r - e2.r;
  839. int dg = e1.g - e2.g;
  840. int db = e1.b - e2.b;
  841. #if 0
  842. int delta_l = dr * 27 + dg * 92 + db * 9;
  843. int delta_cr = dr * 128 - delta_l;
  844. int delta_cb = db * 128 - delta_l;
  845. uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) +
  846. ((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) +
  847. ((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U);
  848. #else
  849. int64_t delta_l = dr * 27 + dg * 92 + db * 9;
  850. int64_t delta_cr = dr * 128 - delta_l;
  851. int64_t delta_cb = db * 128 - delta_l;
  852. uint32_t id = ((uint32_t)((delta_l * delta_l) >> 7U)) +
  853. ((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) +
  854. ((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U);
  855. #endif
  856. if (alpha)
  857. {
  858. int da = (e1.a - e2.a) << 7;
  859. // This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring.
  860. id += ((uint32_t)(da * da) >> 7U);
  861. }
  862. return id;
  863. #else
  864. int dr = e1.r - e2.r;
  865. int dg = e1.g - e2.g;
  866. int db = e1.b - e2.b;
  867. int64_t delta_l = dr * 27 + dg * 92 + db * 9;
  868. int64_t delta_cr = dr * 128 - delta_l;
  869. int64_t delta_cb = db * 128 - delta_l;
  870. int64_t id = ((delta_l * delta_l) * 128) +
  871. ((delta_cr * delta_cr) * 26) +
  872. ((delta_cb * delta_cb) * 3);
  873. if (alpha)
  874. {
  875. int64_t da = (e1.a - e2.a);
  876. id += (da * da) * 128;
  877. }
  878. int d = (id + 8192) >> 14;
  879. return d;
  880. #endif
  881. }
  882. else
  883. return color_distance(e1, e2, alpha);
  884. }
  885. static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b)
  886. {
  887. const int dl = a.r - b.r;
  888. const int da = a.a - b.a;
  889. return dl * dl + da * da;
  890. }
  891. // String helpers
  892. inline int string_find_right(const std::string& filename, char c)
  893. {
  894. size_t result = filename.find_last_of(c);
  895. return (result == std::string::npos) ? -1 : (int)result;
  896. }
  897. inline std::string string_get_extension(const std::string &filename)
  898. {
  899. int sep = -1;
  900. #ifdef _WIN32
  901. sep = string_find_right(filename, '\\');
  902. #endif
  903. if (sep < 0)
  904. sep = string_find_right(filename, '/');
  905. int dot = string_find_right(filename, '.');
  906. if (dot <= sep)
  907. return "";
  908. std::string result(filename);
  909. result.erase(0, dot + 1);
  910. return result;
  911. }
  912. inline bool string_remove_extension(std::string &filename)
  913. {
  914. int sep = -1;
  915. #ifdef _WIN32
  916. sep = string_find_right(filename, '\\');
  917. #endif
  918. if (sep < 0)
  919. sep = string_find_right(filename, '/');
  920. int dot = string_find_right(filename, '.');
  921. if ((dot < sep) || (dot < 0))
  922. return false;
  923. filename.resize(dot);
  924. return true;
  925. }
// printf-style formatting into a std::string.
// NOTE: uses a fixed 2KB stack buffer; on non-Windows, longer output is truncated by
// vsnprintf. NOTE(review): the _WIN32 vsprintf_s path presumably invokes the CRT's
// invalid-parameter handling on overflow instead of truncating — confirm desired behavior.
inline std::string string_format(const char* pFmt, ...)
{
	char buf[2048];

	va_list args;
	va_start(args, pFmt);
#ifdef _WIN32
	vsprintf_s(buf, sizeof(buf), pFmt, args);
#else
	vsnprintf(buf, sizeof(buf), pFmt, args);
#endif
	va_end(args);

	return std::string(buf);
}
  939. inline std::string string_tolower(const std::string& s)
  940. {
  941. std::string result(s);
  942. for (size_t i = 0; i < result.size(); i++)
  943. {
  944. result[i] = (char)tolower((uint8_t)(result[i]));
  945. }
  946. return result;
  947. }
  948. inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc)
  949. {
  950. assert(pDst && pSrc && dst_len);
  951. if (!dst_len)
  952. return pDst;
  953. const size_t src_len = strlen(pSrc);
  954. const size_t src_len_plus_terminator = src_len + 1;
  955. if (src_len_plus_terminator <= dst_len)
  956. memcpy(pDst, pSrc, src_len_plus_terminator);
  957. else
  958. {
  959. if (dst_len > 1)
  960. memcpy(pDst, pSrc, dst_len - 1);
  961. pDst[dst_len - 1] = '\0';
  962. }
  963. return pDst;
  964. }
  965. inline bool string_ends_with(const std::string& s, char c)
  966. {
  967. return (s.size() != 0) && (s.back() == c);
  968. }
// Splits path p into drive, directory, filename (without extension), and extension
// (with a leading '.'). Any output pointer may be NULL to skip that component.
// Returns false if the path can't be parsed.
inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt)
{
#ifdef _MSC_VER
	// Windows: defer to the CRT's _splitpath_s.
	char drive_buf[_MAX_DRIVE] = { 0 };
	char dir_buf[_MAX_DIR] = { 0 };
	char fname_buf[_MAX_FNAME] = { 0 };
	char ext_buf[_MAX_EXT] = { 0 };

	errno_t error = _splitpath_s(p,
		pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0,
		pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0,
		pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0,
		pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0);
	if (error != 0)
		return false;

	if (pDrive) *pDrive = drive_buf;
	if (pDir) *pDir = dir_buf;
	if (pFilename) *pFilename = fname_buf;
	if (pExt) *pExt = ext_buf;
	return true;
#else
	// POSIX: dirname()/basename() may modify their argument, so work on local copies.
	// NOTE(review): paths longer than 1023 chars are truncated by strcpy_safe.
	char dirtmp[1024], nametmp[1024];
	strcpy_safe(dirtmp, sizeof(dirtmp), p);
	strcpy_safe(nametmp, sizeof(nametmp), p);

	// No drive letters on POSIX systems.
	if (pDrive)
		pDrive->resize(0);

	const char *pDirName = dirname(dirtmp);
	const char* pBaseName = basename(nametmp);
	if ((!pDirName) || (!pBaseName))
		return false;

	if (pDir)
	{
		*pDir = pDirName;
		// Ensure the directory portion ends with a separator.
		if ((pDir->size()) && (pDir->back() != '/'))
			*pDir += "/";
	}

	if (pFilename)
	{
		*pFilename = pBaseName;
		string_remove_extension(*pFilename);
	}

	if (pExt)
	{
		*pExt = pBaseName;
		*pExt = string_get_extension(*pExt);
		if (pExt->size())
			*pExt = "." + *pExt;
	}

	return true;
#endif
}
  1019. inline bool is_path_separator(char c)
  1020. {
  1021. #ifdef _WIN32
  1022. return (c == '/') || (c == '\\');
  1023. #else
  1024. return (c == '/');
  1025. #endif
  1026. }
  1027. inline bool is_drive_separator(char c)
  1028. {
  1029. #ifdef _WIN32
  1030. return (c == ':');
  1031. #else
  1032. (void)c;
  1033. return false;
  1034. #endif
  1035. }
  1036. inline void string_combine_path(std::string &dst, const char *p, const char *q)
  1037. {
  1038. std::string temp(p);
  1039. if (temp.size() && !is_path_separator(q[0]))
  1040. {
  1041. if (!is_path_separator(temp.back()))
  1042. temp.append(1, BASISU_PATH_SEPERATOR_CHAR);
  1043. }
  1044. temp += q;
  1045. dst.swap(temp);
  1046. }
  1047. inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r)
  1048. {
  1049. string_combine_path(dst, p, q);
  1050. string_combine_path(dst, dst.c_str(), r);
  1051. }
  1052. inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt)
  1053. {
  1054. string_combine_path(dst, p, q, r);
  1055. if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.'))
  1056. dst.append(1, '.');
  1057. dst.append(pExt);
  1058. }
  1059. inline bool string_get_pathname(const char *p, std::string &path)
  1060. {
  1061. std::string temp_drive, temp_path;
  1062. if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL))
  1063. return false;
  1064. string_combine_path(path, temp_drive.c_str(), temp_path.c_str());
  1065. return true;
  1066. }
  1067. inline bool string_get_filename(const char *p, std::string &filename)
  1068. {
  1069. std::string temp_ext;
  1070. if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext))
  1071. return false;
  1072. filename += temp_ext;
  1073. return true;
  1074. }
  1075. class rand
  1076. {
  1077. std::mt19937 m_mt;
  1078. public:
  1079. rand() { }
  1080. rand(uint32_t s) { seed(s); }
  1081. void seed(uint32_t s) { m_mt.seed(s); }
  1082. // between [l,h]
  1083. int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); }
  1084. uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); }
  1085. bool bit() { return irand(0, 1) == 1; }
  1086. uint8_t byte() { return static_cast<uint8_t>(urand32()); }
  1087. // between [l,h)
  1088. float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); }
  1089. float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); }
  1090. };
// Simple binary max-heap of (index, priority) pairs: the entry with the largest
// priority is at the top. Storage is 1-based — m_heap[0] is unused and the top
// entry lives at m_heap[1].
class priority_queue
{
public:
	priority_queue() :
		m_size(0)
	{
	}

	void clear()
	{
		m_heap.clear();
		m_size = 0;
	}

	// Resets the queue to hold up to max_entries entries, seeded with one entry.
	void init(uint32_t max_entries, uint32_t first_index, float first_priority)
	{
		m_heap.resize(max_entries + 1);
		m_heap[1].m_index = first_index;
		m_heap[1].m_priority = first_priority;
		m_size = 1;
	}

	inline uint32_t size() const { return m_size; }

	// Accessors for the highest-priority entry.
	inline uint32_t get_top_index() const { return m_heap[1].m_index; }
	inline float get_top_priority() const { return m_heap[1].m_priority; }

	// Removes the top entry: moves the last entry to the root and sifts it down.
	inline void delete_top()
	{
		assert(m_size > 0);
		m_heap[1] = m_heap[m_size];
		m_size--;
		if (m_size)
			down_heap(1);
	}

	// Inserts a new entry, growing the heap array if needed, then sifts it up into place.
	inline void add_heap(uint32_t index, float priority)
	{
		m_size++;

		uint32_t k = m_size;

		if (m_size >= m_heap.size())
			m_heap.resize(m_size + 1);

		for (;;)
		{
			uint32_t parent_index = k >> 1;
			// Stop at the root, or when the parent has strictly greater priority.
			if ((!parent_index) || (m_heap[parent_index].m_priority > priority))
				break;
			// Pull the weaker parent down into slot k.
			m_heap[k] = m_heap[parent_index];
			k = parent_index;
		}

		m_heap[k].m_index = index;
		m_heap[k].m_priority = priority;
	}

private:
	struct entry
	{
		uint32_t m_index;   // caller-supplied handle
		float m_priority;   // ordering key; larger = closer to the top
	};

	basisu::vector<entry> m_heap;
	uint32_t m_size;

	// Push down entry at index
	inline void down_heap(uint32_t heap_index)
	{
		uint32_t orig_index = m_heap[heap_index].m_index;
		const float orig_priority = m_heap[heap_index].m_priority;

		uint32_t child_index;
		while ((child_index = (heap_index << 1)) <= m_size)
		{
			// Choose the larger of the two children (if a right child exists).
			if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index;
			// Stop once the displaced entry outranks both children.
			if (orig_priority > m_heap[child_index].m_priority)
				break;
			m_heap[heap_index] = m_heap[child_index];
			heap_index = child_index;
		}

		m_heap[heap_index].m_index = orig_index;
		m_heap[heap_index].m_priority = orig_priority;
	}
};
  1164. // Tree structured vector quantization (TSVQ)
  1165. template <typename TrainingVectorType>
  1166. class tree_vector_quant
  1167. {
  1168. public:
  1169. typedef TrainingVectorType training_vec_type;
  1170. typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight;
  1171. typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs;
  1172. tree_vector_quant() :
  1173. m_next_codebook_index(0)
  1174. {
  1175. }
  1176. void clear()
  1177. {
  1178. clear_vector(m_training_vecs);
  1179. clear_vector(m_nodes);
  1180. m_next_codebook_index = 0;
  1181. }
  1182. void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); }
  1183. size_t get_total_training_vecs() const { return m_training_vecs.size(); }
  1184. const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; }
  1185. array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }
  1186. void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const
  1187. {
  1188. for (uint32_t i = 0; i < m_nodes.size(); i++)
  1189. {
  1190. const tsvq_node &n = m_nodes[i];
  1191. if (!n.is_leaf())
  1192. continue;
  1193. codebook.resize(codebook.size() + 1);
  1194. codebook.back() = n.m_training_vecs;
  1195. }
  1196. }
  1197. void retrieve(basisu::vector<TrainingVectorType> &codebook) const
  1198. {
  1199. for (uint32_t i = 0; i < m_nodes.size(); i++)
  1200. {
  1201. const tsvq_node &n = m_nodes[i];
  1202. if (!n.is_leaf())
  1203. continue;
  1204. codebook.resize(codebook.size() + 1);
  1205. codebook.back() = n.m_origin;
  1206. }
  1207. }
  1208. void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const
  1209. {
  1210. uint_vec node_stack;
  1211. node_stack.reserve(512);
  1212. codebook.resize(0);
  1213. codebook.reserve(max_clusters);
  1214. uint32_t node_index = 0;
  1215. while (true)
  1216. {
  1217. const tsvq_node& cur = m_nodes[node_index];
  1218. if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
  1219. {
  1220. codebook.resize(codebook.size() + 1);
  1221. codebook.back() = cur.m_training_vecs;
  1222. if (node_stack.empty())
  1223. break;
  1224. node_index = node_stack.back();
  1225. node_stack.pop_back();
  1226. continue;
  1227. }
  1228. node_stack.push_back(cur.m_right_index);
  1229. node_index = cur.m_left_index;
  1230. }
  1231. }
  1232. bool generate(uint32_t max_size)
  1233. {
  1234. if (!m_training_vecs.size())
  1235. return false;
  1236. m_next_codebook_index = 0;
  1237. clear_vector(m_nodes);
  1238. m_nodes.reserve(max_size * 2 + 1);
  1239. m_nodes.push_back(prepare_root());
  1240. priority_queue var_heap;
  1241. var_heap.init(max_size, 0, m_nodes[0].m_var);
  1242. basisu::vector<uint32_t> l_children, r_children;
  1243. // Now split the worst nodes
  1244. l_children.reserve(m_training_vecs.size() + 1);
  1245. r_children.reserve(m_training_vecs.size() + 1);
  1246. uint32_t total_leaf_nodes = 1;
  1247. //interval_timer tm;
  1248. //tm.start();
  1249. while ((var_heap.size()) && (total_leaf_nodes < max_size))
  1250. {
  1251. const uint32_t node_index = var_heap.get_top_index();
  1252. const tsvq_node &node = m_nodes[node_index];
  1253. assert(node.m_var == var_heap.get_top_priority());
  1254. assert(node.is_leaf());
  1255. var_heap.delete_top();
  1256. if (node.m_training_vecs.size() > 1)
  1257. {
  1258. if (split_node(node_index, var_heap, l_children, r_children))
  1259. {
  1260. // This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total.
  1261. total_leaf_nodes += 1;
  1262. }
  1263. }
  1264. }
  1265. //debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs());
  1266. return true;
  1267. }
  1268. private:
  1269. class tsvq_node
  1270. {
  1271. public:
  1272. inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { }
  1273. // vecs is erased
  1274. inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); }
  1275. inline bool is_leaf() const { return m_left_index < 0; }
  1276. float m_var;
  1277. uint64_t m_weight;
  1278. TrainingVectorType m_origin;
  1279. int32_t m_left_index, m_right_index;
  1280. basisu::vector<uint32_t> m_training_vecs;
  1281. int m_codebook_index;
  1282. };
  1283. typedef basisu::vector<tsvq_node> tsvq_node_vec;
  1284. tsvq_node_vec m_nodes;
  1285. array_of_weighted_training_vecs m_training_vecs;
  1286. uint32_t m_next_codebook_index;
  1287. tsvq_node prepare_root() const
  1288. {
  1289. double ttsum = 0.0f;
  1290. // Prepare root node containing all training vectors
  1291. tsvq_node root;
  1292. root.m_training_vecs.reserve(m_training_vecs.size());
  1293. for (uint32_t i = 0; i < m_training_vecs.size(); i++)
  1294. {
  1295. const TrainingVectorType &v = m_training_vecs[i].first;
  1296. const uint64_t weight = m_training_vecs[i].second;
  1297. root.m_training_vecs.push_back(i);
  1298. root.m_origin += (v * static_cast<float>(weight));
  1299. root.m_weight += weight;
  1300. ttsum += v.dot(v) * weight;
  1301. }
  1302. root.m_var = static_cast<float>(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight));
  1303. root.m_origin *= (1.0f / root.m_weight);
  1304. return root;
  1305. }
// Splits the node at node_index into two children (PCA axis + k-means refinement),
// appends both children to m_nodes, and pushes any still-splittable child onto
// var_heap keyed by its variance.
// l_children/r_children receive the training-vector indices assigned to each child.
// Returns false if the node could not be split.
bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector<uint32_t> &l_children, basisu::vector<uint32_t> &r_children)
{
	TrainingVectorType l_child_org, r_child_org;
	uint64_t l_weight = 0, r_weight = 0;
	float l_var = 0.0f, r_var = 0.0f;

	// Compute initial left/right child origins
	if (!prep_split(m_nodes[node_index], l_child_org, r_child_org))
		return false;

	// Use k-means iterations to refine these children vectors
	if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children))
		return false;

	// Create children
	const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1;

	m_nodes[node_index].m_left_index = l_child_index;
	m_nodes[node_index].m_right_index = r_child_index;

	m_nodes[node_index].m_codebook_index = m_next_codebook_index;
	m_next_codebook_index++;

	// NOTE: resize() may reallocate, so the child references are taken only after it.
	m_nodes.resize(m_nodes.size() + 2);

	tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index];

	l_child.set(l_child_org, l_weight, l_var, l_children);
	r_child.set(r_child_org, r_weight, r_var, r_children);

	// If a child's variance came out as zero but it still holds multiple training
	// vectors that are not all identical, force a tiny positive variance so the
	// child remains eligible for further splitting.
	if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1))
	{
		TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first);

		for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++)
		{
			if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first))
			{
				l_child.m_var = 1e-4f;
				break;
			}
		}
	}

	if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1))
	{
		TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first);

		for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++)
		{
			if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first))
			{
				r_child.m_var = 1e-4f;
				break;
			}
		}
	}

	// Only children with positive variance and >1 vectors can be split further.
	if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1))
		var_heap.add_heap(l_child_index, l_child.m_var);

	if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1))
		var_heap.add_heap(r_child_index, r_child.m_var);

	return true;
}
// Computes the split direction for a node: the principal axis (largest
// eigenvector) of the covariance matrix of the node's weighted training
// vectors, relative to the node's origin.
TrainingVectorType compute_split_axis(const tsvq_node &node) const
{
	const uint32_t N = TrainingVectorType::num_elements;

	matrix<N, N, float> cmatrix;

	if ((N != 16) || (!g_cpu_supports_sse41))
	{
		cmatrix.set_zero();

		// Compute covariance matrix from weighted input vectors
		for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
		{
			const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin);
			const TrainingVectorType w(static_cast<float>(m_training_vecs[node.m_training_vecs[i]].second) * v);

			// Only the upper triangle is accumulated; the matrix is symmetric.
			for (uint32_t x = 0; x < N; x++)
				for (uint32_t y = x; y < N; y++)
					cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y];
		}
	}
	else
	{
#if BASISU_SUPPORT_SSE
		// Specialize the case with 16x16 matrices, which are quite expensive without SIMD.
		// This SSE function takes pointers to void types, so do some sanity checks.
		assert(sizeof(TrainingVectorType) == sizeof(float) * 16);
		assert(sizeof(training_vec_with_weight) == sizeof(std::pair<vec16F, uint64_t>));
		update_covar_matrix_16x16_sse41(node.m_training_vecs.size(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix);
#endif
	}

	// Normalize by the node's total weight.
	const float renorm_scale = 1.0f / node.m_weight;

	for (uint32_t x = 0; x < N; x++)
		for (uint32_t y = x; y < N; y++)
			cmatrix[x][y] *= renorm_scale;

	// Diagonal flip: mirror the upper triangle into the lower triangle so the
	// full symmetric matrix is available to the PCA routine.
	for (uint32_t x = 0; x < (N - 1); x++)
		for (uint32_t y = x + 1; y < N; y++)
			cmatrix[y][x] = cmatrix[x][y];

	return compute_pca_from_covar<N, TrainingVectorType>(cmatrix);
}
  1394. bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const
  1395. {
  1396. //const uint32_t N = TrainingVectorType::num_elements;
  1397. if (2 == node.m_training_vecs.size())
  1398. {
  1399. l_child_result = m_training_vecs[node.m_training_vecs[0]].first;
  1400. r_child_result = m_training_vecs[node.m_training_vecs[1]].first;
  1401. return true;
  1402. }
  1403. TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f);
  1404. double l_weight = 0.0f, r_weight = 0.0f;
  1405. // Compute initial left/right children
  1406. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1407. {
  1408. const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
  1409. const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
  1410. double t = (v - node.m_origin).dot(axis);
  1411. if (t >= 0.0f)
  1412. {
  1413. r_child += v * weight;
  1414. r_weight += weight;
  1415. }
  1416. else
  1417. {
  1418. l_child += v * weight;
  1419. l_weight += weight;
  1420. }
  1421. }
  1422. if ((l_weight > 0.0f) && (r_weight > 0.0f))
  1423. {
  1424. l_child_result = l_child * static_cast<float>(1.0f / l_weight);
  1425. r_child_result = r_child * static_cast<float>(1.0f / r_weight);
  1426. }
  1427. else
  1428. {
  1429. TrainingVectorType l(1e+20f);
  1430. TrainingVectorType h(-1e+20f);
  1431. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1432. {
  1433. const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
  1434. l = TrainingVectorType::component_min(l, v);
  1435. h = TrainingVectorType::component_max(h, v);
  1436. }
  1437. TrainingVectorType r(h - l);
  1438. float largest_axis_v = 0.0f;
  1439. int largest_axis_index = -1;
  1440. for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++)
  1441. {
  1442. if (r[i] > largest_axis_v)
  1443. {
  1444. largest_axis_v = r[i];
  1445. largest_axis_index = i;
  1446. }
  1447. }
  1448. if (largest_axis_index < 0)
  1449. return false;
  1450. basisu::vector<float> keys(node.m_training_vecs.size());
  1451. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1452. keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index];
  1453. uint_vec indices(node.m_training_vecs.size());
  1454. indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]);
  1455. l_child.set_zero();
  1456. l_weight = 0;
  1457. r_child.set_zero();
  1458. r_weight = 0;
  1459. const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2;
  1460. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1461. {
  1462. const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
  1463. const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
  1464. if (i < half_index)
  1465. {
  1466. l_child += v * weight;
  1467. l_weight += weight;
  1468. }
  1469. else
  1470. {
  1471. r_child += v * weight;
  1472. r_weight += weight;
  1473. }
  1474. }
  1475. if ((l_weight > 0.0f) && (r_weight > 0.0f))
  1476. {
  1477. l_child_result = l_child * static_cast<float>(1.0f / l_weight);
  1478. r_child_result = r_child * static_cast<float>(1.0f / r_weight);
  1479. }
  1480. else
  1481. {
  1482. l_child_result = l;
  1483. r_child_result = h;
  1484. }
  1485. }
  1486. return true;
  1487. }
// Refines the initial left/right child centroids with up to 6 k-means (2-means)
// iterations, assigning each of the node's training vectors to the nearer child.
// Outputs the refined centroids, each side's total integer weight and variance,
// and the lists of training-vector indices assigned to each side.
// Returns false only if the node cannot be split into two non-empty children
// (i.e. all of its training vectors are identical).
bool refine_split(const tsvq_node &node,
	TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector<uint32_t> &l_children,
	TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector<uint32_t> &r_children) const
{
	l_children.reserve(node.m_training_vecs.size());
	r_children.reserve(node.m_training_vecs.size());

	float prev_total_variance = 1e+10f;

	// Refine left/right children locations using k-means iterations
	const uint32_t cMaxIters = 6;
	for (uint32_t iter = 0; iter < cMaxIters; iter++)
	{
		l_children.resize(0);
		r_children.resize(0);

		TrainingVectorType new_l_child(cZero), new_r_child(cZero);

		// ttsum accumulates sum(weight * |v|^2) per side, used for the variance below.
		double l_ttsum = 0.0f, r_ttsum = 0.0f;

		l_weight = 0;
		r_weight = 0;

		// Assignment step: each vector goes to the closer centroid (ties go right).
		for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
		{
			const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
			const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;

			double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v);

			if (left_dist2 >= right_dist2)
			{
				new_r_child += (v * static_cast<float>(weight));
				r_weight += weight;

				r_ttsum += weight * v.dot(v);
				r_children.push_back(node.m_training_vecs[i]);
			}
			else
			{
				new_l_child += (v * static_cast<float>(weight));
				l_weight += weight;

				l_ttsum += weight * v.dot(v);
				l_children.push_back(node.m_training_vecs[i]);
			}
		}

		// Node is unsplittable using the above algorithm - try something else to split it up.
		if ((!l_weight) || (!r_weight))
		{
			// Fallback partition: everything equal to the first vector goes right,
			// everything else goes left.
			l_children.resize(0);
			new_l_child.set(0.0f);
			l_ttsum = 0.0f;
			l_weight = 0;

			r_children.resize(0);
			new_r_child.set(0.0f);
			r_ttsum = 0.0f;
			r_weight = 0;

			TrainingVectorType firstVec;
			for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
			{
				const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
				const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;

				if ((!i) || (v == firstVec))
				{
					firstVec = v;

					new_r_child += (v * static_cast<float>(weight));
					r_weight += weight;

					r_ttsum += weight * v.dot(v);
					r_children.push_back(node.m_training_vecs[i]);
				}
				else
				{
					new_l_child += (v * static_cast<float>(weight));
					l_weight += weight;

					l_ttsum += weight * v.dot(v);
					l_children.push_back(node.m_training_vecs[i]);
				}
			}

			// Every vector equals the first one - genuinely unsplittable.
			if ((!l_weight) || (!r_weight))
				return false;
		}

		// Variance from accumulated sums: sum(w*|v|^2) - |sum(w*v)|^2 / sum(w),
		// computed before the centroid sums are normalized below.
		l_var = static_cast<float>(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));
		r_var = static_cast<float>(r_ttsum - (new_r_child.dot(new_r_child) / r_weight));

		// Update step: normalize the weighted sums into the new centroids.
		new_l_child *= (1.0f / l_weight);
		new_r_child *= (1.0f / r_weight);

		l_child = new_l_child;
		r_child = new_r_child;

		float total_var = l_var + r_var;

		// Stop early once the split is essentially perfect.
		const float cGiveupVariance = .00001f;
		if (total_var < cGiveupVariance)
			break;

		// Check to see if the variance has settled
		const float cVarianceDeltaThresh = .00125f;
		if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh)
			break;

		prev_total_variance = total_var;
	}

	return true;
}
  1578. };
// A group of training-vector indices that all share the same vector value,
// used to collapse duplicate inputs before quantization.
struct weighted_block_group
{
	uint64_t m_total_weight;	// sum of the weights of all grouped training vectors
	uint_vec m_indices;			// indices of the original training vectors in this group
};
// Generates a codebook by first splitting the training vectors into up to
// max_threads coarse clusters with quantizer q, then running one independent
// quantizer per coarse cluster in parallel on pJob_pool. The per-thread results
// (with indices remapped back to global training-vector indices) are merged
// into "codebook" and, when max_parent_codebook_size != 0, "parent_codebook".
// Small inputs fall back to a single-threaded q.generate().
// Returns false if any quantizer fails.
template<typename Quantizer>
bool generate_hierarchical_codebook_threaded_internal(Quantizer& q,
	uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
	basisu::vector<uint_vec>& codebook,
	basisu::vector<uint_vec>& parent_codebook,
	uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool)
{
	codebook.resize(0);
	parent_codebook.resize(0);

	// Not worth threading - run the quantizer directly.
	if ((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16))
	{
		if (!q.generate(max_codebook_size))
			return false;

		q.retrieve(codebook);

		if (max_parent_codebook_size)
			q.retrieve(max_parent_codebook_size, parent_codebook);

		return true;
	}

	const uint32_t cMaxThreads = 16;
	if (max_threads > cMaxThreads)
		max_threads = cMaxThreads;

	// First produce max_threads coarse clusters to hand one to each thread.
	if (!q.generate(max_threads))
		return false;

	basisu::vector<uint_vec> initial_codebook;

	q.retrieve(initial_codebook);

	// Fewer clusters than threads - the coarse result is already the answer.
	if (initial_codebook.size() < max_threads)
	{
		codebook = initial_codebook;

		if (max_parent_codebook_size)
			q.retrieve(max_parent_codebook_size, parent_codebook);

		return true;
	}

	Quantizer quantizers[cMaxThreads];

	bool success_flags[cMaxThreads];
	clear_obj(success_flags);

	basisu::vector<uint_vec> local_clusters[cMaxThreads];
	basisu::vector<uint_vec> local_parent_clusters[cMaxThreads];

	for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
	{
#ifndef __EMSCRIPTEN__
		pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] {
#endif

			Quantizer& lq = quantizers[thread_iter];
			uint_vec& cluster_indices = initial_codebook[thread_iter];

			// Records how each local training-vector index maps back to the
			// caller's global training-vector index.
			uint_vec local_to_global(cluster_indices.size());

			for (uint32_t i = 0; i < cluster_indices.size(); i++)
			{
				const uint32_t global_training_vec_index = cluster_indices[i];
				local_to_global[i] = global_training_vec_index;
				lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second);
			}

			// When limiting, each thread gets an even share of the codebook budget.
			const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs();

			success_flags[thread_iter] = lq.generate(max_clusters);

			if (success_flags[thread_iter])
			{
				// Translate the local cluster contents back to global indices.
				lq.retrieve(local_clusters[thread_iter]);

				for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++)
				{
					for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++)
						local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]];
				}

				if (max_parent_codebook_size)
				{
					lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]);

					for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++)
					{
						for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++)
							local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]];
					}
				}
			}

#ifndef __EMSCRIPTEN__
			} );
#endif

	} // thread_iter

#ifndef __EMSCRIPTEN__
	// The jobs capture the locals above by reference, so block until all finish.
	pJob_pool->wait_for_all();
#endif

	uint32_t total_clusters = 0, total_parent_clusters = 0;

	for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++)
	{
		if (!success_flags[thread_iter])
			return false;

		total_clusters += (uint32_t)local_clusters[thread_iter].size();
		total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size();
	}

	codebook.reserve(total_clusters);
	parent_codebook.reserve(total_parent_clusters);

	// Merge the per-thread results (swap to avoid copying the index vectors).
	for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
	{
		for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++)
		{
			codebook.resize(codebook.size() + 1);
			codebook.back().swap(local_clusters[thread_iter][j]);
		}

		for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++)
		{
			parent_codebook.resize(parent_codebook.size() + 1);
			parent_codebook.back().swap(local_parent_clusters[thread_iter][j]);
		}
	}

	return true;
}
// Wrapper around generate_hierarchical_codebook_threaded_internal() that first
// collapses duplicate training vectors into weighted groups (so identical
// inputs are only quantized once), quantizes the unique vectors, then expands
// the resulting clusters back into the caller's original training-vector
// indices.
// If even_odd_input_pairs_equal is true, the input must consist of pairs of
// identical vectors at indices (2i, 2i+1), which are grouped two at a time.
template<typename Quantizer>
bool generate_hierarchical_codebook_threaded(Quantizer& q,
	uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
	basisu::vector<uint_vec>& codebook,
	basisu::vector<uint_vec>& parent_codebook,
	uint32_t max_threads, job_pool *pJob_pool,
	bool even_odd_input_pairs_equal)
{
	typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher;

	typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group,
		training_vec_bit_hasher> group_hash;

	//interval_timer tm;
	//tm.start();

	// Group identical training vectors, summing their weights.
	group_hash unique_vecs;

	unique_vecs.reserve(20000);

	weighted_block_group g;

	if (even_odd_input_pairs_equal)
	{
		g.m_indices.resize(2);

		assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0);

		for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2)
		{
			assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first);

			g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second;
			g.m_indices[0] = i;
			g.m_indices[1] = i + 1;

			auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));

			if (!ins_res.second)
			{
				// Vector already seen: merge this pair into the existing group.
				(ins_res.first)->second.m_total_weight += g.m_total_weight;
				(ins_res.first)->second.m_indices.push_back(i);
				(ins_res.first)->second.m_indices.push_back(i + 1);
			}
		}
	}
	else
	{
		g.m_indices.resize(1);

		for (uint32_t i = 0; i < q.get_training_vecs().size(); i++)
		{
			g.m_total_weight = q.get_training_vecs()[i].second;
			g.m_indices[0] = i;

			auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));

			if (!ins_res.second)
			{
				(ins_res.first)->second.m_total_weight += g.m_total_weight;
				(ins_res.first)->second.m_indices.push_back(i);
			}
		}
	}

	//debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs());
	debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size());

	// Build a quantizer over the unique vectors only; remember each group's
	// hash iterator so cluster indices can be expanded afterwards.
	Quantizer group_quant;
	typedef typename group_hash::const_iterator group_hash_const_iter;
	basisu::vector<group_hash_const_iter> unique_vec_iters;
	unique_vec_iters.reserve(unique_vecs.size());

	for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter)
	{
		group_quant.add_training_vec(iter->first, iter->second.m_total_weight);
		unique_vec_iters.push_back(iter);
	}

	// Only cap the per-thread clusterizers when there are more unique inputs
	// than codebook entries.
	bool limit_clusterizers = true;
	if (unique_vecs.size() <= max_codebook_size)
		limit_clusterizers = false;

	debug_printf("Limit clusterizers: %u\n", limit_clusterizers);

	basisu::vector<uint_vec> group_codebook, group_parent_codebook;
	bool status = generate_hierarchical_codebook_threaded_internal(group_quant,
		max_codebook_size, max_parent_codebook_size,
		group_codebook,
		group_parent_codebook,
		(unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool);

	if (!status)
		return false;

	// Expand each cluster of group indices back into original training-vector indices.
	codebook.resize(0);
	for (uint32_t i = 0; i < group_codebook.size(); i++)
	{
		codebook.resize(codebook.size() + 1);

		for (uint32_t j = 0; j < group_codebook[i].size(); j++)
		{
			const uint32_t group_index = group_codebook[i][j];

			typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
			const uint_vec& training_vec_indices = group_iter->second.m_indices;

			append_vector(codebook.back(), training_vec_indices);
		}
	}

	parent_codebook.resize(0);
	for (uint32_t i = 0; i < group_parent_codebook.size(); i++)
	{
		parent_codebook.resize(parent_codebook.size() + 1);

		for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++)
		{
			const uint32_t group_index = group_parent_codebook[i][j];

			typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
			const uint_vec& training_vec_indices = group_iter->second.m_indices;

			append_vector(parent_codebook.back(), training_vec_indices);
		}
	}

	return true;
}
  1786. // Canonical Huffman coding
  1787. class histogram
  1788. {
  1789. basisu::vector<uint32_t> m_hist;
  1790. public:
  1791. histogram(uint32_t size = 0) { init(size); }
  1792. void clear()
  1793. {
  1794. clear_vector(m_hist);
  1795. }
  1796. void init(uint32_t size)
  1797. {
  1798. m_hist.resize(0);
  1799. m_hist.resize(size);
  1800. }
  1801. inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); }
  1802. inline const uint32_t &operator[] (uint32_t index) const
  1803. {
  1804. return m_hist[index];
  1805. }
  1806. inline uint32_t &operator[] (uint32_t index)
  1807. {
  1808. return m_hist[index];
  1809. }
  1810. inline void inc(uint32_t index)
  1811. {
  1812. m_hist[index]++;
  1813. }
  1814. uint64_t get_total() const
  1815. {
  1816. uint64_t total = 0;
  1817. for (uint32_t i = 0; i < m_hist.size(); ++i)
  1818. total += m_hist[i];
  1819. return total;
  1820. }
  1821. double get_entropy() const
  1822. {
  1823. double total = static_cast<double>(get_total());
  1824. if (total == 0.0f)
  1825. return 0.0f;
  1826. const double inv_total = 1.0f / total;
  1827. const double neg_inv_log2 = -1.0f / log(2.0f);
  1828. double e = 0.0f;
  1829. for (uint32_t i = 0; i < m_hist.size(); i++)
  1830. if (m_hist[i])
  1831. e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]);
  1832. return e;
  1833. }
  1834. };
// One symbol record used while constructing canonical Huffman codes.
struct sym_freq
{
	uint32_t m_key;			// sort key (radix sorted by canonical_huffman_radix_sort_syms)
	uint16_t m_sym_index;	// index of the symbol this record refers to
};
// Canonical Huffman construction helpers (implemented in the .cpp):
// Radix sorts num_syms records by m_key, using pSyms0/pSyms1 as ping-pong
// buffers; returns whichever buffer holds the sorted result.
sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
// In-place minimum-redundancy (Huffman) code length computation over the
// frequency-sorted array A. (Presumably the Moffat/Katajainen algorithm -
// see the implementation.)
void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms);
// Clamps code lengths to max_code_size, adjusting the per-length counts in pNum_codes.
void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size);
// Canonical Huffman encoding table: per-symbol code bits and code sizes,
// built from symbol frequencies.
class huffman_encoding_table
{
public:
	huffman_encoding_table()
	{
	}

	void clear()
	{
		clear_vector(m_codes);
		clear_vector(m_code_sizes);
	}

	// Builds the table from histogram h.
	// NOTE(review): reads &h[0], so h must be non-empty - confirm callers never
	// pass a zero-sized histogram.
	bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize)
	{
		return init(h.size(), &h[0], max_code_size);
	}

	// Builds the table from raw frequency arrays (implemented in the .cpp).
	bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size);
	bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size);

	inline const uint16_vec &get_codes() const { return m_codes; }
	inline const uint8_vec &get_code_sizes() const { return m_code_sizes; }

	// Returns 1 + the index of the highest symbol with a nonzero code size
	// (0 if no symbol is used).
	uint32_t get_total_used_codes() const
	{
		for (int i = static_cast<int>(m_code_sizes.size()) - 1; i >= 0; i--)
			if (m_code_sizes[i])
				return i + 1;
		return 0;
	}

private:
	uint16_vec m_codes;		// per-symbol code bits
	uint8_vec m_code_sizes;	// per-symbol code length in bits (0 = symbol unused)
};
// LSB-first bitstream writer: bits are packed into a small buffer and appended
// to m_bytes one byte at a time. Used by the Huffman/compression code.
class bitwise_coder
{
public:
	bitwise_coder() :
		m_bit_buffer(0),
		m_bit_buffer_size(0),
		m_total_bits(0)
	{
	}

	// Frees the byte buffer and resets all state.
	inline void clear()
	{
		clear_vector(m_bytes);
		m_bit_buffer = 0;
		m_bit_buffer_size = 0;
		m_total_bits = 0;
	}

	// Resets the stream but keeps the byte buffer's capacity.
	inline void restart()
	{
		m_bytes.resize(0);
		m_bit_buffer = 0;
		m_bit_buffer_size = 0;
		m_total_bits = 0;
	}

	inline const uint8_vec &get_bytes() const { return m_bytes; }

	inline uint64_t get_total_bits() const { return m_total_bits; }
	inline void clear_total_bits() { m_total_bits = 0; }

	// Resets the stream, reserving reserve_size bytes of output up front.
	inline void init(uint32_t reserve_size = 1024)
	{
		m_bytes.reserve(reserve_size);
		m_bytes.resize(0);

		m_bit_buffer = 0;
		m_bit_buffer_size = 0;
		m_total_bits = 0;
	}

	// Pads the stream to the next byte boundary, writing any buffered bits.
	// Returns the number of bits emitted (8, or 0 if already aligned).
	inline uint32_t flush()
	{
		if (m_bit_buffer_size)
		{
			// Account for the zero padding bits added to reach the byte boundary.
			m_total_bits += 8 - (m_bit_buffer_size & 7);
			append_byte(static_cast<uint8_t>(m_bit_buffer));

			m_bit_buffer = 0;
			m_bit_buffer_size = 0;

			return 8;
		}

		return 0;
	}

	// Writes the low num_bits of "bits", LSB first. num_bits may be 0..32.
	// Returns num_bits.
	inline uint32_t put_bits(uint32_t bits, uint32_t num_bits)
	{
		assert(num_bits <= 32);
		assert(bits < (1ULL << num_bits));

		if (!num_bits)
			return 0;

		m_total_bits += num_bits;

		// Append the new bits above whatever is already buffered.
		uint64_t v = (static_cast<uint64_t>(bits) << m_bit_buffer_size) | m_bit_buffer;
		m_bit_buffer_size += num_bits;

		// Emit whole bytes as they become available.
		while (m_bit_buffer_size >= 8)
		{
			append_byte(static_cast<uint8_t>(v));
			v >>= 8;
			m_bit_buffer_size -= 8;
		}

		// At most 7 bits remain buffered, so the uint8_t cast is lossless.
		m_bit_buffer = static_cast<uint8_t>(v);
		return num_bits;
	}

	// Writes the Huffman code for symbol "sym". Returns the code size in bits.
	inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab)
	{
		uint32_t code = tab.get_codes()[sym];
		uint32_t code_size = tab.get_code_sizes()[sym];
		assert(code_size >= 1);
		put_bits(code, code_size);
		return code_size;
	}

	// Writes v (0 <= v < n) using a truncated binary code: with k = floor(log2(n)),
	// the first u = 2^(k+1)-n values take k bits, the remaining values take k+1.
	// Returns the number of bits written.
	inline uint32_t put_truncated_binary(uint32_t v, uint32_t n)
	{
		assert((n >= 2) && (v < n));

		uint32_t k = floor_log2i(n);
		uint32_t u = (1 << (k + 1)) - n;

		if (v < u)
			return put_bits(v, k);

		uint32_t x = v + u;
		assert((x >> 1) >= u);

		put_bits(x >> 1, k);
		put_bits(x & 1, 1);
		return k + 1;
	}

	// Writes v using a Rice code with parameter m: the quotient (v >> m) as a
	// run of ones, then the m-bit remainder. The unary terminator (a 0 bit) is
	// folded into the low bit of the final put_bits(r << 1, m + 1) call.
	// Returns the number of bits written.
	inline uint32_t put_rice(uint32_t v, uint32_t m)
	{
		assert(m);

		const uint64_t start_bits = m_total_bits;

		uint32_t q = v >> m, r = v & ((1 << m) - 1);

		// rice coding sanity check
		assert(q <= 64);

		// Emit the unary quotient in chunks of at most 16 one-bits.
		for (; q > 16; q -= 16)
			put_bits(0xFFFF, 16);

		put_bits((1 << q) - 1, q);
		put_bits(r << 1, m + 1);

		return (uint32_t)(m_total_bits - start_bits);
	}

	// Writes v as a variable-length code in chunk_bits-sized pieces, low piece
	// first; each piece carries a continuation flag in its top (chunk_bits+1'th)
	// bit. Returns the number of bits written.
	inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits)
	{
		assert(chunk_bits);

		const uint32_t chunk_size = 1 << chunk_bits;
		const uint32_t chunk_mask = chunk_size - 1;

		uint32_t total_bits = 0;

		for ( ; ; )
		{
			uint32_t next_v = v >> chunk_bits;

			total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1);
			if (!next_v)
				break;

			v = next_v;
		}

		return total_bits;
	}

	// Serializes a Huffman code-length table into the stream (implemented in the .cpp).
	uint32_t emit_huffman_table(const huffman_encoding_table &tab);

private:
	uint8_vec m_bytes;							// completed output bytes
	uint32_t m_bit_buffer, m_bit_buffer_size;	// partial byte (< 8 bits) awaiting output
	uint64_t m_total_bits;						// running count of all bits written

	void append_byte(uint8_t c)
	{
		m_bytes.resize(m_bytes.size() + 1);
		m_bytes.back() = c;
	}

	// Helpers used by emit_huffman_table (implemented in the .cpp); presumably
	// run-length encoding of code lengths - confirm against the implementation.
	static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len);
	static void end_zero_run(uint16_vec &syms, uint32_t &run_size);
};
// Huffman-codes fixed-size groups of small symbols: total_syms_per_group
// symbols of bits_per_sym bits each are packed into one composite symbol, and
// the composite symbols are Huffman coded. Usage is two-pass: emit() all
// symbols, start_encoding(), then emit_next_sym() for each symbol again.
class huff2D
{
public:
	huff2D() { }
	huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); }

	inline const histogram &get_histogram() const { return m_histogram; }
	inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; }

	// bits_per_sym * total_syms_per_group must fit in 16 bits.
	inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group)
	{
		assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1);

		m_bits_per_sym = bits_per_sym;
		m_total_syms_per_group = total_syms_per_group;
		m_cur_sym_bits = 0;
		m_cur_num_syms = 0;
		m_decode_syms_remaining = 0;
		m_next_decoder_group_index = 0;

		// One histogram slot per possible composite group value.
		m_histogram.init(1 << (bits_per_sym * total_syms_per_group));
	}

	inline void clear()
	{
		m_group_bits.clear();

		m_cur_sym_bits = 0;
		m_cur_num_syms = 0;
		m_decode_syms_remaining = 0;
		m_next_decoder_group_index = 0;
	}

	// First pass: records symbol "sym"; completes the group automatically once full.
	inline void emit(uint32_t sym)
	{
		m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym));
		m_cur_num_syms++;

		if (m_cur_num_syms == m_total_syms_per_group)
			flush();
	}

	// Closes a partially-filled group, if any.
	inline void flush()
	{
		if (m_cur_num_syms)
		{
			m_group_bits.push_back(m_cur_sym_bits);
			m_histogram.inc(m_cur_sym_bits);

			m_cur_sym_bits = 0;
			m_cur_num_syms = 0;
		}
	}

	// After all emit() calls: builds the Huffman table from the group histogram
	// and rewinds for the second pass. Returns false if table creation fails.
	inline bool start_encoding(uint32_t code_size_limit = 16)
	{
		flush();

		if (!m_encoding_table.init(m_histogram, code_size_limit))
			return false;

		m_decode_syms_remaining = 0;
		m_next_decoder_group_index = 0;

		return true;
	}

	// Second pass: call once per original symbol, in emit() order. Writes a
	// group's Huffman code on the first symbol of each group; returns the bits
	// written (0 for the group's remaining symbols).
	inline uint32_t emit_next_sym(bitwise_coder &c)
	{
		uint32_t bits = 0;

		if (!m_decode_syms_remaining)
		{
			bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table);
			m_decode_syms_remaining = m_total_syms_per_group;
		}

		m_decode_syms_remaining--;
		return bits;
	}

	// Re-aligns the second pass to the next group boundary.
	inline void emit_flush()
	{
		m_decode_syms_remaining = 0;
	}

private:
	uint_vec m_group_bits;						// packed composite symbol of each completed group
	huffman_encoding_table m_encoding_table;
	histogram m_histogram;
	uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining;
};
  2073. bool huffman_test(int rand_seed);
  2074. // VQ index reordering
// Reorders palette/codebook entries based on how often symbols appear adjacent
// in the index stream (optionally biased by an entry-distance callback), and
// produces an old-to-new index remap table.
class palette_index_reorderer
{
public:
	palette_index_reorderer()
	{
	}

	void clear()
	{
		clear_vector(m_hist);
		clear_vector(m_total_count_to_picked);
		clear_vector(m_entries_picked);
		clear_vector(m_entries_to_do);
		clear_vector(m_remap_table);
	}

	// returns [0,1] distance of entry i to entry j
	typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx);

	// Builds the remap table from num_indices indices over num_syms symbols.
	// pDist_func (with context pCtx) optionally biases the ordering by entry
	// similarity, scaled by dist_func_weight. Implemented in the .cpp.
	void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);

	// Table remaps old to new symbol indices
	inline const uint_vec &get_remap_table() const { return m_remap_table; }

private:
	uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table;

	// m_hist is a co-occurrence matrix stored flat (n*n); only entries with
	// i < j are written, so get_hist() swaps the indices as needed.
	inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? m_hist[j * n + i] : m_hist[i * n + j]; }
	inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } }

	// Internal stages of init() (implemented in the .cpp).
	void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices);
	void find_initial(uint32_t num_syms);
	void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
	float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
};
  2103. // Simple 32-bit 2D image class
  2104. class image
  2105. {
  2106. public:
// Constructs an empty 0x0 image.
image() :
	m_width(0), m_height(0), m_pitch(0)
{
}

// Constructs a w x h image; p is the pitch in pixels (UINT32_MAX selects the
// default pitch - see resize()/crop()).
image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
	m_width(0), m_height(0), m_pitch(0)
{
	resize(w, h, p);
}

// Constructs from a packed buffer holding "comps" 8-bit components per pixel.
image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) :
	m_width(0), m_height(0), m_pitch(0)
{
	init(pImage, width, height, comps);
}

// Copy constructor (delegates to operator=).
image(const image &other) :
	m_width(0), m_height(0), m_pitch(0)
{
	*this = other;
}
  2126. image &swap(image &other)
  2127. {
  2128. std::swap(m_width, other.m_width);
  2129. std::swap(m_height, other.m_height);
  2130. std::swap(m_pitch, other.m_pitch);
  2131. m_pixels.swap(other.m_pixels);
  2132. return *this;
  2133. }
  2134. image &operator= (const image &rhs)
  2135. {
  2136. if (this != &rhs)
  2137. {
  2138. m_width = rhs.m_width;
  2139. m_height = rhs.m_height;
  2140. m_pitch = rhs.m_pitch;
  2141. m_pixels = rhs.m_pixels;
  2142. }
  2143. return *this;
  2144. }
  2145. image &clear()
  2146. {
  2147. m_width = 0;
  2148. m_height = 0;
  2149. m_pitch = 0;
  2150. clear_vector(m_pixels);
  2151. return *this;
  2152. }
// Resizes to w x h (pitch p, background fill for new pixels); implemented via crop().
image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color)
{
	return crop(w, h, p, background);
}
  2157. image &set_all(const color_rgba &c)
  2158. {
  2159. for (uint32_t i = 0; i < m_pixels.size(); i++)
  2160. m_pixels[i] = c;
  2161. return *this;
  2162. }
  2163. void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps)
  2164. {
  2165. assert(comps >= 1 && comps <= 4);
  2166. resize(width, height);
  2167. for (uint32_t y = 0; y < height; y++)
  2168. {
  2169. for (uint32_t x = 0; x < width; x++)
  2170. {
  2171. const uint8_t *pSrc = &pImage[(x + y * width) * comps];
  2172. color_rgba &dst = (*this)(x, y);
  2173. if (comps == 1)
  2174. {
  2175. dst.r = pSrc[0];
  2176. dst.g = pSrc[0];
  2177. dst.b = pSrc[0];
  2178. dst.a = 255;
  2179. }
  2180. else if (comps == 2)
  2181. {
  2182. dst.r = pSrc[0];
  2183. dst.g = pSrc[0];
  2184. dst.b = pSrc[0];
  2185. dst.a = pSrc[1];
  2186. }
  2187. else
  2188. {
  2189. dst.r = pSrc[0];
  2190. dst.g = pSrc[1];
  2191. dst.b = pSrc[2];
  2192. if (comps == 4)
  2193. dst.a = pSrc[3];
  2194. else
  2195. dst.a = 255;
  2196. }
  2197. }
  2198. }
  2199. }
  2200. image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c)
  2201. {
  2202. for (uint32_t iy = 0; iy < h; iy++)
  2203. for (uint32_t ix = 0; ix < w; ix++)
  2204. set_clipped(x + ix, y + iy, c);
  2205. return *this;
  2206. }
  2207. image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c)
  2208. {
  2209. for (uint32_t iy = 0; iy < h; iy++)
  2210. for (uint32_t ix = 0; ix < w; ix++)
  2211. set_clipped_alpha(x + ix, y + iy, c);
  2212. return *this;
  2213. }
// Like crop(), but any newly added right/bottom pixels are filled by replicating
// the previous image's edge pixels instead of a solid background color.
image &crop_dup_borders(uint32_t w, uint32_t h)
{
	const uint32_t orig_w = m_width, orig_h = m_height;

	crop(w, h);

	// Nothing to replicate if the image was previously empty.
	if (orig_w && orig_h)
	{
		if (m_width > orig_w)
		{
			// Fill the new columns from the old right edge.
			for (uint32_t x = orig_w; x < m_width; x++)
				for (uint32_t y = 0; y < m_height; y++)
					set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
		}

		if (m_height > orig_h)
		{
			// Fill the new rows from the old bottom edge.
			for (uint32_t y = orig_h; y < m_height; y++)
				for (uint32_t x = 0; x < m_width; x++)
					set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
		}
	}

	return *this;
}
// pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer).
// Adopts an existing buffer of p * h pixels as this image's storage (p defaults to w).
// If any dimension is 0 the image is simply cleared and pPixels is NOT adopted.
image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX)
{
	if (p == UINT32_MAX)
		p = w;

	clear();

	if ((!p) || (!w) || (!h))
		return *this;

	m_pixels.grant_ownership(pPixels, p * h, p * h);

	m_width = w;
	m_height = h;
	m_pitch = p;

	return *this;
}
// Resizes the image to w x h with pitch p (UINT32_MAX means p = w). Existing pixels
// are preserved where the old and new images overlap; newly exposed pixels get
// background. Pass init_image = false to leave the new pixel data uninitialized.
// Any zero dimension clears the image entirely.
image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true)
{
	if (p == UINT32_MAX)
		p = w;

	// Already the requested size and pitch - nothing to do.
	if ((w == m_width) && (m_height == h) && (m_pitch == p))
		return *this;

	if ((!w) || (!h) || (!p))
	{
		clear();
		return *this;
	}

	// Move the old pixel data aside, then allocate the new buffer.
	color_rgba_vec cur_state;
	cur_state.swap(m_pixels);

	m_pixels.resize(p * h);

	if (init_image)
	{
		if (m_width || m_height)
		{
			// Copy the overlapping region (m_width/m_height/m_pitch still describe
			// the OLD image here); fill everything else with background.
			for (uint32_t y = 0; y < h; y++)
			{
				for (uint32_t x = 0; x < w; x++)
				{
					if ((x < m_width) && (y < m_height))
						m_pixels[x + y * p] = cur_state[x + y * m_pitch];
					else
						m_pixels[x + y * p] = background;
				}
			}
		}
		else
		{
			m_pixels.set_all(background);
		}
	}

	m_width = w;
	m_height = h;
	m_pitch = p;

	return *this;
}
// Unchecked (assert-only) pixel access.
inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }

// Pixel access with coordinates clamped to the image bounds.
inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }

// Pixel access with per-axis wrap (positive modulo) or clamp addressing.
inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
{
	x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
	y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
	return m_pixels[x + y * m_pitch];
}

inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
{
	x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
	y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
	return m_pixels[x + y * m_pitch];
}
// Writes pixel c at (x, y); out-of-bounds coordinates are silently ignored.
// (The unsigned compare rejects negative coordinates in the same test.)
inline image &set_clipped(int x, int y, const color_rgba &c)
{
	if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
		(*this)(x, y) = c;
	return *this;
}

// Like set_clipped(), but only writes c's alpha component.
inline image& set_clipped_alpha(int x, int y, const color_rgba& c)
{
	if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
		(*this)(x, y).m_comps[3] = c.m_comps[3];
	return *this;
}
  2316. // Very straightforward blit with full clipping. Not fast, but it works.
  2317. image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
  2318. {
  2319. for (int y = 0; y < src_h; y++)
  2320. {
  2321. const int sy = src_y + y;
  2322. if (sy < 0)
  2323. continue;
  2324. else if (sy >= (int)src.get_height())
  2325. break;
  2326. for (int x = 0; x < src_w; x++)
  2327. {
  2328. const int sx = src_x + x;
  2329. if (sx < 0)
  2330. continue;
  2331. else if (sx >= (int)src.get_height())
  2332. break;
  2333. set_clipped(dst_x + x, dst_y + y, src(sx, sy));
  2334. }
  2335. }
  2336. return *this;
  2337. }
// Copies a w x h block of pixels at (src_x, src_y) into pDst (raster order).
// Reads outside the image are clamped to the nearest edge pixel.
const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
{
	if (((src_x + w) > m_width) || ((src_y + h) > m_height))
	{
		// Slower clamping case
		for (uint32_t y = 0; y < h; y++)
			for (uint32_t x = 0; x < w; x++)
				*pDst++ = get_clamped(src_x + x, src_y + y);
	}
	else
	{
		// Fully interior block - fast row-by-row memcpy.
		const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch];

		for (uint32_t y = 0; y < h; y++)
		{
			memcpy(pDst, pSrc, w * sizeof(color_rgba));
			pSrc += m_pitch;
			pDst += w;
		}
	}

	return *this;
}
  2359. image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
  2360. {
  2361. for (uint32_t y = 0; y < h; y++)
  2362. for (uint32_t x = 0; x < w; x++)
  2363. set_clipped(dst_x + x, dst_y + y, *pSrc++);
  2364. return *this;
  2365. }
inline uint32_t get_width() const { return m_width; }
inline uint32_t get_height() const { return m_height; }
inline uint32_t get_pitch() const { return m_pitch; }
inline uint32_t get_total_pixels() const { return m_width * m_height; }

// Number of w x h (or h-tall) blocks needed to cover the image, rounding up.
inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }

inline const color_rgba_vec &get_pixels() const { return m_pixels; }
inline color_rgba_vec &get_pixels() { return m_pixels; }

// Raw pointer to the first pixel.
inline const color_rgba *get_ptr() const { return &m_pixels[0]; }
inline color_rgba *get_ptr() { return &m_pixels[0]; }
  2377. bool has_alpha(uint32_t channel = 3) const
  2378. {
  2379. for (uint32_t y = 0; y < m_height; ++y)
  2380. for (uint32_t x = 0; x < m_width; ++x)
  2381. if ((*this)(x, y)[channel] < 255)
  2382. return true;
  2383. return false;
  2384. }
  2385. image &set_alpha(uint8_t a)
  2386. {
  2387. for (uint32_t y = 0; y < m_height; ++y)
  2388. for (uint32_t x = 0; x < m_width; ++x)
  2389. (*this)(x, y).a = a;
  2390. return *this;
  2391. }
  2392. image &flip_y()
  2393. {
  2394. for (uint32_t y = 0; y < m_height / 2; ++y)
  2395. for (uint32_t x = 0; x < m_width; ++x)
  2396. std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));
  2397. return *this;
  2398. }
// TODO: There are many ways to do this, not sure this is the best way.
// Re-normalizes a [0,255]-encoded normal map in place. Neutral (128,128,128) pixels
// are left alone; near-zero-length vectors are snapped to neutral; vectors whose
// length is noticeably != 1 are normalized and re-encoded. Alpha is preserved.
image &renormalize_normal_map()
{
	for (uint32_t y = 0; y < m_height; y++)
	{
		for (uint32_t x = 0; x < m_width; x++)
		{
			color_rgba &c = (*this)(x, y);
			if ((c.r == 128) && (c.g == 128) && (c.b == 128))
				continue;

			// Decode [0,255] -> [-1,1].
			vec3F v(c.r, c.g, c.b);
			v = (v * (2.0f / 255.0f)) - vec3F(1.0f);
			v.clamp(-1.0f, 1.0f);

			float length = v.length();
			const float cValidThresh = .077f;

			if (length < cValidThresh)
			{
				// Degenerate (near-zero) vector - snap to the neutral normal.
				c.set(128, 128, 128, c.a);
			}
			else if (fabs(length - 1.0f) > cValidThresh)
			{
				// Noticeably denormalized - renormalize, then re-encode to [0,255] with rounding.
				if (length)
					v /= length;

				for (uint32_t i = 0; i < 3; i++)
					c[i] = static_cast<uint8_t>(clamp<float>(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f));

				if ((c.g == 128) && (c.r == 128))
				{
					// X/Y re-encoded to neutral: push Z to a pole so the result stays unit length.
					if (c.b < 128)
						c.b = 0;
					else
						c.b = 255;
				}
			}
		}
	}
	return *this;
}
  2436. void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...);
  2437. private:
  2438. uint32_t m_width, m_height, m_pitch; // all in pixels
  2439. color_rgba_vec m_pixels;
  2440. };
  2441. // Float images
  2442. typedef basisu::vector<vec4F> vec4F_vec;
  2443. class imagef
  2444. {
  2445. public:
  2446. imagef() :
  2447. m_width(0), m_height(0), m_pitch(0)
  2448. {
  2449. }
  2450. imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
  2451. m_width(0), m_height(0), m_pitch(0)
  2452. {
  2453. resize(w, h, p);
  2454. }
  2455. imagef(const imagef &other) :
  2456. m_width(0), m_height(0), m_pitch(0)
  2457. {
  2458. *this = other;
  2459. }
  2460. imagef &swap(imagef &other)
  2461. {
  2462. std::swap(m_width, other.m_width);
  2463. std::swap(m_height, other.m_height);
  2464. std::swap(m_pitch, other.m_pitch);
  2465. m_pixels.swap(other.m_pixels);
  2466. return *this;
  2467. }
  2468. imagef &operator= (const imagef &rhs)
  2469. {
  2470. if (this != &rhs)
  2471. {
  2472. m_width = rhs.m_width;
  2473. m_height = rhs.m_height;
  2474. m_pitch = rhs.m_pitch;
  2475. m_pixels = rhs.m_pixels;
  2476. }
  2477. return *this;
  2478. }
  2479. imagef &clear()
  2480. {
  2481. m_width = 0;
  2482. m_height = 0;
  2483. m_pitch = 0;
  2484. clear_vector(m_pixels);
  2485. return *this;
  2486. }
  2487. imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0))
  2488. {
  2489. const uint32_t width = src.get_width();
  2490. const uint32_t height = src.get_height();
  2491. resize(width, height);
  2492. for (int y = 0; y < (int)height; y++)
  2493. {
  2494. for (uint32_t x = 0; x < width; x++)
  2495. {
  2496. const color_rgba &src_pixel = src(x, y);
  2497. (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]);
  2498. }
  2499. }
  2500. return *this;
  2501. }
  2502. imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
  2503. {
  2504. return resize(other.get_width(), other.get_height(), p, background);
  2505. }
  2506. imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
  2507. {
  2508. return crop(w, h, p, background);
  2509. }
  2510. imagef &set_all(const vec4F &c)
  2511. {
  2512. for (uint32_t i = 0; i < m_pixels.size(); i++)
  2513. m_pixels[i] = c;
  2514. return *this;
  2515. }
  2516. imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c)
  2517. {
  2518. for (uint32_t iy = 0; iy < h; iy++)
  2519. for (uint32_t ix = 0; ix < w; ix++)
  2520. set_clipped(x + ix, y + iy, c);
  2521. return *this;
  2522. }
  2523. imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1))
  2524. {
  2525. if (p == UINT32_MAX)
  2526. p = w;
  2527. if ((w == m_width) && (m_height == h) && (m_pitch == p))
  2528. return *this;
  2529. if ((!w) || (!h) || (!p))
  2530. {
  2531. clear();
  2532. return *this;
  2533. }
  2534. vec4F_vec cur_state;
  2535. cur_state.swap(m_pixels);
  2536. m_pixels.resize(p * h);
  2537. for (uint32_t y = 0; y < h; y++)
  2538. {
  2539. for (uint32_t x = 0; x < w; x++)
  2540. {
  2541. if ((x < m_width) && (y < m_height))
  2542. m_pixels[x + y * p] = cur_state[x + y * m_pitch];
  2543. else
  2544. m_pixels[x + y * p] = background;
  2545. }
  2546. }
  2547. m_width = w;
  2548. m_height = h;
  2549. m_pitch = p;
  2550. return *this;
  2551. }
  2552. imagef& crop_dup_borders(uint32_t w, uint32_t h)
  2553. {
  2554. const uint32_t orig_w = m_width, orig_h = m_height;
  2555. crop(w, h);
  2556. if (orig_w && orig_h)
  2557. {
  2558. if (m_width > orig_w)
  2559. {
  2560. for (uint32_t x = orig_w; x < m_width; x++)
  2561. for (uint32_t y = 0; y < m_height; y++)
  2562. set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
  2563. }
  2564. if (m_height > orig_h)
  2565. {
  2566. for (uint32_t y = orig_h; y < m_height; y++)
  2567. for (uint32_t x = 0; x < m_width; x++)
  2568. set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
  2569. }
  2570. }
  2571. return *this;
  2572. }
  2573. inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  2574. inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  2575. inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  2576. inline vec4F &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  2577. inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
  2578. {
  2579. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  2580. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  2581. return m_pixels[x + y * m_pitch];
  2582. }
  2583. inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
  2584. {
  2585. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  2586. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  2587. return m_pixels[x + y * m_pitch];
  2588. }
  2589. inline imagef &set_clipped(int x, int y, const vec4F &c)
  2590. {
  2591. if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
  2592. (*this)(x, y) = c;
  2593. return *this;
  2594. }
  2595. // Very straightforward blit with full clipping. Not fast, but it works.
  2596. imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
  2597. {
  2598. for (int y = 0; y < src_h; y++)
  2599. {
  2600. const int sy = src_y + y;
  2601. if (sy < 0)
  2602. continue;
  2603. else if (sy >= (int)src.get_height())
  2604. break;
  2605. for (int x = 0; x < src_w; x++)
  2606. {
  2607. const int sx = src_x + x;
  2608. if (sx < 0)
  2609. continue;
  2610. else if (sx >= (int)src.get_height())
  2611. break;
  2612. set_clipped(dst_x + x, dst_y + y, src(sx, sy));
  2613. }
  2614. }
  2615. return *this;
  2616. }
  2617. const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
  2618. {
  2619. for (uint32_t y = 0; y < h; y++)
  2620. for (uint32_t x = 0; x < w; x++)
  2621. *pDst++ = get_clamped(src_x + x, src_y + y);
  2622. return *this;
  2623. }
  2624. imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
  2625. {
  2626. for (uint32_t y = 0; y < h; y++)
  2627. for (uint32_t x = 0; x < w; x++)
  2628. set_clipped(dst_x + x, dst_y + y, *pSrc++);
  2629. return *this;
  2630. }
  2631. inline uint32_t get_width() const { return m_width; }
  2632. inline uint32_t get_height() const { return m_height; }
  2633. inline uint32_t get_pitch() const { return m_pitch; }
  2634. inline uint32_t get_total_pixels() const { return m_width * m_height; }
  2635. inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
  2636. inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
  2637. inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }
  2638. inline const vec4F_vec &get_pixels() const { return m_pixels; }
  2639. inline vec4F_vec &get_pixels() { return m_pixels; }
  2640. inline const vec4F *get_ptr() const { return &m_pixels[0]; }
  2641. inline vec4F *get_ptr() { return &m_pixels[0]; }
  2642. bool clean_astc_hdr_pixels(float highest_mag)
  2643. {
  2644. bool status = true;
  2645. bool nan_msg = false;
  2646. bool inf_msg = false;
  2647. bool neg_zero_msg = false;
  2648. bool neg_msg = false;
  2649. bool clamp_msg = false;
  2650. for (uint32_t iy = 0; iy < m_height; iy++)
  2651. {
  2652. for (uint32_t ix = 0; ix < m_width; ix++)
  2653. {
  2654. vec4F& c = (*this)(ix, iy);
  2655. for (uint32_t s = 0; s < 4; s++)
  2656. {
  2657. float &p = c[s];
  2658. union { float f; uint32_t u; } x; x.f = p;
  2659. if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000))
  2660. {
  2661. if (std::isnan(p))
  2662. {
  2663. if (!nan_msg)
  2664. {
  2665. fprintf(stderr, "One or more pixels was NaN, setting to 0.\n");
  2666. nan_msg = true;
  2667. }
  2668. }
  2669. if (std::isinf(p))
  2670. {
  2671. if (!inf_msg)
  2672. {
  2673. fprintf(stderr, "One or more pixels was INF, setting to 0.\n");
  2674. inf_msg = true;
  2675. }
  2676. }
  2677. if (x.u == 0x80000000)
  2678. {
  2679. if (!neg_zero_msg)
  2680. {
  2681. fprintf(stderr, "One or more pixels was -0, setting them to 0.\n");
  2682. neg_zero_msg = true;
  2683. }
  2684. }
  2685. p = 0.0f;
  2686. status = false;
  2687. }
  2688. else
  2689. {
  2690. //const float o = p;
  2691. if (p < 0.0f)
  2692. {
  2693. p = 0.0f;
  2694. if (!neg_msg)
  2695. {
  2696. fprintf(stderr, "One or more pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n");
  2697. neg_msg = true;
  2698. }
  2699. status = false;
  2700. }
  2701. if (p > highest_mag)
  2702. {
  2703. p = highest_mag;
  2704. if (!clamp_msg)
  2705. {
  2706. fprintf(stderr, "One or more pixels had to be clamped to %f.\n", highest_mag);
  2707. clamp_msg = true;
  2708. }
  2709. status = false;
  2710. }
  2711. }
  2712. }
  2713. }
  2714. }
  2715. return status;
  2716. }
  2717. imagef& flip_y()
  2718. {
  2719. for (uint32_t y = 0; y < m_height / 2; ++y)
  2720. for (uint32_t x = 0; x < m_width; ++x)
  2721. std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));
  2722. return *this;
  2723. }
  2724. private:
  2725. uint32_t m_width, m_height, m_pitch; // all in pixels
  2726. vec4F_vec m_pixels;
  2727. };
  2728. // REC 709 coefficients
  2729. const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f;
  2730. inline float get_luminance(const vec4F &c)
  2731. {
  2732. return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B;
  2733. }
  2734. float linear_to_srgb(float l);
  2735. float srgb_to_linear(float s);
  2736. // Image metrics
  2737. class image_metrics
  2738. {
  2739. public:
  2740. // TODO: Add ssim
  2741. double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
  2742. bool m_has_neg, m_hf_mag_overflow, m_any_abnormal;
  2743. image_metrics()
  2744. {
  2745. clear();
  2746. }
  2747. void clear()
  2748. {
  2749. m_max = 0;
  2750. m_mean = 0;
  2751. m_mean_squared = 0;
  2752. m_rms = 0;
  2753. m_psnr = 0;
  2754. m_ssim = 0;
  2755. m_has_neg = false;
  2756. m_hf_mag_overflow = false;
  2757. m_any_abnormal = false;
  2758. }
  2759. void print(const char *pPrefix = nullptr) { printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); }
  2760. void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); }
  2761. void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false);
  2762. void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
  2763. void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
  2764. void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);
  2765. };
// Image saving/loading/resampling

// PNG loading, from memory or disk (pFilename in the memory variant is only for diagnostics).
bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr);
bool load_png(const char* pFilename, image& img);
inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); }

bool load_tga(const char* pFilename, image& img);
inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); }

bool load_qoi(const char* pFilename, image& img);

bool load_jpg(const char *pFilename, image& img);
inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }

// Currently loads .PNG, .TGA, or .JPG
bool load_image(const char* pFilename, image& img);
inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }

// Supports .HDR and most (but not all) .EXR's (see TinyEXR).
bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true);
inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true) { return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear); }

// Source formats accepted by the in-memory HDR loader below.
enum class hdr_image_type
{
	cHITRGBAHalfFloat = 0,
	cHITRGBAFloat = 1,
	cHITPNGImage = 2,
	cHITEXRImage = 3,
	cHITHDRImage = 4
};

bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear);

// Decodes a .TGA from memory or disk; returns a buffer of pixels (or nullptr on failure).
uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);
uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);
  2792. struct rgbe_header_info
  2793. {
  2794. std::string m_program;
  2795. // Note no validation is done, either gamma or exposure may be 0.
  2796. double m_gamma;
  2797. bool m_has_gamma;
  2798. double m_exposure; // watts/steradian/m^2.
  2799. bool m_has_exposure;
  2800. void clear()
  2801. {
  2802. m_program.clear();
  2803. m_gamma = 1.0f;
  2804. m_has_gamma = false;
  2805. m_exposure = 1.0f;
  2806. m_has_exposure = false;
  2807. }
  2808. };
// Radiance .HDR/.RGBE reading and writing.
bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info);
bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info);

bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info);
bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info);

// OpenEXR reading.
bool read_exr(const char* pFilename, imagef& img, int& n_chans);
bool read_exr(const void* pMem, size_t mem_size, imagef& img);

// Flag bits for write_exr().
enum
{
	WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic
	WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats
	WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block)
};
// Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images.
bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags);

// Flag bits for save_png().
enum
{
	cImageSaveGrayscale = 1,
	cImageSaveIgnoreAlpha = 2
};

bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0);
inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }

// Whole-file helpers.
bool read_file_to_vec(const char* pFilename, uint8_vec& data);
bool read_file_to_data(const char* pFilename, void *pData, size_t len);

bool write_data_to_file(const char* pFilename, const void* pData, size_t len);
inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }

// Separable image resampling; pFilter selects the kernel by name (e.g. "lanczos4").
bool image_resample(const image &src, image &dst, bool srgb = false,
	const char *pFilter = "lanczos4", float filter_scale = 1.0f,
	bool wrapping = false,
	uint32_t first_comp = 0, uint32_t num_comps = 4);
bool image_resample(const imagef& src, imagef& dst,
	const char* pFilter = "lanczos4", float filter_scale = 1.0f,
	bool wrapping = false,
	uint32_t first_comp = 0, uint32_t num_comps = 4);
// Timing
typedef uint64_t timer_ticks;

// Simple start/stop interval timer, plus static helpers for raw tick queries.
// (Methods are defined out-of-line; see the .cpp.)
class interval_timer
{
public:
	interval_timer();

	void start();
	void stop();

	double get_elapsed_secs() const;
	inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); }

	// One-time global init (captures the tick frequency used by the statics below).
	static void init();
	static inline timer_ticks get_ticks_per_sec() { return g_freq; }
	static timer_ticks get_ticks();
	static double ticks_to_secs(timer_ticks ticks);
	static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; }

private:
	static timer_ticks g_init_ticks, g_freq;
	static double g_timer_freq;

	timer_ticks m_start_time, m_stop_time;

	bool m_started, m_stopped;
};

// Current time in seconds relative to the timer's arbitrary epoch.
inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); }
// 2D array
// Simple row-major 2D array of T; resize() preserves the overlapping region.
template<typename T>
class vector2D
{
	typedef basisu::vector<T> TVec;

	uint32_t m_width, m_height;
	TVec m_values;

public:
	vector2D() :
		m_width(0),
		m_height(0)
	{
	}

	vector2D(uint32_t w, uint32_t h) :
		m_width(0),
		m_height(0)
	{
		resize(w, h);
	}

	vector2D(const vector2D &other)
	{
		*this = other;
	}

	vector2D &operator= (const vector2D &other)
	{
		if (this != &other)
		{
			m_width = other.m_width;
			m_height = other.m_height;
			m_values = other.m_values;
		}
		return *this;
	}

	inline bool operator== (const vector2D &rhs) const
	{
		return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values);
	}

	inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); }

	// Unchecked (assert-only) 2D element access.
	inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
	inline T &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }

	// Flattened (linear) element access.
	inline const T &operator[] (uint32_t i) const { return m_values[i]; }
	inline T &operator[] (uint32_t i) { return m_values[i]; }

	// Element access with coordinates clamped to the array bounds.
	inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
	inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }

	void clear()
	{
		m_width = 0;
		m_height = 0;
		m_values.clear();
	}

	void set_all(const T&val)
	{
		vector_set_all(m_values, val);
	}

	inline const T* get_ptr() const { return &m_values[0]; }
	inline T* get_ptr() { return &m_values[0]; }

	// Resizes the array; elements in the overlapping region are preserved, new
	// elements are default-constructed.
	vector2D &resize(uint32_t new_width, uint32_t new_height)
	{
		if ((m_width == new_width) && (m_height == new_height))
			return *this;

		// Allocate new-size storage and swap it in; oldVals ends up holding the old data.
		TVec oldVals(new_width * new_height);
		oldVals.swap(m_values);

		const uint32_t w = minimum(m_width, new_width);
		const uint32_t h = minimum(m_height, new_height);

		if ((w) && (h))
		{
			// m_width still holds the OLD width here, which indexes oldVals.
			for (uint32_t y = 0; y < h; y++)
				for (uint32_t x = 0; x < w; x++)
					m_values[x + y * new_width] = oldVals[x + y * m_width];
		}

		m_width = new_width;
		m_height = new_height;

		return *this;
	}
};
// fopen() wrapper that uses fopen_s() on Windows (avoids MSVC's CRT deprecation warning).
// Returns nullptr on failure, just like fopen().
inline FILE *fopen_safe(const char *pFilename, const char *pMode)
{
#ifdef _WIN32
	FILE *pFile = nullptr;
	fopen_s(&pFile, pFilename, pMode);
	return pFile;
#else
	return fopen(pFilename, pMode);
#endif
}
  2949. void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1);
  2950. const uint32_t cPixelBlockWidth = 4;
  2951. const uint32_t cPixelBlockHeight = 4;
  2952. const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight;
// A 4x4 block of LDR (8-bit RGBA) pixels, stored row-major.
struct pixel_block
{
	color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]

	// Unchecked (assert-only) pixel access, note the (x, y) argument order.
	inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
	inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }

	inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; }
	inline color_rgba* get_ptr() { return &m_pixels[0][0]; }

	inline void clear() { clear_obj(*this); }

	// Bitwise comparison of all pixels.
	inline bool operator== (const pixel_block& rhs) const
	{
		return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;
	}
};
  2966. typedef basisu::vector<pixel_block> pixel_block_vec;
  2967. struct pixel_block_hdr
  2968. {
  2969. vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]
  2970. inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
  2971. inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
  2972. inline const vec4F* get_ptr() const { return &m_pixels[0][0]; }
  2973. inline vec4F* get_ptr() { return &m_pixels[0][0]; }
  2974. inline void clear() { clear_obj(*this); }
  2975. inline bool operator== (const pixel_block& rhs) const
  2976. {
  2977. return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;
  2978. }
  2979. };
  2980. typedef basisu::vector<pixel_block_hdr> pixel_block_hdr_vec;
  2981. void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure);
  2982. bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img);
  2983. // Intersection
  2984. enum eClear { cClear = 0 };
  2985. enum eInitExpand { cInitExpand = 0 };
// Parametric ray: point(t) = origin + direction * t.
template<typename vector_type>
class ray
{
public:
	typedef vector_type vector_t;
	typedef typename vector_type::scalar_type scalar_type;

	inline ray() { }
	inline ray(eClear) { clear(); }
	inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { }

	inline void clear()
	{
		m_origin.clear();
		m_direction.clear();
	}

	inline const vector_type& get_origin(void) const { return m_origin; }
	inline void set_origin(const vector_type& origin) { m_origin = origin; }

	inline const vector_type& get_direction(void) const { return m_direction; }
	inline void set_direction(const vector_type& direction) { m_direction = direction; }

	// Builds a ray from start pointing towards end; the direction is normalized in place.
	inline void set_endpoints(const vector_type& start, const vector_type& end)
	{
		m_origin = start;

		m_direction = end - start;
		m_direction.normalize_in_place();
	}

	// Returns the point on the ray at parameter t.
	inline vector_type eval(scalar_type t) const
	{
		return m_origin + m_direction * t;
	}

private:
	vector_type m_origin;
	vector_type m_direction;
};
typedef ray<vec2F> ray2F; // 2D float ray
typedef ray<vec3F> ray3F; // 3D float ray
  3020. template<typename T>
  3021. class vec_interval
  3022. {
  3023. public:
  3024. enum { N = T::num_elements };
  3025. typedef typename T::scalar_type scalar_type;
  3026. inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; }
  3027. inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; }
  3028. inline vec_interval() { }
  3029. inline vec_interval(eClear) { clear(); }
  3030. inline vec_interval(eInitExpand) { init_expand(); }
  3031. inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); }
  3032. inline void init_expand()
  3033. {
  3034. m_bounds[0].set(1e+30f, 1e+30f, 1e+30f);
  3035. m_bounds[1].set(-1e+30f, -1e+30f, -1e+30f);
  3036. }
  3037. inline vec_interval expand(const T& p)
  3038. {
  3039. for (uint32_t c = 0; c < N; c++)
  3040. {
  3041. if (p[c] < m_bounds[0][c])
  3042. m_bounds[0][c] = p[c];
  3043. if (p[c] > m_bounds[1][c])
  3044. m_bounds[1][c] = p[c];
  3045. }
  3046. return *this;
  3047. }
  3048. inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; }
  3049. inline T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; }
  3050. const T& get_low() const { return m_bounds[0]; }
  3051. T& get_low() { return m_bounds[0]; }
  3052. const T& get_high() const { return m_bounds[1]; }
  3053. T& get_high() { return m_bounds[1]; }
  3054. scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; }
  3055. bool contains(const T& p) const
  3056. {
  3057. const T& low = get_low(), high = get_high();
  3058. for (uint32_t i = 0; i < N; i++)
  3059. {
  3060. if (p[i] < low[i])
  3061. return false;
  3062. if (p[i] > high[i])
  3063. return false;
  3064. }
  3065. return true;
  3066. }
  3067. private:
  3068. T m_bounds[2];
  3069. };
// Common vec_interval instantiations.
typedef vec_interval<vec1F> vec_interval1F;
typedef vec_interval<vec2F> vec_interval2F;
typedef vec_interval<vec3F> vec_interval3F;
typedef vec_interval<vec4F> vec_interval4F;

typedef vec_interval2F aabb2F; // 2D float axis-aligned bounding box
typedef vec_interval3F aabb3F; // 3D float axis-aligned bounding box
namespace intersection
{
	// Outcome codes for the intersection routines.
	enum result
	{
		cBackfacing = -1,
		cFailure = 0,
		cSuccess,
		cParallel,
		cInside,
	};

	// Returns cInside, cSuccess, or cFailure.
	// Algorithm: Graphics Gems 1 ("Fast Ray-Box Intersection", Andrew Woo).
	// On cSuccess: coord = the entry point on the box's surface, t = ray parameter of the hit.
	// On cInside: the ray origin is already inside the box; coord = origin, t = 0.
	template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
	result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)
	{
		enum
		{
			cNumDim = vector_type::num_elements,
			cRight = 0,
			cLeft = 1,
			cMiddle = 2
		};

		// Classify the ray origin against the box's two slabs on each axis, recording
		// the candidate entry plane for every axis where the origin lies outside.
		bool inside = true;
		int quadrant[cNumDim];
		scalar_type candidate_plane[cNumDim];

		for (int i = 0; i < cNumDim; i++)
		{
			if (ray.get_origin()[i] < box[0][i])
			{
				quadrant[i] = cLeft;
				candidate_plane[i] = box[0][i];
				inside = false;
			}
			else if (ray.get_origin()[i] > box[1][i])
			{
				quadrant[i] = cRight;
				candidate_plane[i] = box[1][i];
				inside = false;
			}
			else
			{
				quadrant[i] = cMiddle;
			}
		}

		// Origin is between both slabs on every axis.
		if (inside)
		{
			coord = ray.get_origin();
			t = 0.0f;
			return cInside;
		}

		// Distance along the ray to each candidate plane; -1 marks axes that can't
		// produce the entry hit (origin between the slabs, or direction parallel to the axis).
		scalar_type max_t[cNumDim];
		for (int i = 0; i < cNumDim; i++)
		{
			if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f))
				max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i];
			else
				max_t[i] = -1.0f;
		}

		// The true entry point lies on the candidate plane hit farthest along the ray.
		int which_plane = 0;
		for (int i = 1; i < cNumDim; i++)
			if (max_t[which_plane] < max_t[i])
				which_plane = i;

		// Best candidate is behind the ray: no intersection.
		if (max_t[which_plane] < 0.0f)
			return cFailure;

		// Verify the candidate point actually lies within the box on the other axes.
		for (int i = 0; i < cNumDim; i++)
		{
			if (i != which_plane)
			{
				coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i];
				if ((coord[i] < box[0][i]) || (coord[i] > box[1][i]))
					return cFailure;
			}
			else
			{
				coord[i] = candidate_plane[i];
			}

			assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]);
		}

		t = max_t[which_plane];
		return cSuccess;
	}

	// Variant that also handles a ray whose origin is inside the box: it casts backwards
	// from a point guaranteed to be outside, so coord/t then describe where the original
	// ray exits the box. started_within reports whether the origin was inside.
	template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
	result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)
	{
		if (!box.contains(ray.get_origin()))
		{
			started_within = false;
			return ray_aabb(coord, t, ray, box);
		}

		started_within = true;

		// Step 1.5x the box diagonal along the ray (always beyond the far side), then
		// trace back towards the origin; convert the reversed hit t back to the
		// original ray's parameterization, clamped to 0.
		// NOTE(review): relies on aabb_type::diagonal_length() and the member typedef
		// vector_type::T, neither of which is visible in this chunk - confirm they
		// exist for the vector/aabb types this is instantiated with.
		typename vector_type::T diag_dist = box.diagonal_length() * 1.5f;
		ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction());

		result res(ray_aabb(coord, t, outside_ray, box));
		if (res != cSuccess)
			return res;

		t = basisu::maximum(0.0f, diag_dist - t);
		return cSuccess;
	}

} // namespace intersection
// This float->half conversion matches how "F32TO16" works on Intel GPU's.
// Input cannot be negative, Inf or Nan.
inline basist::half_float float_to_half_non_neg_no_nan_inf(float val)
{
	// Reinterpret the float's bits so the mantissa/exponent fields can be sliced out.
	union { float f; int32_t i; uint32_t u; } fi = { val };
	const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF;
	int e = 0, m = 0;

	// Caller contract: sign bit clear, and exponent != 0xFF (not Inf/NaN).
	assert(((fi.i >> 31) == 0) && (flt_e != 0xFF));

	// not zero or denormal
	if (flt_e != 0)
	{
		int new_exp = flt_e - 127; // unbiased float exponent
		if (new_exp > 15)
			e = 31; // too large for half: e=31, m=0 below (half Inf encoding)
		else if (new_exp < -14)
			m = lrintf((1 << 24) * fabsf(fi.f)); // too small for a normal half: round to a half denormal (e stays 0)
		else
		{
			e = new_exp + 15; // rebias exponent for half
			m = lrintf(flt_m * (1.0f / ((float)(1 << 13)))); // round the 23-bit mantissa down to 10 bits
		}
	}

	assert((0 <= m) && (m <= 1024));

	// Rounding can carry out of the 10-bit mantissa (m == 1024); propagate into the exponent.
	if (m == 1024)
	{
		e++;
		m = 0;
	}

	assert((e >= 0) && (e <= 31));
	assert((m >= 0) && (m <= 1023));

	// Pack: 5-bit exponent | 10-bit mantissa (sign is always 0 by contract).
	basist::half_float result = (basist::half_float)((e << 10) | m);
	return result;
}
// Supports positive and denormals only. No NaN or Inf.
inline float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h)
{
	assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));

	// Union used for bit-level float reinterpretation.
	union fu32
	{
		uint32_t u;
		float f;
	};

	// 0x77800000 is 2^112 as a float: multiplying by it corrects the exponent bias
	// difference between half (15) and float (127), i.e. 127 - 15 = 112.
	static const fu32 K = { 0x77800000 };

	fu32 o;
	o.u = h << 13;  // shift the half's exponent+mantissa into the float's field positions
	o.f *= K.f;     // rebias via multiply; this also correctly normalizes half denormals
	return o.f;
}
  3223. } // namespace basisu