  1. // basisu_enc.h
  2. // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. #pragma once
  16. #include "../transcoder/basisu.h"
  17. #include "../transcoder/basisu_transcoder_internal.h"
  18. #include <mutex>
  19. #include <atomic>
  20. #include <condition_variable>
  21. #include <functional>
  22. #include <thread>
  23. #include <unordered_map>
  24. #include <ostream>
  25. #if !defined(_WIN32) || defined(__MINGW32__)
  26. #include <libgen.h>
  27. #endif
  28. // This module is really just a huge grab bag of classes and helper functions needed by the encoder.
  29. // If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time.
  30. #define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0)
  31. #if BASISU_SUPPORT_SSE
  32. // Declared in basisu_kernels_imp.h, but we can't include that header here because it would create a circular dependency.
  33. extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16);
  34. #endif
  35. namespace basisu
  36. {
  37. extern uint8_t g_hamming_dist[256];
  38. extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8];
  39. // true if basisu_encoder_init() has been called and returned.
  40. extern bool g_library_initialized;
  41. // Encoder library initialization.
  42. // This function MUST be called before encoding anything!
  43. void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
  44. void basisu_encoder_deinit();
  45. // Implemented in basisu_kernels_sse.cpp. detect_sse41() will be a no-op, and g_cpu_supports_sse41 will always be false, unless compiled with BASISU_SUPPORT_SSE=1.
  46. extern void detect_sse41();
  47. #if BASISU_SUPPORT_SSE
  48. extern bool g_cpu_supports_sse41;
  49. #else
  50. const bool g_cpu_supports_sse41 = false;
  51. #endif
  52. void error_vprintf(const char* pFmt, va_list args);
  53. void error_printf(const char *pFmt, ...);
  54. // Helpers
  55. inline uint8_t clamp255(int32_t i)
  56. {
  57. return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);
  58. }
  59. inline int32_t clampi(int32_t value, int32_t low, int32_t high)
  60. {
  61. if (value < low)
  62. value = low;
  63. else if (value > high)
  64. value = high;
  65. return value;
  66. }
  67. inline uint8_t mul_8(uint32_t v, uint32_t a)
  68. {
  69. v = v * a + 128;
  70. return (uint8_t)((v + (v >> 8)) >> 8);
  71. }
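// Example (illustrative note, not part of the original header): mul_8() approximates
// (v * a) / 255 with rounding, for 8-bit inputs. A few worked values:
//
//   mul_8(255, 255) == 255   // full value times full alpha stays 255
//   mul_8(255, 128) == 128   // half alpha roughly halves the value
//   mul_8(0, 200)   == 0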
  72. inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
  73. {
  74. assert(codesize <= 64);
  75. uint64_t bits = 0;
  76. uint32_t total_bits = 0;
  77. while (total_bits < codesize)
  78. {
  79. uint32_t byte_bit_offset = bit_offset & 7;
  80. uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);
  81. uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;
  82. byte_bits &= ((1 << bits_to_read) - 1);
  83. bits |= ((uint64_t)(byte_bits) << total_bits);
  84. total_bits += bits_to_read;
  85. bit_offset += bits_to_read;
  86. }
  87. return bits;
  88. }
  89. inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
  90. {
  91. assert(codesize <= 32);
  92. uint32_t bits = 0;
  93. uint32_t total_bits = 0;
  94. while (total_bits < codesize)
  95. {
  96. uint32_t byte_bit_offset = bit_offset & 7;
  97. uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);
  98. uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;
  99. byte_bits &= ((1 << bits_to_read) - 1);
  100. bits |= (byte_bits << total_bits);
  101. total_bits += bits_to_read;
  102. bit_offset += bits_to_read;
  103. }
  104. return bits;
  105. }
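// Example (illustrative sketch, not part of the original header): extracting two bit
// fields from a little-endian packed buffer with read_bits(). The buffer contents and
// field widths below are hypothetical.
//
//   const uint8_t buf[2] = { 0xAB, 0x01 };
//   uint32_t ofs = 0;
//   uint64_t lo = basisu::read_bits(buf, ofs, 4);   // lo == 0xB, ofs advances to 4
//   uint64_t hi = basisu::read_bits(buf, ofs, 8);   // hi == 0x1A, ofs advances to 12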
  106. // Hashing
  107. inline uint32_t bitmix32c(uint32_t v)
  108. {
  109. v = (v + 0x7ed55d16) + (v << 12);
  110. v = (v ^ 0xc761c23c) ^ (v >> 19);
  111. v = (v + 0x165667b1) + (v << 5);
  112. v = (v + 0xd3a2646c) ^ (v << 9);
  113. v = (v + 0xfd7046c5) + (v << 3);
  114. v = (v ^ 0xb55a4f09) ^ (v >> 16);
  115. return v;
  116. }
  117. inline uint32_t bitmix32(uint32_t v)
  118. {
  119. v -= (v << 6);
  120. v ^= (v >> 17);
  121. v -= (v << 9);
  122. v ^= (v << 4);
  123. v -= (v << 3);
  124. v ^= (v << 10);
  125. v ^= (v >> 15);
  126. return v;
  127. }
  128. inline uint32_t wang_hash(uint32_t seed)
  129. {
  130. seed = (seed ^ 61) ^ (seed >> 16);
  131. seed *= 9;
  132. seed = seed ^ (seed >> 4);
  133. seed *= 0x27d4eb2d;
  134. seed = seed ^ (seed >> 15);
  135. return seed;
  136. }
  137. uint32_t hash_hsieh(const uint8_t* pBuf, size_t len);
  138. template <typename Key>
  139. struct bit_hasher
  140. {
  141. std::size_t operator()(const Key& k) const
  142. {
  143. return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k));
  144. }
  145. };
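// Example (illustrative sketch, not part of the original header): bit_hasher can serve
// as the hash functor of a std::unordered_map whose key is a plain-old-data struct.
// The key type below is hypothetical; note that hash_hsieh() hashes the raw key bytes,
// so any padding bytes inside the key must be consistent (a tightly packed key is safest).
//
//   struct block_key { uint32_t x, y; };  // no padding
//   inline bool operator==(const block_key &a, const block_key &b) { return a.x == b.x && a.y == b.y; }
//   std::unordered_map<block_key, uint32_t, basisu::bit_hasher<block_key> > block_map;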
  146. class running_stat
  147. {
  148. public:
  149. running_stat() { clear(); }
  150. void clear()
  151. {
  152. m_n = 0;
  153. m_total = 0;
  154. m_old_m = 0;
  155. m_new_m = 0;
  156. m_old_s = 0;
  157. m_new_s = 0;
  158. m_min = 0;
  159. m_max = 0;
  160. }
  161. void push(double x)
  162. {
  163. m_n++;
  164. m_total += x;
  165. if (m_n == 1)
  166. {
  167. m_old_m = m_new_m = x;
  168. m_old_s = 0.0;
  169. m_min = x;
  170. m_max = x;
  171. }
  172. else
  173. {
  174. // See Knuth TAOCP vol 2, 3rd edition, page 232
  175. m_new_m = m_old_m + (x - m_old_m) / m_n;
  176. m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m);
  177. m_old_m = m_new_m;
  178. m_old_s = m_new_s;
  179. m_min = basisu::minimum(x, m_min);
  180. m_max = basisu::maximum(x, m_max);
  181. }
  182. }
  183. uint32_t get_num() const
  184. {
  185. return m_n;
  186. }
  187. double get_total() const
  188. {
  189. return m_total;
  190. }
  191. double get_mean() const
  192. {
  193. return (m_n > 0) ? m_new_m : 0.0;
  194. }
  195. // Returns sample variance
  196. double get_variance() const
  197. {
  198. return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0);
  199. }
  200. double get_std_dev() const
  201. {
  202. return sqrt(get_variance());
  203. }
  204. double get_min() const
  205. {
  206. return m_min;
  207. }
  208. double get_max() const
  209. {
  210. return m_max;
  211. }
  212. private:
  213. uint32_t m_n;
  214. double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max;
  215. };
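// Example (illustrative sketch, not part of the original header): accumulating simple
// statistics with running_stat. The values pushed below are hypothetical.
//
//   basisu::running_stat stats;
//   stats.push(1.0); stats.push(2.0); stats.push(3.0);
//   // stats.get_mean() == 2.0, stats.get_variance() == 1.0 (sample variance, n-1 divisor),
//   // stats.get_min() == 1.0, stats.get_max() == 3.0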
  216. // Linear algebra
  217. template <uint32_t N, typename T>
  218. class vec
  219. {
  220. protected:
  221. T m_v[N];
  222. public:
  223. enum { num_elements = N };
  224. inline vec() { }
  225. inline vec(eZero) { set_zero(); }
  226. explicit inline vec(T val) { set(val); }
  227. inline vec(T v0, T v1) { set(v0, v1); }
  228. inline vec(T v0, T v1, T v2) { set(v0, v1, v2); }
  229. inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); }
  230. inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; }
  231. template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); }
  232. inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; }
  233. inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; }
  234. inline T getX() const { return m_v[0]; }
  235. inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; }
  236. inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; }
  237. inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; }
  238. inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; }
  239. inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }
  240. inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
  241. template <uint32_t OtherN, typename OtherT>
  242. inline vec &set(const vec<OtherN, OtherT> &other)
  243. {
  244. uint32_t i;
  245. if ((const void *)(&other) == (const void *)(this))
  246. return *this;
  247. const uint32_t m = minimum(OtherN, N);
  248. for (i = 0; i < m; i++)
  249. m_v[i] = static_cast<T>(other[i]);
  250. for (; i < N; i++)
  251. m_v[i] = 0;
  252. return *this;
  253. }
  254. inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; }
  255. inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; }
  256. inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; }
  257. inline vec &set(T v0, T v1)
  258. {
  259. m_v[0] = v0;
  260. if (N >= 2)
  261. {
  262. m_v[1] = v1;
  263. clear_elements(2, N);
  264. }
  265. return *this;
  266. }
  267. inline vec &set(T v0, T v1, T v2)
  268. {
  269. m_v[0] = v0;
  270. if (N >= 2)
  271. {
  272. m_v[1] = v1;
  273. if (N >= 3)
  274. {
  275. m_v[2] = v2;
  276. clear_elements(3, N);
  277. }
  278. }
  279. return *this;
  280. }
  281. inline vec &set(T v0, T v1, T v2, T v3)
  282. {
  283. m_v[0] = v0;
  284. if (N >= 2)
  285. {
  286. m_v[1] = v1;
  287. if (N >= 3)
  288. {
  289. m_v[2] = v2;
  290. if (N >= 4)
  291. {
  292. m_v[3] = v3;
  293. clear_elements(4, N);
  294. }
  295. }
  296. }
  297. return *this;
  298. }
  299. inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; }
  300. template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; }
  301. inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); }
  302. inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); }
  303. inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; }
  304. inline vec operator+ () const { return *this; }
  305. inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; }
  306. inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; }
  307. inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; }
  308. inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; }
  309. inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; }
  310. inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; }
  311. friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; }
  312. friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; }
  313. friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; }
  314. friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; }
  315. friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; }
  316. friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; }
  317. static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; }
  318. inline T dot(const vec &rhs) const { return dot_product(*this, rhs); }
  319. inline T norm() const { return dot_product(*this, *this); }
  320. inline T length() const { return sqrt(norm()); }
  321. inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; }
  322. inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; }
  323. inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
  324. inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }
  325. inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
  326. inline vec &clamp(T l, T h)
  327. {
  328. for (uint32_t i = 0; i < N; i++)
  329. m_v[i] = basisu::clamp(m_v[i], l, h);
  330. return *this;
  331. }
  332. static vec component_min(const vec& a, const vec& b)
  333. {
  334. vec res;
  335. for (uint32_t i = 0; i < N; i++)
  336. res[i] = minimum(a[i], b[i]);
  337. return res;
  338. }
  339. static vec component_max(const vec& a, const vec& b)
  340. {
  341. vec res;
  342. for (uint32_t i = 0; i < N; i++)
  343. res[i] = maximum(a[i], b[i]);
  344. return res;
  345. }
  346. };
  347. typedef vec<4, double> vec4D;
  348. typedef vec<3, double> vec3D;
  349. typedef vec<2, double> vec2D;
  350. typedef vec<1, double> vec1D;
  351. typedef vec<4, float> vec4F;
  352. typedef vec<3, float> vec3F;
  353. typedef vec<2, float> vec2F;
  354. typedef vec<1, float> vec1F;
  355. typedef vec<16, float> vec16F;
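// Example (illustrative sketch, not part of the original header): basic vec3F usage.
// Note that norm() returns the squared length; length() is its square root.
//
//   basisu::vec3F a(1.0f, 2.0f, 2.0f), b(0.0f, 1.0f, 0.0f);
//   float d = a.dot(b);       // 2.0f
//   float n = a.norm();       // 9.0f (squared length)
//   float l = a.length();     // 3.0f
//   a.normalize_in_place();   // a is now (1/3, 2/3, 2/3)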
  356. template <uint32_t Rows, uint32_t Cols, typename T>
  357. class matrix
  358. {
  359. public:
  360. typedef vec<Rows, T> col_vec;
  361. typedef vec<Cols, T> row_vec;
  362. typedef T scalar_type;
  363. enum { rows = Rows, cols = Cols };
  364. protected:
  365. row_vec m_r[Rows];
  366. public:
  367. inline matrix() {}
  368. inline matrix(eZero) { set_zero(); }
  369. inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; }
  370. inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; }
  371. inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
  372. inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
  373. inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; }
  374. inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; }
  375. inline matrix &set_zero()
  376. {
  377. for (uint32_t i = 0; i < Rows; i++)
  378. m_r[i].set_zero();
  379. return *this;
  380. }
  381. inline matrix &set_identity()
  382. {
  383. for (uint32_t i = 0; i < Rows; i++)
  384. {
  385. m_r[i].set_zero();
  386. if (i < Cols)
  387. m_r[i][i] = 1.0f;
  388. }
  389. return *this;
  390. }
  391. };
  392. template<uint32_t N, typename VectorType>
  393. inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix)
  394. {
  395. VectorType axis;
  396. if (N == 1)
  397. axis.set(1.0f);
  398. else
  399. {
  400. for (uint32_t i = 0; i < N; i++)
  401. axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum<int>(N - 1, 1)));
  402. }
  403. VectorType prev_axis(axis);
  404. // Power iterations
  405. for (uint32_t power_iter = 0; power_iter < 8; power_iter++)
  406. {
  407. VectorType trial_axis;
  408. double max_sum = 0;
  409. for (uint32_t i = 0; i < N; i++)
  410. {
  411. double sum = 0;
  412. for (uint32_t j = 0; j < N; j++)
  413. sum += cmatrix[i][j] * axis[j];
  414. trial_axis[i] = static_cast<float>(sum);
  415. max_sum = maximum(fabs(sum), max_sum);
  416. }
  417. if (max_sum != 0.0f)
  418. trial_axis *= static_cast<float>(1.0f / max_sum);
  419. VectorType delta_axis(prev_axis - trial_axis);
  420. prev_axis = axis;
  421. axis = trial_axis;
  422. if (delta_axis.norm() < .0024f)
  423. break;
  424. }
  425. return axis.normalize_in_place();
  426. }
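// Example (illustrative sketch, not part of the original header): estimating the
// principal axis of a 3x3 covariance matrix via the power iterations above. The
// matrix values are hypothetical; the encoder accumulates real covariances from
// color vectors.
//
//   basisu::matrix<3, 3, float> cov(basisu::cZero);
//   cov[0][0] = 4.0f; cov[1][1] = 1.0f; cov[2][2] = 0.25f;   // variance dominated by X
//   basisu::vec3F axis = basisu::compute_pca_from_covar<3, basisu::vec3F>(cov);
//   // axis is approximately (1, 0, 0) after normalization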
  427. template<typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys)
  428. {
  429. for (uint32_t i = 0; i < num_indices; i++)
  430. pIndices[i] = i;
  431. std::sort(
  432. pIndices,
  433. pIndices + num_indices,
  434. [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; }
  435. );
  436. }
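// Example (illustrative sketch, not part of the original header): indirect_sort()
// orders an index array by an external key array without moving the keys themselves.
//
//   const float keys[4] = { 3.0f, 1.0f, 2.0f, 0.5f };
//   uint32_t order[4];
//   basisu::indirect_sort(4, order, keys);
//   // order == { 3, 1, 2, 0 } -- indices of the keys in ascending key order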
  437. // 1-4 byte direct Radix sort.
  438. template <typename T>
  439. T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size)
  440. {
  441. assert(key_ofs < sizeof(T));
  442. assert((key_size >= 1) && (key_size <= 4));
  443. uint32_t hist[256 * 4];
  444. memset(hist, 0, sizeof(hist[0]) * 256 * key_size);
  445. #define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs))
  446. if (key_size == 4)
  447. {
  448. T* p = pBuf0;
  449. T* q = pBuf0 + num_vals;
  450. for (; p != q; p++)
  451. {
  452. const uint32_t key = BASISU_GET_KEY(p);
  453. hist[key & 0xFF]++;
  454. hist[256 + ((key >> 8) & 0xFF)]++;
  455. hist[512 + ((key >> 16) & 0xFF)]++;
  456. hist[768 + ((key >> 24) & 0xFF)]++;
  457. }
  458. }
  459. else if (key_size == 3)
  460. {
  461. T* p = pBuf0;
  462. T* q = pBuf0 + num_vals;
  463. for (; p != q; p++)
  464. {
  465. const uint32_t key = BASISU_GET_KEY(p);
  466. hist[key & 0xFF]++;
  467. hist[256 + ((key >> 8) & 0xFF)]++;
  468. hist[512 + ((key >> 16) & 0xFF)]++;
  469. }
  470. }
  471. else if (key_size == 2)
  472. {
  473. T* p = pBuf0;
  474. T* q = pBuf0 + (num_vals >> 1) * 2;
  475. for (; p != q; p += 2)
  476. {
  477. const uint32_t key0 = BASISU_GET_KEY(p);
  478. const uint32_t key1 = BASISU_GET_KEY(p + 1);
  479. hist[key0 & 0xFF]++;
  480. hist[256 + ((key0 >> 8) & 0xFF)]++;
  481. hist[key1 & 0xFF]++;
  482. hist[256 + ((key1 >> 8) & 0xFF)]++;
  483. }
  484. if (num_vals & 1)
  485. {
  486. const uint32_t key = BASISU_GET_KEY(p);
  487. hist[key & 0xFF]++;
  488. hist[256 + ((key >> 8) & 0xFF)]++;
  489. }
  490. }
  491. else
  492. {
  493. assert(key_size == 1);
  494. if (key_size != 1)
  495. return NULL;
  496. T* p = pBuf0;
  497. T* q = pBuf0 + (num_vals >> 1) * 2;
  498. for (; p != q; p += 2)
  499. {
  500. const uint32_t key0 = BASISU_GET_KEY(p);
  501. const uint32_t key1 = BASISU_GET_KEY(p + 1);
  502. hist[key0 & 0xFF]++;
  503. hist[key1 & 0xFF]++;
  504. }
  505. if (num_vals & 1)
  506. {
  507. const uint32_t key = BASISU_GET_KEY(p);
  508. hist[key & 0xFF]++;
  509. }
  510. }
  511. T* pCur = pBuf0;
  512. T* pNew = pBuf1;
  513. for (uint32_t pass = 0; pass < key_size; pass++)
  514. {
  515. const uint32_t* pHist = &hist[pass << 8];
  516. uint32_t offsets[256];
  517. uint32_t cur_ofs = 0;
  518. for (uint32_t i = 0; i < 256; i += 2)
  519. {
  520. offsets[i] = cur_ofs;
  521. cur_ofs += pHist[i];
  522. offsets[i + 1] = cur_ofs;
  523. cur_ofs += pHist[i + 1];
  524. }
  525. const uint32_t pass_shift = pass << 3;
  526. T* p = pCur;
  527. T* q = pCur + (num_vals >> 1) * 2;
  528. for (; p != q; p += 2)
  529. {
  530. uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;
  531. uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF;
  532. if (c0 == c1)
  533. {
  534. uint32_t dst_offset0 = offsets[c0];
  535. offsets[c0] = dst_offset0 + 2;
  536. pNew[dst_offset0] = p[0];
  537. pNew[dst_offset0 + 1] = p[1];
  538. }
  539. else
  540. {
  541. uint32_t dst_offset0 = offsets[c0]++;
  542. uint32_t dst_offset1 = offsets[c1]++;
  543. pNew[dst_offset0] = p[0];
  544. pNew[dst_offset1] = p[1];
  545. }
  546. }
  547. if (num_vals & 1)
  548. {
  549. uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;
  550. uint32_t dst_offset = offsets[c];
  551. offsets[c] = dst_offset + 1;
  552. pNew[dst_offset] = *p;
  553. }
  554. T* t = pCur;
  555. pCur = pNew;
  556. pNew = t;
  557. }
  558. return pCur;
  559. }
  560. #undef BASISU_GET_KEY
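// Example (illustrative sketch, not part of the original header): radix_sort() ping-pongs
// between the two buffers and returns a pointer to whichever one ends up holding the
// sorted data, so always read the results through the returned pointer.
//
//   uint32_t vals[5] = { 90000, 7, 256, 1, 65536 };
//   uint32_t temp[5];
//   uint32_t *pSorted = basisu::radix_sort(5, vals, temp, 0, 4);
//   // pSorted[0..4] == { 1, 7, 256, 65536, 90000 } (pSorted may be either vals or temp)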
  561. // Very simple job pool with no dependencies.
  562. class job_pool
  563. {
  564. BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool);
  565. public:
  566. // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc.
  567. job_pool(uint32_t num_threads);
  568. ~job_pool();
  569. void add_job(const std::function<void()>& job);
  570. void add_job(std::function<void()>&& job);
  571. void wait_for_all();
  572. size_t get_total_threads() const { return 1 + m_threads.size(); }
  573. private:
  574. std::vector<std::thread> m_threads;
  575. std::vector<std::function<void()> > m_queue;
  576. std::mutex m_mutex;
  577. std::condition_variable m_has_work;
  578. std::condition_variable m_no_more_jobs;
  579. uint32_t m_num_active_jobs;
  580. std::atomic<bool> m_kill_flag;
  581. void job_thread(uint32_t index);
  582. };
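// Example (illustrative sketch, not part of the original header): queuing a batch of
// jobs and waiting for them all to finish. The job count and lambda body are hypothetical.
//
//   basisu::job_pool pool(4);                  // 4 total threads: the caller plus 3 workers
//   std::atomic<uint32_t> blocks_done(0);
//   for (uint32_t i = 0; i < 64; i++)
//       pool.add_job([&blocks_done]() { blocks_done.fetch_add(1); });
//   pool.wait_for_all();                       // blocks until every queued job has completed
//   // blocks_done == 64 here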
  583. // Simple color classes: a signed 16-bit-per-component color (color_rgba_i16), followed by the encoder's main 32-bit RGBA color class (color_rgba).
  584. class color_rgba_i16
  585. {
  586. public:
  587. union
  588. {
  589. int16_t m_comps[4];
  590. struct
  591. {
  592. int16_t r;
  593. int16_t g;
  594. int16_t b;
  595. int16_t a;
  596. };
  597. };
  598. inline color_rgba_i16()
  599. {
  600. static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4");
  601. }
  602. inline color_rgba_i16(int sr, int sg, int sb, int sa)
  603. {
  604. set(sr, sg, sb, sa);
  605. }
  606. inline color_rgba_i16 &set(int sr, int sg, int sb, int sa)
  607. {
  608. m_comps[0] = (int16_t)clamp<int>(sr, INT16_MIN, INT16_MAX);
  609. m_comps[1] = (int16_t)clamp<int>(sg, INT16_MIN, INT16_MAX);
  610. m_comps[2] = (int16_t)clamp<int>(sb, INT16_MIN, INT16_MAX);
  611. m_comps[3] = (int16_t)clamp<int>(sa, INT16_MIN, INT16_MAX);
  612. return *this;
  613. }
  614. };
  615. class color_rgba
  616. {
  617. public:
  618. union
  619. {
  620. uint8_t m_comps[4];
  621. struct
  622. {
  623. uint8_t r;
  624. uint8_t g;
  625. uint8_t b;
  626. uint8_t a;
  627. };
  628. };
  629. inline color_rgba()
  630. {
  631. static_assert(sizeof(*this) == 4, "sizeof(*this) != 4");
  632. static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)");
  633. }
  634. // Not too hot about this idea.
  635. inline color_rgba(const basist::color32& other) :
  636. r(other.r),
  637. g(other.g),
  638. b(other.b),
  639. a(other.a)
  640. {
  641. }
  642. color_rgba& operator= (const basist::color32& rhs)
  643. {
  644. r = rhs.r;
  645. g = rhs.g;
  646. b = rhs.b;
  647. a = rhs.a;
  648. return *this;
  649. }
  650. inline color_rgba(int y)
  651. {
  652. set(y);
  653. }
  654. inline color_rgba(int y, int na)
  655. {
  656. set(y, na);
  657. }
  658. inline color_rgba(int sr, int sg, int sb, int sa)
  659. {
  660. set(sr, sg, sb, sa);
  661. }
  662. inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa)
  663. {
  664. set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa);
  665. }
  666. inline color_rgba& set_noclamp_y(int y)
  667. {
  668. m_comps[0] = (uint8_t)y;
  669. m_comps[1] = (uint8_t)y;
  670. m_comps[2] = (uint8_t)y;
  671. m_comps[3] = (uint8_t)255;
  672. return *this;
  673. }
  674. inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa)
  675. {
  676. m_comps[0] = (uint8_t)sr;
  677. m_comps[1] = (uint8_t)sg;
  678. m_comps[2] = (uint8_t)sb;
  679. m_comps[3] = (uint8_t)sa;
  680. return *this;
  681. }
  682. inline color_rgba &set(int y)
  683. {
  684. m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
  685. m_comps[1] = m_comps[0];
  686. m_comps[2] = m_comps[0];
  687. m_comps[3] = 255;
  688. return *this;
  689. }
  690. inline color_rgba &set(int y, int na)
  691. {
  692. m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
  693. m_comps[1] = m_comps[0];
  694. m_comps[2] = m_comps[0];
  695. m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255));
  696. return *this;
  697. }
  698. inline color_rgba &set(int sr, int sg, int sb, int sa)
  699. {
  700. m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
  701. m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
  702. m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
  703. m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255));
  704. return *this;
  705. }
  706. inline color_rgba &set_rgb(int sr, int sg, int sb)
  707. {
  708. m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
  709. m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
  710. m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
  711. return *this;
  712. }
  713. inline color_rgba &set_rgb(const color_rgba &other)
  714. {
  715. r = other.r;
  716. g = other.g;
  717. b = other.b;
  718. return *this;
  719. }
  720. inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; }
  721. inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; }
  722. inline void clear()
  723. {
  724. m_comps[0] = 0;
  725. m_comps[1] = 0;
  726. m_comps[2] = 0;
  727. m_comps[3] = 0;
  728. }
  729. inline bool operator== (const color_rgba &rhs) const
  730. {
  731. if (m_comps[0] != rhs.m_comps[0]) return false;
  732. if (m_comps[1] != rhs.m_comps[1]) return false;
  733. if (m_comps[2] != rhs.m_comps[2]) return false;
  734. if (m_comps[3] != rhs.m_comps[3]) return false;
  735. return true;
  736. }
  737. inline bool operator!= (const color_rgba &rhs) const
  738. {
  739. return !(*this == rhs);
  740. }
  741. inline bool operator<(const color_rgba &rhs) const
  742. {
  743. for (int i = 0; i < 4; i++)
  744. {
  745. if (m_comps[i] < rhs.m_comps[i])
  746. return true;
  747. else if (m_comps[i] != rhs.m_comps[i])
  748. return false;
  749. }
  750. return false;
  751. }
  752. inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; }
  753. inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; }
  754. inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); }
  755. inline basist::color32 get_color32() const
  756. {
  757. return basist::color32(r, g, b, a);
  758. }
  759. static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }
  760. static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }
  761. };
  762. typedef basisu::vector<color_rgba> color_rgba_vec;
  763. const color_rgba g_black_color(0, 0, 0, 255);
  764. const color_rgba g_black_trans_color(0, 0, 0, 0);
  765. const color_rgba g_white_color(255, 255, 255, 255);
  766. inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1)
  767. {
  768. int dr = r0 - r1, dg = g0 - g1, db = b0 - b1;
  769. return dr * dr + dg * dg + db * db;
  770. }
  771. inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1)
  772. {
  773. int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1;
  774. return dr * dr + dg * dg + db * db + da * da;
  775. }
  776. inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha)
  777. {
  778. if (alpha)
  779. return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a);
  780. else
  781. return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b);
  782. }
  783. // TODO: Allow user to control channel weightings.
  784. inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha)
  785. {
  786. if (perceptual)
  787. {
  788. #if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE
  789. const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f;
  790. const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f;
  791. const float cr1 = e1.r - l1;
  792. const float cr2 = e2.r - l2;
  793. const float cb1 = e1.b - l1;
  794. const float cb2 = e2.b - l2;
  795. const float dl = l1 - l2;
  796. const float dcr = cr1 - cr2;
  797. const float dcb = cb1 - cb2;
  798. uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb);
  799. if (alpha)
  800. {
  801. int da = static_cast<int>(e1.a) - static_cast<int>(e2.a);
  802. d += static_cast<uint32_t>(128.0f*da*da);
  803. }
  804. return d;
  805. #elif 1
  806. int dr = e1.r - e2.r;
  807. int dg = e1.g - e2.g;
  808. int db = e1.b - e2.b;
  809. #if 0
  810. int delta_l = dr * 27 + dg * 92 + db * 9;
  811. int delta_cr = dr * 128 - delta_l;
  812. int delta_cb = db * 128 - delta_l;
  813. uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) +
  814. ((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) +
  815. ((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U);
  816. #else
  817. int64_t delta_l = dr * 27 + dg * 92 + db * 9;
  818. int64_t delta_cr = dr * 128 - delta_l;
  819. int64_t delta_cb = db * 128 - delta_l;
  820. uint32_t id = ((uint32_t)((delta_l * delta_l) >> 7U)) +
  821. ((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) +
  822. ((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U);
  823. #endif
  824. if (alpha)
  825. {
  826. int da = (e1.a - e2.a) << 7;
  827. // This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring.
  828. id += ((uint32_t)(da * da) >> 7U);
  829. }
  830. return id;
  831. #else
  832. int dr = e1.r - e2.r;
  833. int dg = e1.g - e2.g;
  834. int db = e1.b - e2.b;
  835. int64_t delta_l = dr * 27 + dg * 92 + db * 9;
  836. int64_t delta_cr = dr * 128 - delta_l;
  837. int64_t delta_cb = db * 128 - delta_l;
  838. int64_t id = ((delta_l * delta_l) * 128) +
  839. ((delta_cr * delta_cr) * 26) +
  840. ((delta_cb * delta_cb) * 3);
  841. if (alpha)
  842. {
  843. int64_t da = (e1.a - e2.a);
  844. id += (da * da) * 128;
  845. }
  846. int d = (id + 8192) >> 14;
  847. return d;
  848. #endif
  849. }
  850. else
  851. return color_distance(e1, e2, alpha);
  852. }
  853. static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b)
  854. {
  855. const int dl = a.r - b.r;
  856. const int da = a.a - b.a;
  857. return dl * dl + da * da;
  858. }
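// Example (illustrative sketch, not part of the original header): the two main
// color_distance() flavors. The non-perceptual path is plain squared Euclidean
// distance in RGB(A); the perceptual path weights a luma/chroma decomposition instead.
//
//   basisu::color_rgba c0(255, 0, 0, 255), c1(250, 10, 0, 255);
//   int      d_rgb  = basisu::color_distance(c0, c1, false);        // 5*5 + 10*10 == 125
//   uint32_t d_perc = basisu::color_distance(true, c0, c1, false);  // luma/chroma weighted distance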
  859. // String helpers
  860. inline int string_find_right(const std::string& filename, char c)
  861. {
  862. size_t result = filename.find_last_of(c);
  863. return (result == std::string::npos) ? -1 : (int)result;
  864. }
  865. inline std::string string_get_extension(const std::string &filename)
  866. {
  867. int sep = -1;
  868. #ifdef _WIN32
  869. sep = string_find_right(filename, '\\');
  870. #endif
  871. if (sep < 0)
  872. sep = string_find_right(filename, '/');
  873. int dot = string_find_right(filename, '.');
  874. if (dot <= sep)
  875. return "";
  876. std::string result(filename);
  877. result.erase(0, dot + 1);
  878. return result;
  879. }
  880. inline bool string_remove_extension(std::string &filename)
  881. {
  882. int sep = -1;
  883. #ifdef _WIN32
  884. sep = string_find_right(filename, '\\');
  885. #endif
  886. if (sep < 0)
  887. sep = string_find_right(filename, '/');
  888. int dot = string_find_right(filename, '.');
  889. if ((dot < sep) || (dot < 0))
  890. return false;
  891. filename.resize(dot);
  892. return true;
  893. }
  894. inline std::string string_format(const char* pFmt, ...)
  895. {
  896. char buf[2048];
  897. va_list args;
  898. va_start(args, pFmt);
  899. #ifdef _WIN32
  900. vsprintf_s(buf, sizeof(buf), pFmt, args);
  901. #else
  902. vsnprintf(buf, sizeof(buf), pFmt, args);
  903. #endif
  904. va_end(args);
  905. return std::string(buf);
  906. }
  907. inline std::string string_tolower(const std::string& s)
  908. {
  909. std::string result(s);
  910. for (size_t i = 0; i < result.size(); i++)
  911. result[i] = (char)tolower((int)result[i]);
  912. return result;
  913. }
  914. inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc)
  915. {
  916. assert(pDst && pSrc && dst_len);
  917. if (!dst_len)
  918. return pDst;
  919. const size_t src_len = strlen(pSrc);
  920. const size_t src_len_plus_terminator = src_len + 1;
  921. if (src_len_plus_terminator <= dst_len)
  922. memcpy(pDst, pSrc, src_len_plus_terminator);
  923. else
  924. {
  925. if (dst_len > 1)
  926. memcpy(pDst, pSrc, dst_len - 1);
  927. pDst[dst_len - 1] = '\0';
  928. }
  929. return pDst;
  930. }
  931. inline bool string_ends_with(const std::string& s, char c)
  932. {
  933. return (s.size() != 0) && (s.back() == c);
  934. }
  935. inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt)
  936. {
  937. #ifdef _MSC_VER
  938. char drive_buf[_MAX_DRIVE] = { 0 };
  939. char dir_buf[_MAX_DIR] = { 0 };
  940. char fname_buf[_MAX_FNAME] = { 0 };
  941. char ext_buf[_MAX_EXT] = { 0 };
  942. errno_t error = _splitpath_s(p,
  943. pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0,
  944. pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0,
  945. pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0,
  946. pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0);
  947. if (error != 0)
  948. return false;
  949. if (pDrive) *pDrive = drive_buf;
  950. if (pDir) *pDir = dir_buf;
  951. if (pFilename) *pFilename = fname_buf;
  952. if (pExt) *pExt = ext_buf;
  953. return true;
  954. #else
  955. char dirtmp[1024], nametmp[1024];
  956. strcpy_safe(dirtmp, sizeof(dirtmp), p);
  957. strcpy_safe(nametmp, sizeof(nametmp), p);
  958. if (pDrive)
  959. pDrive->resize(0);
  960. const char *pDirName = dirname(dirtmp);
  961. const char* pBaseName = basename(nametmp);
  962. if ((!pDirName) || (!pBaseName))
  963. return false;
  964. if (pDir)
  965. {
  966. *pDir = pDirName;
  967. if ((pDir->size()) && (pDir->back() != '/'))
  968. *pDir += "/";
  969. }
  970. if (pFilename)
  971. {
  972. *pFilename = pBaseName;
  973. string_remove_extension(*pFilename);
  974. }
  975. if (pExt)
  976. {
  977. *pExt = pBaseName;
  978. *pExt = string_get_extension(*pExt);
  979. if (pExt->size())
  980. *pExt = "." + *pExt;
  981. }
  982. return true;
  983. #endif
  984. }
  985. inline bool is_path_separator(char c)
  986. {
  987. #ifdef _WIN32
  988. return (c == '/') || (c == '\\');
  989. #else
  990. return (c == '/');
  991. #endif
  992. }
  993. inline bool is_drive_separator(char c)
  994. {
  995. #ifdef _WIN32
  996. return (c == ':');
  997. #else
  998. (void)c;
  999. return false;
  1000. #endif
  1001. }
  1002. inline void string_combine_path(std::string &dst, const char *p, const char *q)
  1003. {
  1004. std::string temp(p);
  1005. if (temp.size() && !is_path_separator(q[0]))
  1006. {
  1007. if (!is_path_separator(temp.back()))
  1008. temp.append(1, BASISU_PATH_SEPERATOR_CHAR);
  1009. }
  1010. temp += q;
  1011. dst.swap(temp);
  1012. }
  1013. inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r)
  1014. {
  1015. string_combine_path(dst, p, q);
  1016. string_combine_path(dst, dst.c_str(), r);
  1017. }
  1018. inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt)
  1019. {
  1020. string_combine_path(dst, p, q, r);
  1021. if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.'))
  1022. dst.append(1, '.');
  1023. dst.append(pExt);
  1024. }
  1025. inline bool string_get_pathname(const char *p, std::string &path)
  1026. {
  1027. std::string temp_drive, temp_path;
  1028. if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL))
  1029. return false;
  1030. string_combine_path(path, temp_drive.c_str(), temp_path.c_str());
  1031. return true;
  1032. }
  1033. inline bool string_get_filename(const char *p, std::string &filename)
  1034. {
  1035. std::string temp_ext;
  1036. if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext))
  1037. return false;
  1038. filename += temp_ext;
  1039. return true;
  1040. }
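// Example (illustrative sketch, not part of the original header): the path helpers
// above, applied to a hypothetical input path (shown for the forward-slash case).
//
//   std::string dir, name, ext;
//   basisu::string_get_pathname("textures/albedo.png", dir);    // dir  == "textures/"
//   basisu::string_get_filename("textures/albedo.png", name);   // name == "albedo.png"
//   ext = basisu::string_get_extension("textures/albedo.png");  // ext  == "png"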
  1041. class rand
  1042. {
  1043. std::mt19937 m_mt;
  1044. public:
  1045. rand() { }
  1046. rand(uint32_t s) { seed(s); }
  1047. void seed(uint32_t s) { m_mt.seed(s); }
  1048. // between [l,h]
  1049. int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); }
  1050. uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); }
  1051. bool bit() { return irand(0, 1) == 1; }
  1052. uint8_t byte() { return static_cast<uint8_t>(urand32()); }
  1053. // between [l,h)
  1054. float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); }
  1055. float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); }
  1056. };
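// Example (illustrative sketch, not part of the original header): the rand wrapper
// around std::mt19937. Seed it explicitly if reproducible sequences are needed.
//
//   basisu::rand rng(1000);
//   int   i = rng.irand(0, 255);         // uniform integer in [0, 255]
//   float f = rng.frand(0.0f, 1.0f);     // uniform float in [0, 1)
//   float g = rng.gaussian(0.0f, 1.0f);  // normally distributed, mean 0, stddev 1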
  1057. class priority_queue
  1058. {
  1059. public:
  1060. priority_queue() :
  1061. m_size(0)
  1062. {
  1063. }
  1064. void clear()
  1065. {
  1066. m_heap.clear();
  1067. m_size = 0;
  1068. }
  1069. void init(uint32_t max_entries, uint32_t first_index, float first_priority)
  1070. {
  1071. m_heap.resize(max_entries + 1);
  1072. m_heap[1].m_index = first_index;
  1073. m_heap[1].m_priority = first_priority;
  1074. m_size = 1;
  1075. }
  1076. inline uint32_t size() const { return m_size; }
  1077. inline uint32_t get_top_index() const { return m_heap[1].m_index; }
  1078. inline float get_top_priority() const { return m_heap[1].m_priority; }
  1079. inline void delete_top()
  1080. {
  1081. assert(m_size > 0);
  1082. m_heap[1] = m_heap[m_size];
  1083. m_size--;
  1084. if (m_size)
  1085. down_heap(1);
  1086. }
  1087. inline void add_heap(uint32_t index, float priority)
  1088. {
  1089. m_size++;
  1090. uint32_t k = m_size;
  1091. if (m_size >= m_heap.size())
  1092. m_heap.resize(m_size + 1);
  1093. for (;;)
  1094. {
  1095. uint32_t parent_index = k >> 1;
  1096. if ((!parent_index) || (m_heap[parent_index].m_priority > priority))
  1097. break;
  1098. m_heap[k] = m_heap[parent_index];
  1099. k = parent_index;
  1100. }
  1101. m_heap[k].m_index = index;
  1102. m_heap[k].m_priority = priority;
  1103. }
  1104. private:
  1105. struct entry
  1106. {
  1107. uint32_t m_index;
  1108. float m_priority;
  1109. };
  1110. basisu::vector<entry> m_heap;
  1111. uint32_t m_size;
  1112. // Push down entry at index
  1113. inline void down_heap(uint32_t heap_index)
  1114. {
  1115. uint32_t orig_index = m_heap[heap_index].m_index;
  1116. const float orig_priority = m_heap[heap_index].m_priority;
  1117. uint32_t child_index;
  1118. while ((child_index = (heap_index << 1)) <= m_size)
  1119. {
  1120. if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index;
  1121. if (orig_priority > m_heap[child_index].m_priority)
  1122. break;
  1123. m_heap[heap_index] = m_heap[child_index];
  1124. heap_index = child_index;
  1125. }
  1126. m_heap[heap_index].m_index = orig_index;
  1127. m_heap[heap_index].m_priority = orig_priority;
  1128. }
  1129. };
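// Example (illustrative sketch, not part of the original header): priority_queue is a
// simple binary max-heap keyed on a float priority; the highest-priority entry is always
// on top. tree_vector_quant below uses it to pick the highest-variance node to split next.
//
//   basisu::priority_queue q;
//   q.init(16, 0, 1.0f);         // reserve 16 entries; first entry: index 0, priority 1.0
//   q.add_heap(1, 3.5f);
//   q.add_heap(2, 0.25f);
//   // q.get_top_index() == 1, q.get_top_priority() == 3.5f
//   q.delete_top();              // index 0 (priority 1.0) is now on top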
  1130. // Tree structured vector quantization (TSVQ)
  1131. template <typename TrainingVectorType>
  1132. class tree_vector_quant
  1133. {
  1134. public:
  1135. typedef TrainingVectorType training_vec_type;
  1136. typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight;
  1137. typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs;
  1138. tree_vector_quant() :
  1139. m_next_codebook_index(0)
  1140. {
  1141. }
  1142. void clear()
  1143. {
  1144. clear_vector(m_training_vecs);
  1145. clear_vector(m_nodes);
  1146. m_next_codebook_index = 0;
  1147. }
  1148. void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); }
  1149. size_t get_total_training_vecs() const { return m_training_vecs.size(); }
  1150. const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; }
  1151. array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }
  1152. void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const
  1153. {
  1154. for (uint32_t i = 0; i < m_nodes.size(); i++)
  1155. {
  1156. const tsvq_node &n = m_nodes[i];
  1157. if (!n.is_leaf())
  1158. continue;
  1159. codebook.resize(codebook.size() + 1);
  1160. codebook.back() = n.m_training_vecs;
  1161. }
  1162. }
  1163. void retrieve(basisu::vector<TrainingVectorType> &codebook) const
  1164. {
  1165. for (uint32_t i = 0; i < m_nodes.size(); i++)
  1166. {
  1167. const tsvq_node &n = m_nodes[i];
  1168. if (!n.is_leaf())
  1169. continue;
  1170. codebook.resize(codebook.size() + 1);
  1171. codebook.back() = n.m_origin;
  1172. }
  1173. }
  1174. void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const
  1175. {
  1176. uint_vec node_stack;
  1177. node_stack.reserve(512);
  1178. codebook.resize(0);
  1179. codebook.reserve(max_clusters);
  1180. uint32_t node_index = 0;
  1181. while (true)
  1182. {
  1183. const tsvq_node& cur = m_nodes[node_index];
  1184. if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
  1185. {
  1186. codebook.resize(codebook.size() + 1);
  1187. codebook.back() = cur.m_training_vecs;
  1188. if (node_stack.empty())
  1189. break;
  1190. node_index = node_stack.back();
  1191. node_stack.pop_back();
  1192. continue;
  1193. }
  1194. node_stack.push_back(cur.m_right_index);
  1195. node_index = cur.m_left_index;
  1196. }
  1197. }
  1198. bool generate(uint32_t max_size)
  1199. {
  1200. if (!m_training_vecs.size())
  1201. return false;
  1202. m_next_codebook_index = 0;
  1203. clear_vector(m_nodes);
  1204. m_nodes.reserve(max_size * 2 + 1);
  1205. m_nodes.push_back(prepare_root());
  1206. priority_queue var_heap;
  1207. var_heap.init(max_size, 0, m_nodes[0].m_var);
  1208. basisu::vector<uint32_t> l_children, r_children;
  1209. // Now split the worst nodes
  1210. l_children.reserve(m_training_vecs.size() + 1);
  1211. r_children.reserve(m_training_vecs.size() + 1);
  1212. uint32_t total_leaf_nodes = 1;
  1213. //interval_timer tm;
  1214. //tm.start();
  1215. while ((var_heap.size()) && (total_leaf_nodes < max_size))
  1216. {
  1217. const uint32_t node_index = var_heap.get_top_index();
  1218. const tsvq_node &node = m_nodes[node_index];
  1219. assert(node.m_var == var_heap.get_top_priority());
  1220. assert(node.is_leaf());
  1221. var_heap.delete_top();
  1222. if (node.m_training_vecs.size() > 1)
  1223. {
  1224. if (split_node(node_index, var_heap, l_children, r_children))
  1225. {
  1226. // This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total.
  1227. total_leaf_nodes += 1;
  1228. }
  1229. }
  1230. }
  1231. //debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs());
  1232. return true;
  1233. }
  1234. private:
  1235. class tsvq_node
  1236. {
  1237. public:
  1238. inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { }
  1239. // Note: vecs is swapped into the node, leaving the caller's vector empty.
  1240. inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); }
  1241. inline bool is_leaf() const { return m_left_index < 0; }
  1242. float m_var;
  1243. uint64_t m_weight;
  1244. TrainingVectorType m_origin;
  1245. int32_t m_left_index, m_right_index;
  1246. basisu::vector<uint32_t> m_training_vecs;
  1247. int m_codebook_index;
  1248. };
  1249. typedef basisu::vector<tsvq_node> tsvq_node_vec;
  1250. tsvq_node_vec m_nodes;
  1251. array_of_weighted_training_vecs m_training_vecs;
  1252. uint32_t m_next_codebook_index;
  1253. tsvq_node prepare_root() const
  1254. {
  1255. double ttsum = 0.0f;
  1256. // Prepare root node containing all training vectors
  1257. tsvq_node root;
  1258. root.m_training_vecs.reserve(m_training_vecs.size());
  1259. for (uint32_t i = 0; i < m_training_vecs.size(); i++)
  1260. {
  1261. const TrainingVectorType &v = m_training_vecs[i].first;
  1262. const uint64_t weight = m_training_vecs[i].second;
  1263. root.m_training_vecs.push_back(i);
  1264. root.m_origin += (v * static_cast<float>(weight));
  1265. root.m_weight += weight;
  1266. ttsum += v.dot(v) * weight;
  1267. }
  1268. root.m_var = static_cast<float>(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight));
  1269. root.m_origin *= (1.0f / root.m_weight);
  1270. return root;
  1271. }
  1272. bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector<uint32_t> &l_children, basisu::vector<uint32_t> &r_children)
  1273. {
  1274. TrainingVectorType l_child_org, r_child_org;
  1275. uint64_t l_weight = 0, r_weight = 0;
  1276. float l_var = 0.0f, r_var = 0.0f;
  1277. // Compute initial left/right child origins
  1278. if (!prep_split(m_nodes[node_index], l_child_org, r_child_org))
  1279. return false;
  1280. // Use k-means iterations to refine these children vectors
  1281. if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children))
  1282. return false;
  1283. // Create children
  1284. const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1;
  1285. m_nodes[node_index].m_left_index = l_child_index;
  1286. m_nodes[node_index].m_right_index = r_child_index;
  1287. m_nodes[node_index].m_codebook_index = m_next_codebook_index;
  1288. m_next_codebook_index++;
  1289. m_nodes.resize(m_nodes.size() + 2);
  1290. tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index];
  1291. l_child.set(l_child_org, l_weight, l_var, l_children);
  1292. r_child.set(r_child_org, r_weight, r_var, r_children);
  1293. if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1))
  1294. {
  1295. TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first);
  1296. for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++)
  1297. {
  1298. if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first))
  1299. {
  1300. l_child.m_var = 1e-4f;
  1301. break;
  1302. }
  1303. }
  1304. }
  1305. if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1))
  1306. {
  1307. TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first);
  1308. for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++)
  1309. {
  1310. if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first))
  1311. {
  1312. r_child.m_var = 1e-4f;
  1313. break;
  1314. }
  1315. }
  1316. }
  1317. if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1))
  1318. var_heap.add_heap(l_child_index, l_child.m_var);
  1319. if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1))
  1320. var_heap.add_heap(r_child_index, r_child.m_var);
  1321. return true;
  1322. }
  1323. TrainingVectorType compute_split_axis(const tsvq_node &node) const
  1324. {
  1325. const uint32_t N = TrainingVectorType::num_elements;
  1326. matrix<N, N, float> cmatrix;
  1327. if ((N != 16) || (!g_cpu_supports_sse41))
  1328. {
  1329. cmatrix.set_zero();
  1330. // Compute covariance matrix from weighted input vectors
  1331. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1332. {
  1333. const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin);
  1334. const TrainingVectorType w(static_cast<float>(m_training_vecs[node.m_training_vecs[i]].second) * v);
  1335. for (uint32_t x = 0; x < N; x++)
  1336. for (uint32_t y = x; y < N; y++)
  1337. cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y];
  1338. }
  1339. }
  1340. else
  1341. {
  1342. #if BASISU_SUPPORT_SSE
  1343. // Specialize the case with 16x16 matrices, which are quite expensive without SIMD.
  1344. // This SSE function takes pointers to void types, so do some sanity checks.
  1345. assert(sizeof(TrainingVectorType) == sizeof(float) * 16);
  1346. assert(sizeof(training_vec_with_weight) == sizeof(std::pair<vec16F, uint64_t>));
  1347. update_covar_matrix_16x16_sse41(node.m_training_vecs.size(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix);
  1348. #endif
  1349. }
  1350. const float renorm_scale = 1.0f / node.m_weight;
  1351. for (uint32_t x = 0; x < N; x++)
  1352. for (uint32_t y = x; y < N; y++)
  1353. cmatrix[x][y] *= renorm_scale;
1354. // The covariance matrix is symmetric, so mirror the upper triangle into the lower triangle
  1355. for (uint32_t x = 0; x < (N - 1); x++)
  1356. for (uint32_t y = x + 1; y < N; y++)
  1357. cmatrix[y][x] = cmatrix[x][y];
  1358. return compute_pca_from_covar<N, TrainingVectorType>(cmatrix);
  1359. }
  1360. bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const
  1361. {
  1362. //const uint32_t N = TrainingVectorType::num_elements;
  1363. if (2 == node.m_training_vecs.size())
  1364. {
  1365. l_child_result = m_training_vecs[node.m_training_vecs[0]].first;
  1366. r_child_result = m_training_vecs[node.m_training_vecs[1]].first;
  1367. return true;
  1368. }
  1369. TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f);
  1370. double l_weight = 0.0f, r_weight = 0.0f;
  1371. // Compute initial left/right children
  1372. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1373. {
  1374. const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
  1375. const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
  1376. double t = (v - node.m_origin).dot(axis);
  1377. if (t >= 0.0f)
  1378. {
  1379. r_child += v * weight;
  1380. r_weight += weight;
  1381. }
  1382. else
  1383. {
  1384. l_child += v * weight;
  1385. l_weight += weight;
  1386. }
  1387. }
  1388. if ((l_weight > 0.0f) && (r_weight > 0.0f))
  1389. {
  1390. l_child_result = l_child * static_cast<float>(1.0f / l_weight);
  1391. r_child_result = r_child * static_cast<float>(1.0f / r_weight);
  1392. }
  1393. else
  1394. {
  1395. TrainingVectorType l(1e+20f);
  1396. TrainingVectorType h(-1e+20f);
  1397. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1398. {
  1399. const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
  1400. l = TrainingVectorType::component_min(l, v);
  1401. h = TrainingVectorType::component_max(h, v);
  1402. }
  1403. TrainingVectorType r(h - l);
  1404. float largest_axis_v = 0.0f;
  1405. int largest_axis_index = -1;
  1406. for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++)
  1407. {
  1408. if (r[i] > largest_axis_v)
  1409. {
  1410. largest_axis_v = r[i];
  1411. largest_axis_index = i;
  1412. }
  1413. }
  1414. if (largest_axis_index < 0)
  1415. return false;
  1416. basisu::vector<float> keys(node.m_training_vecs.size());
  1417. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1418. keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index];
  1419. uint_vec indices(node.m_training_vecs.size());
  1420. indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]);
  1421. l_child.set_zero();
  1422. l_weight = 0;
  1423. r_child.set_zero();
  1424. r_weight = 0;
  1425. const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2;
  1426. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1427. {
1428. const float weight = (float)m_training_vecs[node.m_training_vecs[indices[i]]].second; // use the sorted order so the split lands at the median along the largest axis
1429. const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[indices[i]]].first;
  1430. if (i < half_index)
  1431. {
  1432. l_child += v * weight;
  1433. l_weight += weight;
  1434. }
  1435. else
  1436. {
  1437. r_child += v * weight;
  1438. r_weight += weight;
  1439. }
  1440. }
  1441. if ((l_weight > 0.0f) && (r_weight > 0.0f))
  1442. {
  1443. l_child_result = l_child * static_cast<float>(1.0f / l_weight);
  1444. r_child_result = r_child * static_cast<float>(1.0f / r_weight);
  1445. }
  1446. else
  1447. {
  1448. l_child_result = l;
  1449. r_child_result = h;
  1450. }
  1451. }
  1452. return true;
  1453. }
  1454. bool refine_split(const tsvq_node &node,
  1455. TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector<uint32_t> &l_children,
  1456. TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector<uint32_t> &r_children) const
  1457. {
  1458. l_children.reserve(node.m_training_vecs.size());
  1459. r_children.reserve(node.m_training_vecs.size());
  1460. float prev_total_variance = 1e+10f;
  1461. // Refine left/right children locations using k-means iterations
  1462. const uint32_t cMaxIters = 6;
  1463. for (uint32_t iter = 0; iter < cMaxIters; iter++)
  1464. {
  1465. l_children.resize(0);
  1466. r_children.resize(0);
  1467. TrainingVectorType new_l_child(cZero), new_r_child(cZero);
  1468. double l_ttsum = 0.0f, r_ttsum = 0.0f;
  1469. l_weight = 0;
  1470. r_weight = 0;
  1471. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1472. {
  1473. const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
  1474. const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;
  1475. double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v);
  1476. if (left_dist2 >= right_dist2)
  1477. {
  1478. new_r_child += (v * static_cast<float>(weight));
  1479. r_weight += weight;
  1480. r_ttsum += weight * v.dot(v);
  1481. r_children.push_back(node.m_training_vecs[i]);
  1482. }
  1483. else
  1484. {
  1485. new_l_child += (v * static_cast<float>(weight));
  1486. l_weight += weight;
  1487. l_ttsum += weight * v.dot(v);
  1488. l_children.push_back(node.m_training_vecs[i]);
  1489. }
  1490. }
  1491. // Node is unsplittable using the above algorithm - try something else to split it up.
  1492. if ((!l_weight) || (!r_weight))
  1493. {
  1494. l_children.resize(0);
  1495. new_l_child.set(0.0f);
  1496. l_ttsum = 0.0f;
  1497. l_weight = 0;
  1498. r_children.resize(0);
  1499. new_r_child.set(0.0f);
  1500. r_ttsum = 0.0f;
  1501. r_weight = 0;
  1502. TrainingVectorType firstVec;
  1503. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1504. {
  1505. const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
  1506. const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;
  1507. if ((!i) || (v == firstVec))
  1508. {
  1509. firstVec = v;
  1510. new_r_child += (v * static_cast<float>(weight));
  1511. r_weight += weight;
  1512. r_ttsum += weight * v.dot(v);
  1513. r_children.push_back(node.m_training_vecs[i]);
  1514. }
  1515. else
  1516. {
  1517. new_l_child += (v * static_cast<float>(weight));
  1518. l_weight += weight;
  1519. l_ttsum += weight * v.dot(v);
  1520. l_children.push_back(node.m_training_vecs[i]);
  1521. }
  1522. }
  1523. if ((!l_weight) || (!r_weight))
  1524. return false;
  1525. }
  1526. l_var = static_cast<float>(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));
  1527. r_var = static_cast<float>(r_ttsum - (new_r_child.dot(new_r_child) / r_weight));
  1528. new_l_child *= (1.0f / l_weight);
  1529. new_r_child *= (1.0f / r_weight);
  1530. l_child = new_l_child;
  1531. r_child = new_r_child;
  1532. float total_var = l_var + r_var;
  1533. const float cGiveupVariance = .00001f;
  1534. if (total_var < cGiveupVariance)
  1535. break;
  1536. // Check to see if the variance has settled
  1537. const float cVarianceDeltaThresh = .00125f;
  1538. if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh)
  1539. break;
  1540. prev_total_variance = total_var;
  1541. }
  1542. return true;
  1543. }
  1544. };
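// Illustrative usage sketch (hypothetical helper, not part of the library): build a codebook of up
// to max_codebook_size centroids from equally weighted training vectors with the tree-structured
// quantizer above (tree_vector_quant<>, per its debug output), then retrieve one centroid per leaf.
template<typename TrainingVectorType>
inline void example_build_codebook(const basisu::vector<TrainingVectorType>& samples, uint32_t max_codebook_size, basisu::vector<TrainingVectorType>& codebook)
{
	tree_vector_quant<TrainingVectorType> tvq;
	for (uint32_t i = 0; i < samples.size(); i++)
		tvq.add_training_vec(samples[i], 1); // uniform weight of 1 per training vector
	if (tvq.generate(max_codebook_size))     // repeatedly splits the highest-variance leaf
		tvq.retrieve(codebook);              // returns the origin (centroid) of each leaf node
}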
  1545. struct weighted_block_group
  1546. {
  1547. uint64_t m_total_weight;
  1548. uint_vec m_indices;
  1549. };
  1550. template<typename Quantizer>
  1551. bool generate_hierarchical_codebook_threaded_internal(Quantizer& q,
  1552. uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
  1553. basisu::vector<uint_vec>& codebook,
  1554. basisu::vector<uint_vec>& parent_codebook,
  1555. uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool)
  1556. {
  1557. codebook.resize(0);
  1558. parent_codebook.resize(0);
  1559. if ((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16))
  1560. {
  1561. if (!q.generate(max_codebook_size))
  1562. return false;
  1563. q.retrieve(codebook);
  1564. if (max_parent_codebook_size)
  1565. q.retrieve(max_parent_codebook_size, parent_codebook);
  1566. return true;
  1567. }
  1568. const uint32_t cMaxThreads = 16;
  1569. if (max_threads > cMaxThreads)
  1570. max_threads = cMaxThreads;
  1571. if (!q.generate(max_threads))
  1572. return false;
  1573. basisu::vector<uint_vec> initial_codebook;
  1574. q.retrieve(initial_codebook);
  1575. if (initial_codebook.size() < max_threads)
  1576. {
  1577. codebook = initial_codebook;
  1578. if (max_parent_codebook_size)
  1579. q.retrieve(max_parent_codebook_size, parent_codebook);
  1580. return true;
  1581. }
  1582. Quantizer quantizers[cMaxThreads];
  1583. bool success_flags[cMaxThreads];
  1584. clear_obj(success_flags);
  1585. basisu::vector<uint_vec> local_clusters[cMaxThreads];
  1586. basisu::vector<uint_vec> local_parent_clusters[cMaxThreads];
  1587. for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
  1588. {
  1589. #ifndef __EMSCRIPTEN__
  1590. pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] {
  1591. #endif
  1592. Quantizer& lq = quantizers[thread_iter];
  1593. uint_vec& cluster_indices = initial_codebook[thread_iter];
  1594. uint_vec local_to_global(cluster_indices.size());
  1595. for (uint32_t i = 0; i < cluster_indices.size(); i++)
  1596. {
  1597. const uint32_t global_training_vec_index = cluster_indices[i];
  1598. local_to_global[i] = global_training_vec_index;
  1599. lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second);
  1600. }
  1601. const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs();
  1602. success_flags[thread_iter] = lq.generate(max_clusters);
  1603. if (success_flags[thread_iter])
  1604. {
  1605. lq.retrieve(local_clusters[thread_iter]);
  1606. for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++)
  1607. {
  1608. for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++)
  1609. local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]];
  1610. }
  1611. if (max_parent_codebook_size)
  1612. {
  1613. lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]);
  1614. for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++)
  1615. {
  1616. for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++)
  1617. local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]];
  1618. }
  1619. }
  1620. }
  1621. #ifndef __EMSCRIPTEN__
  1622. } );
  1623. #endif
  1624. } // thread_iter
  1625. #ifndef __EMSCRIPTEN__
  1626. pJob_pool->wait_for_all();
  1627. #endif
  1628. uint32_t total_clusters = 0, total_parent_clusters = 0;
  1629. for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++)
  1630. {
  1631. if (!success_flags[thread_iter])
  1632. return false;
  1633. total_clusters += (uint32_t)local_clusters[thread_iter].size();
  1634. total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size();
  1635. }
  1636. codebook.reserve(total_clusters);
  1637. parent_codebook.reserve(total_parent_clusters);
  1638. for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
  1639. {
  1640. for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++)
  1641. {
  1642. codebook.resize(codebook.size() + 1);
  1643. codebook.back().swap(local_clusters[thread_iter][j]);
  1644. }
  1645. for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++)
  1646. {
  1647. parent_codebook.resize(parent_codebook.size() + 1);
  1648. parent_codebook.back().swap(local_parent_clusters[thread_iter][j]);
  1649. }
  1650. }
  1651. return true;
  1652. }
  1653. template<typename Quantizer>
  1654. bool generate_hierarchical_codebook_threaded(Quantizer& q,
  1655. uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
  1656. basisu::vector<uint_vec>& codebook,
  1657. basisu::vector<uint_vec>& parent_codebook,
  1658. uint32_t max_threads, job_pool *pJob_pool,
  1659. bool even_odd_input_pairs_equal)
  1660. {
  1661. typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher;
  1662. typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group,
  1663. training_vec_bit_hasher> group_hash;
  1664. //interval_timer tm;
  1665. //tm.start();
  1666. group_hash unique_vecs;
  1667. unique_vecs.reserve(20000);
  1668. weighted_block_group g;
  1669. if (even_odd_input_pairs_equal)
  1670. {
  1671. g.m_indices.resize(2);
  1672. assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0);
  1673. for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2)
  1674. {
  1675. assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first);
  1676. g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second;
  1677. g.m_indices[0] = i;
  1678. g.m_indices[1] = i + 1;
  1679. auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));
  1680. if (!ins_res.second)
  1681. {
  1682. (ins_res.first)->second.m_total_weight += g.m_total_weight;
  1683. (ins_res.first)->second.m_indices.push_back(i);
  1684. (ins_res.first)->second.m_indices.push_back(i + 1);
  1685. }
  1686. }
  1687. }
  1688. else
  1689. {
  1690. g.m_indices.resize(1);
  1691. for (uint32_t i = 0; i < q.get_training_vecs().size(); i++)
  1692. {
  1693. g.m_total_weight = q.get_training_vecs()[i].second;
  1694. g.m_indices[0] = i;
  1695. auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));
  1696. if (!ins_res.second)
  1697. {
  1698. (ins_res.first)->second.m_total_weight += g.m_total_weight;
  1699. (ins_res.first)->second.m_indices.push_back(i);
  1700. }
  1701. }
  1702. }
  1703. //debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs());
  1704. debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size());
  1705. Quantizer group_quant;
  1706. typedef typename group_hash::const_iterator group_hash_const_iter;
  1707. basisu::vector<group_hash_const_iter> unique_vec_iters;
  1708. unique_vec_iters.reserve(unique_vecs.size());
  1709. for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter)
  1710. {
  1711. group_quant.add_training_vec(iter->first, iter->second.m_total_weight);
  1712. unique_vec_iters.push_back(iter);
  1713. }
  1714. bool limit_clusterizers = true;
  1715. if (unique_vecs.size() <= max_codebook_size)
  1716. limit_clusterizers = false;
1717. debug_printf("Limit clusterizers: %u\n", (uint32_t)limit_clusterizers);
  1718. basisu::vector<uint_vec> group_codebook, group_parent_codebook;
  1719. bool status = generate_hierarchical_codebook_threaded_internal(group_quant,
  1720. max_codebook_size, max_parent_codebook_size,
  1721. group_codebook,
  1722. group_parent_codebook,
  1723. (unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool);
  1724. if (!status)
  1725. return false;
  1726. codebook.resize(0);
  1727. for (uint32_t i = 0; i < group_codebook.size(); i++)
  1728. {
  1729. codebook.resize(codebook.size() + 1);
  1730. for (uint32_t j = 0; j < group_codebook[i].size(); j++)
  1731. {
  1732. const uint32_t group_index = group_codebook[i][j];
  1733. typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
  1734. const uint_vec& training_vec_indices = group_iter->second.m_indices;
  1735. append_vector(codebook.back(), training_vec_indices);
  1736. }
  1737. }
  1738. parent_codebook.resize(0);
  1739. for (uint32_t i = 0; i < group_parent_codebook.size(); i++)
  1740. {
  1741. parent_codebook.resize(parent_codebook.size() + 1);
  1742. for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++)
  1743. {
  1744. const uint32_t group_index = group_parent_codebook[i][j];
  1745. typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
  1746. const uint_vec& training_vec_indices = group_iter->second.m_indices;
  1747. append_vector(parent_codebook.back(), training_vec_indices);
  1748. }
  1749. }
  1750. return true;
  1751. }
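// Illustrative call sketch (hypothetical helper): the threaded front end above first merges
// identical training vectors (optionally treating each even/odd pair as duplicates), quantizes the
// unique set, then expands each cluster back to the original training-vector indices.
template<typename Quantizer>
inline bool example_generate_codebooks(Quantizer& q, job_pool& jpool, uint32_t num_threads,
	basisu::vector<uint_vec>& codebook, basisu::vector<uint_vec>& parent_codebook)
{
	// Up to 1024 clusters and 128 coarser "parent" clusters; the final argument states that
	// consecutive even/odd training vectors are not guaranteed to be identical.
	return generate_hierarchical_codebook_threaded(q, 1024, 128, codebook, parent_codebook,
		num_threads, &jpool, false);
}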
  1752. // Canonical Huffman coding
  1753. class histogram
  1754. {
  1755. basisu::vector<uint32_t> m_hist;
  1756. public:
  1757. histogram(uint32_t size = 0) { init(size); }
  1758. void clear()
  1759. {
  1760. clear_vector(m_hist);
  1761. }
  1762. void init(uint32_t size)
  1763. {
  1764. m_hist.resize(0);
  1765. m_hist.resize(size);
  1766. }
  1767. inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); }
  1768. inline const uint32_t &operator[] (uint32_t index) const
  1769. {
  1770. return m_hist[index];
  1771. }
  1772. inline uint32_t &operator[] (uint32_t index)
  1773. {
  1774. return m_hist[index];
  1775. }
  1776. inline void inc(uint32_t index)
  1777. {
  1778. m_hist[index]++;
  1779. }
  1780. uint64_t get_total() const
  1781. {
  1782. uint64_t total = 0;
  1783. for (uint32_t i = 0; i < m_hist.size(); ++i)
  1784. total += m_hist[i];
  1785. return total;
  1786. }
  1787. double get_entropy() const
  1788. {
  1789. double total = static_cast<double>(get_total());
  1790. if (total == 0.0f)
  1791. return 0.0f;
  1792. const double inv_total = 1.0f / total;
  1793. const double neg_inv_log2 = -1.0f / log(2.0f);
  1794. double e = 0.0f;
  1795. for (uint32_t i = 0; i < m_hist.size(); i++)
  1796. if (m_hist[i])
  1797. e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]);
  1798. return e;
  1799. }
  1800. };
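// get_entropy() above returns the total Shannon information content of the recorded symbols in
// bits, i.e. the sum over symbols of -count_i * log2(count_i / total): a lower bound on the output
// size of any memoryless (symbol-by-symbol) entropy coder for this distribution.
// Minimal sketch (hypothetical helper, not part of the library):
inline double example_estimate_compressed_bits(const uint8_vec& syms)
{
	histogram h(256);
	for (uint32_t i = 0; i < syms.size(); i++)
		h.inc(syms[i]); // count each byte value
	return h.get_entropy();
}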
  1801. struct sym_freq
  1802. {
  1803. uint32_t m_key;
  1804. uint16_t m_sym_index;
  1805. };
  1806. sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
  1807. void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms);
  1808. void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size);
  1809. class huffman_encoding_table
  1810. {
  1811. public:
  1812. huffman_encoding_table()
  1813. {
  1814. }
  1815. void clear()
  1816. {
  1817. clear_vector(m_codes);
  1818. clear_vector(m_code_sizes);
  1819. }
  1820. bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize)
  1821. {
  1822. return init(h.size(), &h[0], max_code_size);
  1823. }
  1824. bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size);
  1825. bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size);
  1826. inline const uint16_vec &get_codes() const { return m_codes; }
  1827. inline const uint8_vec &get_code_sizes() const { return m_code_sizes; }
  1828. uint32_t get_total_used_codes() const
  1829. {
  1830. for (int i = static_cast<int>(m_code_sizes.size()) - 1; i >= 0; i--)
  1831. if (m_code_sizes[i])
  1832. return i + 1;
  1833. return 0;
  1834. }
  1835. private:
  1836. uint16_vec m_codes;
  1837. uint8_vec m_code_sizes;
  1838. };
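// Minimal sketch (hypothetical helper): build a canonical Huffman table straight from a histogram.
// init() returns false on failure.
inline bool example_build_huffman_table(const histogram& h, huffman_encoding_table& tab)
{
	return tab.init(h, cHuffmanMaxSupportedCodeSize); // cap per-symbol code lengths at the library maximum
}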
  1839. class bitwise_coder
  1840. {
  1841. public:
  1842. bitwise_coder() :
  1843. m_bit_buffer(0),
  1844. m_bit_buffer_size(0),
  1845. m_total_bits(0)
  1846. {
  1847. }
  1848. inline void clear()
  1849. {
  1850. clear_vector(m_bytes);
  1851. m_bit_buffer = 0;
  1852. m_bit_buffer_size = 0;
  1853. m_total_bits = 0;
  1854. }
  1855. inline const uint8_vec &get_bytes() const { return m_bytes; }
  1856. inline uint64_t get_total_bits() const { return m_total_bits; }
  1857. inline void clear_total_bits() { m_total_bits = 0; }
  1858. inline void init(uint32_t reserve_size = 1024)
  1859. {
  1860. m_bytes.reserve(reserve_size);
  1861. m_bytes.resize(0);
  1862. m_bit_buffer = 0;
  1863. m_bit_buffer_size = 0;
  1864. m_total_bits = 0;
  1865. }
  1866. inline uint32_t flush()
  1867. {
  1868. if (m_bit_buffer_size)
  1869. {
  1870. m_total_bits += 8 - (m_bit_buffer_size & 7);
  1871. append_byte(static_cast<uint8_t>(m_bit_buffer));
  1872. m_bit_buffer = 0;
  1873. m_bit_buffer_size = 0;
  1874. return 8;
  1875. }
  1876. return 0;
  1877. }
  1878. inline uint32_t put_bits(uint32_t bits, uint32_t num_bits)
  1879. {
  1880. assert(num_bits <= 32);
  1881. assert(bits < (1ULL << num_bits));
  1882. if (!num_bits)
  1883. return 0;
  1884. m_total_bits += num_bits;
  1885. uint64_t v = (static_cast<uint64_t>(bits) << m_bit_buffer_size) | m_bit_buffer;
  1886. m_bit_buffer_size += num_bits;
  1887. while (m_bit_buffer_size >= 8)
  1888. {
  1889. append_byte(static_cast<uint8_t>(v));
  1890. v >>= 8;
  1891. m_bit_buffer_size -= 8;
  1892. }
  1893. m_bit_buffer = static_cast<uint8_t>(v);
  1894. return num_bits;
  1895. }
  1896. inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab)
  1897. {
  1898. uint32_t code = tab.get_codes()[sym];
  1899. uint32_t code_size = tab.get_code_sizes()[sym];
  1900. assert(code_size >= 1);
  1901. put_bits(code, code_size);
  1902. return code_size;
  1903. }
  1904. inline uint32_t put_truncated_binary(uint32_t v, uint32_t n)
  1905. {
  1906. assert((n >= 2) && (v < n));
  1907. uint32_t k = floor_log2i(n);
  1908. uint32_t u = (1 << (k + 1)) - n;
  1909. if (v < u)
  1910. return put_bits(v, k);
  1911. uint32_t x = v + u;
  1912. assert((x >> 1) >= u);
  1913. put_bits(x >> 1, k);
  1914. put_bits(x & 1, 1);
  1915. return k + 1;
  1916. }
  1917. inline uint32_t put_rice(uint32_t v, uint32_t m)
  1918. {
  1919. assert(m);
  1920. const uint64_t start_bits = m_total_bits;
  1921. uint32_t q = v >> m, r = v & ((1 << m) - 1);
  1922. // rice coding sanity check
  1923. assert(q <= 64);
  1924. for (; q > 16; q -= 16)
  1925. put_bits(0xFFFF, 16);
  1926. put_bits((1 << q) - 1, q);
  1927. put_bits(r << 1, m + 1);
  1928. return (uint32_t)(m_total_bits - start_bits);
  1929. }
  1930. inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits)
  1931. {
  1932. assert(chunk_bits);
  1933. const uint32_t chunk_size = 1 << chunk_bits;
  1934. const uint32_t chunk_mask = chunk_size - 1;
  1935. uint32_t total_bits = 0;
  1936. for ( ; ; )
  1937. {
  1938. uint32_t next_v = v >> chunk_bits;
  1939. total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1);
  1940. if (!next_v)
  1941. break;
  1942. v = next_v;
  1943. }
  1944. return total_bits;
  1945. }
  1946. uint32_t emit_huffman_table(const huffman_encoding_table &tab);
  1947. private:
  1948. uint8_vec m_bytes;
  1949. uint32_t m_bit_buffer, m_bit_buffer_size;
  1950. uint64_t m_total_bits;
  1951. void append_byte(uint8_t c)
  1952. {
  1953. m_bytes.resize(m_bytes.size() + 1);
  1954. m_bytes.back() = c;
  1955. }
  1956. static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len);
  1957. static void end_zero_run(uint16_vec &syms, uint32_t &run_size);
  1958. };
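// Illustrative end-to-end sketch (hypothetical helper): Huffman-code a byte stream using the
// classes above. The symbols are counted, a canonical table is built, the table itself is
// serialized with emit_huffman_table() so a decoder can rebuild it, and then every symbol is
// written with put_code().
inline bool example_huffman_encode_bytes(const uint8_vec& syms, uint8_vec& out_bytes)
{
	histogram h(256);
	for (uint32_t i = 0; i < syms.size(); i++)
		h.inc(syms[i]);

	huffman_encoding_table tab;
	if (!tab.init(h, cHuffmanMaxSupportedCodeSize))
		return false;

	bitwise_coder coder;
	coder.init();
	coder.emit_huffman_table(tab);      // code lengths first, so a decoder can reconstruct the table
	for (uint32_t i = 0; i < syms.size(); i++)
		coder.put_code(syms[i], tab);   // emit each symbol's canonical code
	coder.flush();                      // pad and write the final partial byte

	out_bytes = coder.get_bytes();
	return true;
}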
  1959. class huff2D
  1960. {
  1961. public:
  1962. huff2D() { }
  1963. huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); }
  1964. inline const histogram &get_histogram() const { return m_histogram; }
  1965. inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; }
  1966. inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group)
  1967. {
  1968. assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1);
  1969. m_bits_per_sym = bits_per_sym;
  1970. m_total_syms_per_group = total_syms_per_group;
  1971. m_cur_sym_bits = 0;
  1972. m_cur_num_syms = 0;
  1973. m_decode_syms_remaining = 0;
  1974. m_next_decoder_group_index = 0;
  1975. m_histogram.init(1 << (bits_per_sym * total_syms_per_group));
  1976. }
  1977. inline void clear()
  1978. {
  1979. m_group_bits.clear();
  1980. m_cur_sym_bits = 0;
  1981. m_cur_num_syms = 0;
  1982. m_decode_syms_remaining = 0;
  1983. m_next_decoder_group_index = 0;
  1984. }
  1985. inline void emit(uint32_t sym)
  1986. {
  1987. m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym));
  1988. m_cur_num_syms++;
  1989. if (m_cur_num_syms == m_total_syms_per_group)
  1990. flush();
  1991. }
  1992. inline void flush()
  1993. {
  1994. if (m_cur_num_syms)
  1995. {
  1996. m_group_bits.push_back(m_cur_sym_bits);
  1997. m_histogram.inc(m_cur_sym_bits);
  1998. m_cur_sym_bits = 0;
  1999. m_cur_num_syms = 0;
  2000. }
  2001. }
  2002. inline bool start_encoding(uint32_t code_size_limit = 16)
  2003. {
  2004. flush();
  2005. if (!m_encoding_table.init(m_histogram, code_size_limit))
  2006. return false;
  2007. m_decode_syms_remaining = 0;
  2008. m_next_decoder_group_index = 0;
  2009. return true;
  2010. }
  2011. inline uint32_t emit_next_sym(bitwise_coder &c)
  2012. {
  2013. uint32_t bits = 0;
  2014. if (!m_decode_syms_remaining)
  2015. {
  2016. bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table);
  2017. m_decode_syms_remaining = m_total_syms_per_group;
  2018. }
  2019. m_decode_syms_remaining--;
  2020. return bits;
  2021. }
  2022. inline void emit_flush()
  2023. {
  2024. m_decode_syms_remaining = 0;
  2025. }
  2026. private:
  2027. uint_vec m_group_bits;
  2028. huffman_encoding_table m_encoding_table;
  2029. histogram m_histogram;
  2030. uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining;
  2031. };
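// Illustrative sketch (hypothetical helper): huff2D packs several small fixed-width symbols into
// one composite symbol and Huffman-codes the groups. The flow implied by the interface above is two
// passes over the same symbol sequence: emit() everything to build the histogram, call
// start_encoding(), then call emit_next_sym() once per symbol in the same order to write the codes.
inline bool example_huff2d_encode(const uint_vec& syms, uint32_t bits_per_sym, bitwise_coder& coder)
{
	// Two symbols per group; bits_per_sym must satisfy 2 * bits_per_sym <= 16, and every
	// symbol must be < (1 << bits_per_sym).
	huff2D h(bits_per_sym, 2);
	for (uint32_t i = 0; i < syms.size(); i++)
		h.emit(syms[i]);                // pass 1: histogram the grouped symbols
	if (!h.start_encoding())
		return false;
	coder.emit_huffman_table(h.get_encoding_table());
	for (uint32_t i = 0; i < syms.size(); i++)
		h.emit_next_sym(coder);         // pass 2: one Huffman code is written per full group
	h.emit_flush();
	return true;
}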
  2032. bool huffman_test(int rand_seed);
  2033. // VQ index reordering
  2034. class palette_index_reorderer
  2035. {
  2036. public:
  2037. palette_index_reorderer()
  2038. {
  2039. }
  2040. void clear()
  2041. {
  2042. clear_vector(m_hist);
  2043. clear_vector(m_total_count_to_picked);
  2044. clear_vector(m_entries_picked);
  2045. clear_vector(m_entries_to_do);
  2046. clear_vector(m_remap_table);
  2047. }
  2048. // returns [0,1] distance of entry i to entry j
  2049. typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx);
  2050. void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
  2051. // Table remaps old to new symbol indices
  2052. inline const uint_vec &get_remap_table() const { return m_remap_table; }
  2053. private:
  2054. uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table;
  2055. inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? m_hist[j * n + i] : m_hist[i * n + j]; }
  2056. inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } }
  2057. void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices);
  2058. void find_initial(uint32_t num_syms);
  2059. void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
  2060. float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
  2061. };
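// Illustrative sketch (hypothetical helper and callback): reorder a VQ palette so that entries
// which frequently appear next to each other in the index stream end up with nearby indices, which
// tends to make the index stream cheaper to entropy-code. A real caller would supply a meaningful
// [0,1] entry-distance callback; the one below treats all entries as identical.
inline float example_entry_dist(uint32_t, uint32_t, void*) { return 0.0f; }

inline void example_reorder_palette(const uint_vec& indices, uint32_t num_palette_entries, uint_vec& remap_table)
{
	palette_index_reorderer reorderer;
	reorderer.init((uint32_t)indices.size(), indices.data(), num_palette_entries, example_entry_dist, nullptr, 0.0f);
	remap_table = reorderer.get_remap_table(); // maps old palette indices to new ones
}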
  2062. // Simple 32-bit 2D image class
  2063. class image
  2064. {
  2065. public:
  2066. image() :
  2067. m_width(0), m_height(0), m_pitch(0)
  2068. {
  2069. }
  2070. image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
  2071. m_width(0), m_height(0), m_pitch(0)
  2072. {
  2073. resize(w, h, p);
  2074. }
  2075. image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) :
  2076. m_width(0), m_height(0), m_pitch(0)
  2077. {
  2078. init(pImage, width, height, comps);
  2079. }
  2080. image(const image &other) :
  2081. m_width(0), m_height(0), m_pitch(0)
  2082. {
  2083. *this = other;
  2084. }
  2085. image &swap(image &other)
  2086. {
  2087. std::swap(m_width, other.m_width);
  2088. std::swap(m_height, other.m_height);
  2089. std::swap(m_pitch, other.m_pitch);
  2090. m_pixels.swap(other.m_pixels);
  2091. return *this;
  2092. }
  2093. image &operator= (const image &rhs)
  2094. {
  2095. if (this != &rhs)
  2096. {
  2097. m_width = rhs.m_width;
  2098. m_height = rhs.m_height;
  2099. m_pitch = rhs.m_pitch;
  2100. m_pixels = rhs.m_pixels;
  2101. }
  2102. return *this;
  2103. }
  2104. image &clear()
  2105. {
  2106. m_width = 0;
  2107. m_height = 0;
  2108. m_pitch = 0;
  2109. clear_vector(m_pixels);
  2110. return *this;
  2111. }
  2112. image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color)
  2113. {
  2114. return crop(w, h, p, background);
  2115. }
  2116. image &set_all(const color_rgba &c)
  2117. {
  2118. for (uint32_t i = 0; i < m_pixels.size(); i++)
  2119. m_pixels[i] = c;
  2120. return *this;
  2121. }
  2122. void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps)
  2123. {
  2124. assert(comps >= 1 && comps <= 4);
  2125. resize(width, height);
  2126. for (uint32_t y = 0; y < height; y++)
  2127. {
  2128. for (uint32_t x = 0; x < width; x++)
  2129. {
  2130. const uint8_t *pSrc = &pImage[(x + y * width) * comps];
  2131. color_rgba &dst = (*this)(x, y);
  2132. if (comps == 1)
  2133. {
  2134. dst.r = pSrc[0];
  2135. dst.g = pSrc[0];
  2136. dst.b = pSrc[0];
  2137. dst.a = 255;
  2138. }
  2139. else if (comps == 2)
  2140. {
  2141. dst.r = pSrc[0];
  2142. dst.g = pSrc[0];
  2143. dst.b = pSrc[0];
  2144. dst.a = pSrc[1];
  2145. }
  2146. else
  2147. {
  2148. dst.r = pSrc[0];
  2149. dst.g = pSrc[1];
  2150. dst.b = pSrc[2];
  2151. if (comps == 4)
  2152. dst.a = pSrc[3];
  2153. else
  2154. dst.a = 255;
  2155. }
  2156. }
  2157. }
  2158. }
  2159. image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c)
  2160. {
  2161. for (uint32_t iy = 0; iy < h; iy++)
  2162. for (uint32_t ix = 0; ix < w; ix++)
  2163. set_clipped(x + ix, y + iy, c);
  2164. return *this;
  2165. }
  2166. image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c)
  2167. {
  2168. for (uint32_t iy = 0; iy < h; iy++)
  2169. for (uint32_t ix = 0; ix < w; ix++)
  2170. set_clipped_alpha(x + ix, y + iy, c);
  2171. return *this;
  2172. }
  2173. image &crop_dup_borders(uint32_t w, uint32_t h)
  2174. {
  2175. const uint32_t orig_w = m_width, orig_h = m_height;
  2176. crop(w, h);
  2177. if (orig_w && orig_h)
  2178. {
  2179. if (m_width > orig_w)
  2180. {
  2181. for (uint32_t x = orig_w; x < m_width; x++)
  2182. for (uint32_t y = 0; y < m_height; y++)
  2183. set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
  2184. }
  2185. if (m_height > orig_h)
  2186. {
  2187. for (uint32_t y = orig_h; y < m_height; y++)
  2188. for (uint32_t x = 0; x < m_width; x++)
  2189. set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
  2190. }
  2191. }
  2192. return *this;
  2193. }
  2194. // pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer).
  2195. image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX)
  2196. {
  2197. if (p == UINT32_MAX)
  2198. p = w;
  2199. clear();
  2200. if ((!p) || (!w) || (!h))
  2201. return *this;
  2202. m_pixels.grant_ownership(pPixels, p * h, p * h);
  2203. m_width = w;
  2204. m_height = h;
  2205. m_pitch = p;
  2206. return *this;
  2207. }
  2208. image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true)
  2209. {
  2210. if (p == UINT32_MAX)
  2211. p = w;
  2212. if ((w == m_width) && (m_height == h) && (m_pitch == p))
  2213. return *this;
  2214. if ((!w) || (!h) || (!p))
  2215. {
  2216. clear();
  2217. return *this;
  2218. }
  2219. color_rgba_vec cur_state;
  2220. cur_state.swap(m_pixels);
  2221. m_pixels.resize(p * h);
  2222. if (init_image)
  2223. {
  2224. if (m_width || m_height)
  2225. {
  2226. for (uint32_t y = 0; y < h; y++)
  2227. {
  2228. for (uint32_t x = 0; x < w; x++)
  2229. {
  2230. if ((x < m_width) && (y < m_height))
  2231. m_pixels[x + y * p] = cur_state[x + y * m_pitch];
  2232. else
  2233. m_pixels[x + y * p] = background;
  2234. }
  2235. }
  2236. }
  2237. else
  2238. {
  2239. m_pixels.set_all(background);
  2240. }
  2241. }
  2242. m_width = w;
  2243. m_height = h;
  2244. m_pitch = p;
  2245. return *this;
  2246. }
  2247. inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  2248. inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  2249. inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  2250. inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  2251. inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
  2252. {
  2253. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  2254. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  2255. return m_pixels[x + y * m_pitch];
  2256. }
  2257. inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
  2258. {
  2259. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  2260. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  2261. return m_pixels[x + y * m_pitch];
  2262. }
  2263. inline image &set_clipped(int x, int y, const color_rgba &c)
  2264. {
  2265. if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
  2266. (*this)(x, y) = c;
  2267. return *this;
  2268. }
  2269. inline image& set_clipped_alpha(int x, int y, const color_rgba& c)
  2270. {
  2271. if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
  2272. (*this)(x, y).m_comps[3] = c.m_comps[3];
  2273. return *this;
  2274. }
  2275. // Very straightforward blit with full clipping. Not fast, but it works.
  2276. image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
  2277. {
  2278. for (int y = 0; y < src_h; y++)
  2279. {
  2280. const int sy = src_y + y;
  2281. if (sy < 0)
  2282. continue;
  2283. else if (sy >= (int)src.get_height())
  2284. break;
  2285. for (int x = 0; x < src_w; x++)
  2286. {
  2287. const int sx = src_x + x;
  2288. if (sx < 0)
  2289. continue;
2290. else if (sx >= (int)src.get_width())
  2291. break;
  2292. set_clipped(dst_x + x, dst_y + y, src(sx, sy));
  2293. }
  2294. }
  2295. return *this;
  2296. }
  2297. const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
  2298. {
  2299. if (((src_x + w) > m_width) || ((src_y + h) > m_height))
  2300. {
  2301. // Slower clamping case
  2302. for (uint32_t y = 0; y < h; y++)
  2303. for (uint32_t x = 0; x < w; x++)
  2304. *pDst++ = get_clamped(src_x + x, src_y + y);
  2305. }
  2306. else
  2307. {
  2308. const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch];
  2309. for (uint32_t y = 0; y < h; y++)
  2310. {
  2311. memcpy(pDst, pSrc, w * sizeof(color_rgba));
  2312. pSrc += m_pitch;
  2313. pDst += w;
  2314. }
  2315. }
  2316. return *this;
  2317. }
  2318. image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
  2319. {
  2320. for (uint32_t y = 0; y < h; y++)
  2321. for (uint32_t x = 0; x < w; x++)
  2322. set_clipped(dst_x + x, dst_y + y, *pSrc++);
  2323. return *this;
  2324. }
  2325. inline uint32_t get_width() const { return m_width; }
  2326. inline uint32_t get_height() const { return m_height; }
  2327. inline uint32_t get_pitch() const { return m_pitch; }
  2328. inline uint32_t get_total_pixels() const { return m_width * m_height; }
  2329. inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
  2330. inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
  2331. inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }
  2332. inline const color_rgba_vec &get_pixels() const { return m_pixels; }
  2333. inline color_rgba_vec &get_pixels() { return m_pixels; }
  2334. inline const color_rgba *get_ptr() const { return &m_pixels[0]; }
  2335. inline color_rgba *get_ptr() { return &m_pixels[0]; }
  2336. bool has_alpha() const
  2337. {
  2338. for (uint32_t y = 0; y < m_height; ++y)
  2339. for (uint32_t x = 0; x < m_width; ++x)
  2340. if ((*this)(x, y).a < 255)
  2341. return true;
  2342. return false;
  2343. }
  2344. image &set_alpha(uint8_t a)
  2345. {
  2346. for (uint32_t y = 0; y < m_height; ++y)
  2347. for (uint32_t x = 0; x < m_width; ++x)
  2348. (*this)(x, y).a = a;
  2349. return *this;
  2350. }
  2351. image &flip_y()
  2352. {
  2353. for (uint32_t y = 0; y < m_height / 2; ++y)
  2354. for (uint32_t x = 0; x < m_width; ++x)
  2355. std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));
  2356. return *this;
  2357. }
  2358. // TODO: There are many ways to do this, not sure this is the best way.
  2359. image &renormalize_normal_map()
  2360. {
  2361. for (uint32_t y = 0; y < m_height; y++)
  2362. {
  2363. for (uint32_t x = 0; x < m_width; x++)
  2364. {
  2365. color_rgba &c = (*this)(x, y);
  2366. if ((c.r == 128) && (c.g == 128) && (c.b == 128))
  2367. continue;
  2368. vec3F v(c.r, c.g, c.b);
  2369. v = (v * (2.0f / 255.0f)) - vec3F(1.0f);
  2370. v.clamp(-1.0f, 1.0f);
  2371. float length = v.length();
  2372. const float cValidThresh = .077f;
  2373. if (length < cValidThresh)
  2374. {
  2375. c.set(128, 128, 128, c.a);
  2376. }
  2377. else if (fabs(length - 1.0f) > cValidThresh)
  2378. {
  2379. if (length)
  2380. v /= length;
  2381. for (uint32_t i = 0; i < 3; i++)
  2382. c[i] = static_cast<uint8_t>(clamp<float>(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f));
  2383. if ((c.g == 128) && (c.r == 128))
  2384. {
  2385. if (c.b < 128)
  2386. c.b = 0;
  2387. else
  2388. c.b = 255;
  2389. }
  2390. }
  2391. }
  2392. }
  2393. return *this;
  2394. }
  2395. void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...);
  2396. private:
  2397. uint32_t m_width, m_height, m_pitch; // all in pixels
  2398. color_rgba_vec m_pixels;
  2399. };
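// Illustrative sketch (hypothetical helper): composite one image onto another, then pad the result
// up to a multiple of the 4x4 block size, duplicating the edge pixels so the padding does not
// introduce new colors at the block boundaries.
inline void example_compose_and_pad(image& dst, const image& overlay, int dst_x, int dst_y)
{
	dst.blit(overlay, 0, 0, (int)overlay.get_width(), (int)overlay.get_height(), dst_x, dst_y);
	dst.crop_dup_borders(dst.get_block_width(4) * 4, dst.get_block_height(4) * 4); // grow to a block-aligned size
}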
  2400. // Float images
  2401. typedef basisu::vector<vec4F> vec4F_vec;
  2402. class imagef
  2403. {
  2404. public:
  2405. imagef() :
  2406. m_width(0), m_height(0), m_pitch(0)
  2407. {
  2408. }
  2409. imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
  2410. m_width(0), m_height(0), m_pitch(0)
  2411. {
  2412. resize(w, h, p);
  2413. }
  2414. imagef(const imagef &other) :
  2415. m_width(0), m_height(0), m_pitch(0)
  2416. {
  2417. *this = other;
  2418. }
  2419. imagef &swap(imagef &other)
  2420. {
  2421. std::swap(m_width, other.m_width);
  2422. std::swap(m_height, other.m_height);
  2423. std::swap(m_pitch, other.m_pitch);
  2424. m_pixels.swap(other.m_pixels);
  2425. return *this;
  2426. }
  2427. imagef &operator= (const imagef &rhs)
  2428. {
  2429. if (this != &rhs)
  2430. {
  2431. m_width = rhs.m_width;
  2432. m_height = rhs.m_height;
  2433. m_pitch = rhs.m_pitch;
  2434. m_pixels = rhs.m_pixels;
  2435. }
  2436. return *this;
  2437. }
  2438. imagef &clear()
  2439. {
  2440. m_width = 0;
  2441. m_height = 0;
  2442. m_pitch = 0;
  2443. clear_vector(m_pixels);
  2444. return *this;
  2445. }
  2446. imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0))
  2447. {
  2448. const uint32_t width = src.get_width();
  2449. const uint32_t height = src.get_height();
  2450. resize(width, height);
  2451. for (int y = 0; y < (int)height; y++)
  2452. {
  2453. for (uint32_t x = 0; x < width; x++)
  2454. {
  2455. const color_rgba &src_pixel = src(x, y);
  2456. (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]);
  2457. }
  2458. }
  2459. return *this;
  2460. }
  2461. imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
  2462. {
  2463. return resize(other.get_width(), other.get_height(), p, background);
  2464. }
  2465. imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
  2466. {
  2467. return crop(w, h, p, background);
  2468. }
  2469. imagef &set_all(const vec4F &c)
  2470. {
  2471. for (uint32_t i = 0; i < m_pixels.size(); i++)
  2472. m_pixels[i] = c;
  2473. return *this;
  2474. }
  2475. imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c)
  2476. {
  2477. for (uint32_t iy = 0; iy < h; iy++)
  2478. for (uint32_t ix = 0; ix < w; ix++)
  2479. set_clipped(x + ix, y + iy, c);
  2480. return *this;
  2481. }
  2482. imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1))
  2483. {
  2484. if (p == UINT32_MAX)
  2485. p = w;
  2486. if ((w == m_width) && (m_height == h) && (m_pitch == p))
  2487. return *this;
  2488. if ((!w) || (!h) || (!p))
  2489. {
  2490. clear();
  2491. return *this;
  2492. }
  2493. vec4F_vec cur_state;
  2494. cur_state.swap(m_pixels);
  2495. m_pixels.resize(p * h);
  2496. for (uint32_t y = 0; y < h; y++)
  2497. {
  2498. for (uint32_t x = 0; x < w; x++)
  2499. {
  2500. if ((x < m_width) && (y < m_height))
  2501. m_pixels[x + y * p] = cur_state[x + y * m_pitch];
  2502. else
  2503. m_pixels[x + y * p] = background;
  2504. }
  2505. }
  2506. m_width = w;
  2507. m_height = h;
  2508. m_pitch = p;
  2509. return *this;
  2510. }
  2511. inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  2512. inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  2513. inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  2514. inline vec4F &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  2515. inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
  2516. {
  2517. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  2518. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  2519. return m_pixels[x + y * m_pitch];
  2520. }
  2521. inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
  2522. {
  2523. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  2524. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  2525. return m_pixels[x + y * m_pitch];
  2526. }
  2527. inline imagef &set_clipped(int x, int y, const vec4F &c)
  2528. {
  2529. if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
  2530. (*this)(x, y) = c;
  2531. return *this;
  2532. }
  2533. // Very straightforward blit with full clipping. Not fast, but it works.
  2534. imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
  2535. {
  2536. for (int y = 0; y < src_h; y++)
  2537. {
  2538. const int sy = src_y + y;
  2539. if (sy < 0)
  2540. continue;
  2541. else if (sy >= (int)src.get_height())
  2542. break;
  2543. for (int x = 0; x < src_w; x++)
  2544. {
  2545. const int sx = src_x + x;
  2546. if (sx < 0)
  2547. continue;
2548. else if (sx >= (int)src.get_width())
  2549. break;
  2550. set_clipped(dst_x + x, dst_y + y, src(sx, sy));
  2551. }
  2552. }
  2553. return *this;
  2554. }
  2555. const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
  2556. {
  2557. for (uint32_t y = 0; y < h; y++)
  2558. for (uint32_t x = 0; x < w; x++)
  2559. *pDst++ = get_clamped(src_x + x, src_y + y);
  2560. return *this;
  2561. }
  2562. imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
  2563. {
  2564. for (uint32_t y = 0; y < h; y++)
  2565. for (uint32_t x = 0; x < w; x++)
  2566. set_clipped(dst_x + x, dst_y + y, *pSrc++);
  2567. return *this;
  2568. }
  2569. inline uint32_t get_width() const { return m_width; }
  2570. inline uint32_t get_height() const { return m_height; }
  2571. inline uint32_t get_pitch() const { return m_pitch; }
  2572. inline uint32_t get_total_pixels() const { return m_width * m_height; }
  2573. inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
  2574. inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
  2575. inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }
  2576. inline const vec4F_vec &get_pixels() const { return m_pixels; }
  2577. inline vec4F_vec &get_pixels() { return m_pixels; }
  2578. inline const vec4F *get_ptr() const { return &m_pixels[0]; }
  2579. inline vec4F *get_ptr() { return &m_pixels[0]; }
  2580. private:
  2581. uint32_t m_width, m_height, m_pitch; // all in pixels
  2582. vec4F_vec m_pixels;
  2583. };
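// Illustrative sketch (hypothetical helper): convert an 8-bit image into a float image with all
// four components scaled into [0, 1], using the per-channel scale/bias supported by set() above.
inline void example_to_normalized_float(const image& src, imagef& dst)
{
	dst.set(src, vec4F(1.0f / 255.0f), vec4F(0.0f)); // value * (1/255) + 0 per channel
}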
  2584. // Image metrics
  2585. class image_metrics
  2586. {
  2587. public:
  2588. // TODO: Add ssim
  2589. float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
  2590. image_metrics()
  2591. {
  2592. clear();
  2593. }
  2594. void clear()
  2595. {
  2596. m_max = 0;
  2597. m_mean = 0;
  2598. m_mean_squared = 0;
  2599. m_rms = 0;
  2600. m_psnr = 0;
  2601. m_ssim = 0;
  2602. }
  2603. void print(const char *pPrefix = nullptr) { printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); }
  2604. void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);
  2605. };
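// Illustrative sketch (hypothetical helper): report RGB and alpha PSNR between an original image
// and its encoded/decoded version. The first_chan/total_chans arguments are assumed to select a
// contiguous channel range (0..2 = RGB, 3 = alpha).
inline void example_print_psnr(const image& orig, const image& decoded)
{
	image_metrics rgb_metrics, alpha_metrics;
	rgb_metrics.calc(orig, decoded, 0, 3);   // channels 0..2
	alpha_metrics.calc(orig, decoded, 3, 1); // channel 3
	rgb_metrics.print("RGB:   ");
	alpha_metrics.print("Alpha: ");
}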
  2606. // Image saving/loading/resampling
  2607. bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr);
  2608. bool load_png(const char* pFilename, image& img);
  2609. inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); }
  2610. bool load_tga(const char* pFilename, image& img);
  2611. inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); }
  2612. bool load_jpg(const char *pFilename, image& img);
  2613. inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }
  2614. // Currently loads .PNG, .TGA, or .JPG
  2615. bool load_image(const char* pFilename, image& img);
  2616. inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }
  2617. uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);
  2618. uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);
  2619. enum
  2620. {
  2621. cImageSaveGrayscale = 1,
  2622. cImageSaveIgnoreAlpha = 2
  2623. };
  2624. bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0);
  2625. inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }
  2626. bool read_file_to_vec(const char* pFilename, uint8_vec& data);
  2627. bool write_data_to_file(const char* pFilename, const void* pData, size_t len);
  2628. inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }
  2629. float linear_to_srgb(float l);
  2630. float srgb_to_linear(float s);
  2631. bool image_resample(const image &src, image &dst, bool srgb = false,
  2632. const char *pFilter = "lanczos4", float filter_scale = 1.0f,
  2633. bool wrapping = false,
  2634. uint32_t first_comp = 0, uint32_t num_comps = 4);
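// Illustrative sketch (hypothetical helper): produce a half-resolution mip level. image_resample()
// above takes no explicit output dimensions, so the destination is assumed to be pre-sized to the
// desired resolution; srgb == true resamples color data in linear light, which generally looks better.
inline bool example_make_half_res_mip(const image& src, image& dst)
{
	const uint32_t half_w = (src.get_width() > 1) ? (src.get_width() / 2) : 1;
	const uint32_t half_h = (src.get_height() > 1) ? (src.get_height() / 2) : 1;
	dst.resize(half_w, half_h);
	return image_resample(src, dst, true, "lanczos4");
}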
  2635. // Timing
  2636. typedef uint64_t timer_ticks;
  2637. class interval_timer
  2638. {
  2639. public:
  2640. interval_timer();
  2641. void start();
  2642. void stop();
  2643. double get_elapsed_secs() const;
2644. inline double get_elapsed_ms() const { return 1000.0f * get_elapsed_secs(); }
  2645. static void init();
  2646. static inline timer_ticks get_ticks_per_sec() { return g_freq; }
  2647. static timer_ticks get_ticks();
  2648. static double ticks_to_secs(timer_ticks ticks);
  2649. static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; }
  2650. private:
  2651. static timer_ticks g_init_ticks, g_freq;
  2652. static double g_timer_freq;
  2653. timer_ticks m_start_time, m_stop_time;
  2654. bool m_started, m_stopped;
  2655. };
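// Minimal sketch (hypothetical helper): time a block of work, mirroring the commented-out
// interval_timer usage elsewhere in this header (start(), do the work, get_elapsed_secs()).
inline double example_time_call(void (*pFunc)())
{
	interval_timer tm;
	tm.start();
	pFunc();
	return tm.get_elapsed_secs();
}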
  2656. // 2D array
  2657. template<typename T>
  2658. class vector2D
  2659. {
  2660. typedef basisu::vector<T> TVec;
  2661. uint32_t m_width, m_height;
  2662. TVec m_values;
  2663. public:
  2664. vector2D() :
  2665. m_width(0),
  2666. m_height(0)
  2667. {
  2668. }
  2669. vector2D(uint32_t w, uint32_t h) :
  2670. m_width(0),
  2671. m_height(0)
  2672. {
  2673. resize(w, h);
  2674. }
  2675. vector2D(const vector2D &other)
  2676. {
  2677. *this = other;
  2678. }
  2679. vector2D &operator= (const vector2D &other)
  2680. {
  2681. if (this != &other)
  2682. {
  2683. m_width = other.m_width;
  2684. m_height = other.m_height;
  2685. m_values = other.m_values;
  2686. }
  2687. return *this;
  2688. }
  2689. inline bool operator== (const vector2D &rhs) const
  2690. {
  2691. return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values);
  2692. }
  2693. inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); }
  2694. inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
  2695. inline T &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
  2696. inline const T &operator[] (uint32_t i) const { return m_values[i]; }
  2697. inline T &operator[] (uint32_t i) { return m_values[i]; }
2698. inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
2699. inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  2700. void clear()
  2701. {
  2702. m_width = 0;
  2703. m_height = 0;
  2704. m_values.clear();
  2705. }
  2706. void set_all(const T&val)
  2707. {
  2708. vector_set_all(m_values, val);
  2709. }
  2710. inline const T* get_ptr() const { return &m_values[0]; }
  2711. inline T* get_ptr() { return &m_values[0]; }
  2712. vector2D &resize(uint32_t new_width, uint32_t new_height)
  2713. {
  2714. if ((m_width == new_width) && (m_height == new_height))
  2715. return *this;
  2716. TVec oldVals(new_width * new_height);
  2717. oldVals.swap(m_values);
  2718. const uint32_t w = minimum(m_width, new_width);
  2719. const uint32_t h = minimum(m_height, new_height);
  2720. if ((w) && (h))
  2721. {
  2722. for (uint32_t y = 0; y < h; y++)
  2723. for (uint32_t x = 0; x < w; x++)
  2724. m_values[x + y * new_width] = oldVals[x + y * m_width];
  2725. }
  2726. m_width = new_width;
  2727. m_height = new_height;
  2728. return *this;
  2729. }
  2730. };
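// Illustrative sketch (hypothetical helper): vector2D<> is convenient for per-block data stored in
// raster order, e.g. one entry per 4x4 block of an image.
inline void example_init_block_grid(const image& img, vector2D<uint32_t>& block_data)
{
	block_data.resize(img.get_block_width(4), img.get_block_height(4));
	block_data.set_all(0); // start with every block referencing entry 0
}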
  2731. inline FILE *fopen_safe(const char *pFilename, const char *pMode)
  2732. {
  2733. #ifdef _WIN32
  2734. FILE *pFile = nullptr;
  2735. fopen_s(&pFile, pFilename, pMode);
  2736. return pFile;
  2737. #else
  2738. return fopen(pFilename, pMode);
  2739. #endif
  2740. }
  2741. void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1);
  2742. const uint32_t cPixelBlockWidth = 4;
  2743. const uint32_t cPixelBlockHeight = 4;
  2744. const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight;
  2745. struct pixel_block
  2746. {
  2747. color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]
  2748. inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
  2749. inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
  2750. inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; }
  2751. inline color_rgba* get_ptr() { return &m_pixels[0][0]; }
  2752. inline void clear() { clear_obj(*this); }
  2753. inline bool operator== (const pixel_block& rhs) const
  2754. {
  2755. return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;
  2756. }
  2757. };
  2758. typedef basisu::vector<pixel_block> pixel_block_vec;
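// Illustrative sketch (hypothetical helper): split an image into 4x4 pixel_block entries in block
// raster order, clamping reads at the right/bottom edges - the usual front end of a block encoder.
inline void example_extract_blocks(const image& img, pixel_block_vec& blocks)
{
	const uint32_t num_blocks_x = img.get_block_width(cPixelBlockWidth);
	const uint32_t num_blocks_y = img.get_block_height(cPixelBlockHeight);
	blocks.resize(num_blocks_x * num_blocks_y);
	for (uint32_t by = 0; by < num_blocks_y; by++)
		for (uint32_t bx = 0; bx < num_blocks_x; bx++)
			img.extract_block_clamped(blocks[bx + by * num_blocks_x].get_ptr(),
				bx * cPixelBlockWidth, by * cPixelBlockHeight, cPixelBlockWidth, cPixelBlockHeight);
}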
  2759. } // namespace basisu