basisu_enc.h 73 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805
  1. // basisu_enc.h
  2. // Copyright (C) 2019 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. #pragma once
  16. #include "transcoder/basisu.h"
  17. #include "transcoder/basisu_transcoder_internal.h"
  18. #include <mutex>
  19. #include <atomic>
  20. #include <condition_variable>
  21. #include <functional>
  22. #include <thread>
  23. #include <unordered_map>
  24. #ifndef _WIN32
  25. #include <libgen.h>
  26. #endif
  27. namespace basisu
  28. {
  29. extern uint8_t g_hamming_dist[256];
  30. // Encoder library initialization
  31. void basisu_encoder_init();
  32. void error_printf(const char *pFmt, ...);
  33. // Helpers
  // Saturates a signed 32-bit value to [0, 255] and returns it as a byte:
  // negative inputs become 0, inputs above 255 become 255.
  inline uint8_t clamp255(int32_t i)
  {
    if (i < 0)
      return 0;
    if (i > 255)
      return 255;
    return static_cast<uint8_t>(i);
  }
  38. // Hashing
  // Scrambles a 32-bit value with six add/xor/shift rounds, each using a fixed constant.
  // All arithmetic is unsigned and wraps modulo 2^32.
  inline uint32_t bitmix32c(uint32_t v)
  {
    uint32_t h = v;
    h += 0x7ed55d16 + (h << 12);
    h ^= 0xc761c23c ^ (h >> 19);
    h += 0x165667b1 + (h << 5);
    h = (h + 0xd3a2646c) ^ (h << 9);
    h += 0xfd7046c5 + (h << 3);
    h ^= 0xb55a4f09 ^ (h >> 16);
    return h;
  }
  // Scrambles a 32-bit value with seven subtract/xor/shift rounds (no additive constants).
  // All arithmetic is unsigned and wraps modulo 2^32.
  inline uint32_t bitmix32(uint32_t v)
  {
    uint32_t h = v;
    h = h - (h << 6);
    h = h ^ (h >> 17);
    h = h - (h << 9);
    h = h ^ (h << 4);
    h = h - (h << 3);
    h = h ^ (h << 10);
    h = h ^ (h >> 15);
    return h;
  }
  60. uint32_t hash_hsieh(const uint8_t* pBuf, size_t len);
  61. template <typename Key>
  62. struct bit_hasher
  63. {
  64. std::size_t operator()(const Key& k) const
  65. {
  66. return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k));
  67. }
  68. };
  69. // Linear algebra
  70. template <uint32_t N, typename T>
  71. class vec
  72. {
  73. protected:
  74. T m_v[N];
  75. public:
  76. enum { num_elements = N };
  77. inline vec() { }
  78. inline vec(eZero) { set_zero(); }
  79. explicit inline vec(T val) { set(val); }
  80. inline vec(T v0, T v1) { set(v0, v1); }
  81. inline vec(T v0, T v1, T v2) { set(v0, v1, v2); }
  82. inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); }
  83. inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; }
  84. template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); }
  85. inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; }
  86. inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; }
  87. inline T getX() const { return m_v[0]; }
  88. inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; }
  89. inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; }
  90. inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; }
  91. inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; }
  92. inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }
  93. inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
  94. template <uint32_t OtherN, typename OtherT>
  95. inline vec &set(const vec<OtherN, OtherT> &other)
  96. {
  97. uint32_t i;
  98. if (static_cast<void *>(&other) == static_cast<void *>(this))
  99. return *this;
  100. const uint32_t m = minimum(OtherN, N);
  101. for (i = 0; i < m; i++)
  102. m_v[i] = static_cast<T>(other[i]);
  103. for (; i < N; i++)
  104. m_v[i] = 0;
  105. return *this;
  106. }
  107. inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; }
  108. inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; }
  109. inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; }
  110. inline vec &set(T v0, T v1)
  111. {
  112. m_v[0] = v0;
  113. if (N >= 2)
  114. {
  115. m_v[1] = v1;
  116. clear_elements(2, N);
  117. }
  118. return *this;
  119. }
  120. inline vec &set(T v0, T v1, T v2)
  121. {
  122. m_v[0] = v0;
  123. if (N >= 2)
  124. {
  125. m_v[1] = v1;
  126. if (N >= 3)
  127. {
  128. m_v[2] = v2;
  129. clear_elements(3, N);
  130. }
  131. }
  132. return *this;
  133. }
  134. inline vec &set(T v0, T v1, T v2, T v3)
  135. {
  136. m_v[0] = v0;
  137. if (N >= 2)
  138. {
  139. m_v[1] = v1;
  140. if (N >= 3)
  141. {
  142. m_v[2] = v2;
  143. if (N >= 4)
  144. {
  145. m_v[3] = v3;
  146. clear_elements(5, N);
  147. }
  148. }
  149. }
  150. return *this;
  151. }
  152. inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; }
  153. template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; }
  154. inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); }
  155. inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); }
  156. inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; }
  157. inline vec operator+ () const { return *this; }
  158. inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; }
  159. inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; }
  160. inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; }
  161. inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; }
  162. inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; }
  163. inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; }
  164. friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; }
  165. friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; }
  166. friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; }
  167. friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; }
  168. friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; }
  169. friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; }
  170. static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; }
  171. inline T dot(const vec &rhs) const { return dot_product(*this, rhs); }
  172. inline T norm() const { return dot_product(*this, *this); }
  173. inline T length() const { return sqrt(norm()); }
  174. inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; }
  175. inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; }
  176. inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
  177. inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }
  178. inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
  179. inline vec &clamp(T l, T h)
  180. {
  181. for (uint32_t i = 0; i < N; i++)
  182. m_v[i] = basisu::clamp(m_v[i], l, h);
  183. return *this;
  184. }
  185. static vec component_min(const vec& a, const vec& b)
  186. {
  187. vec res;
  188. for (uint32_t i = 0; i < N; i++)
  189. res[i] = minimum(a[i], b[i]);
  190. return res;
  191. }
  192. static vec component_max(const vec& a, const vec& b)
  193. {
  194. vec res;
  195. for (uint32_t i = 0; i < N; i++)
  196. res[i] = maximum(a[i], b[i]);
  197. return res;
  198. }
  199. };
  200. typedef vec<4, double> vec4D;
  201. typedef vec<3, double> vec3D;
  202. typedef vec<2, double> vec2D;
  203. typedef vec<1, double> vec1D;
  204. typedef vec<4, float> vec4F;
  205. typedef vec<3, float> vec3F;
  206. typedef vec<2, float> vec2F;
  207. typedef vec<1, float> vec1F;
  208. template <uint32_t Rows, uint32_t Cols, typename T>
  209. class matrix
  210. {
  211. public:
  212. typedef vec<Rows, T> col_vec;
  213. typedef vec<Cols, T> row_vec;
  214. typedef T scalar_type;
  215. enum { rows = Rows, cols = Cols };
  216. protected:
  217. row_vec m_r[Rows];
  218. public:
  219. inline matrix() {}
  220. inline matrix(eZero) { set_zero(); }
  221. inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; }
  222. inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; }
  223. inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
  224. inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
  225. inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; }
  226. inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; }
  227. inline matrix &set_zero()
  228. {
  229. for (uint32_t i = 0; i < Rows; i++)
  230. m_r[i].set_zero();
  231. return *this;
  232. }
  233. inline matrix &set_identity()
  234. {
  235. for (uint32_t i = 0; i < Rows; i++)
  236. {
  237. m_r[i].set_zero();
  238. if (i < Cols)
  239. m_r[i][i] = 1.0f;
  240. }
  241. return *this;
  242. }
  243. };
  244. template<uint32_t N, typename VectorType>
  245. inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix)
  246. {
  247. VectorType axis;
  248. if (N == 1)
  249. axis.set(1.0f);
  250. else
  251. {
  252. for (uint32_t i = 0; i < N; i++)
  253. axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum<int>(N - 1, 1)));
  254. }
  255. VectorType prev_axis(axis);
  256. // Power iterations
  257. for (uint32_t power_iter = 0; power_iter < 8; power_iter++)
  258. {
  259. VectorType trial_axis;
  260. double max_sum = 0;
  261. for (uint32_t i = 0; i < N; i++)
  262. {
  263. double sum = 0;
  264. for (uint32_t j = 0; j < N; j++)
  265. sum += cmatrix[i][j] * axis[j];
  266. trial_axis[i] = static_cast<float>(sum);
  267. max_sum = maximum(fabs(sum), max_sum);
  268. }
  269. if (max_sum != 0.0f)
  270. trial_axis *= static_cast<float>(1.0f / max_sum);
  271. VectorType delta_axis(prev_axis - trial_axis);
  272. prev_axis = axis;
  273. axis = trial_axis;
  274. if (delta_axis.norm() < .0024f)
  275. break;
  276. }
  277. return axis.normalize_in_place();
  278. }
  // Fills pIndices[0..num_indices) with 0, 1, 2, ... and then sorts those indices so that
  // pKeys[pIndices[i]] is in ascending order (by operator<). pKeys itself is not modified.
  template<typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys)
  {
    uint32_t* const pEnd = pIndices + num_indices;

    uint32_t next_index = 0;
    for (uint32_t* pCur = pIndices; pCur != pEnd; ++pCur)
      *pCur = next_index++;

    std::sort(pIndices, pEnd,
      [pKeys](uint32_t lhs, uint32_t rhs) { return pKeys[lhs] < pKeys[rhs]; });
  }
  289. // Very simple job pool with no dependencies.
  290. class job_pool
  291. {
  292. BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool);
  293. public:
  294. job_pool(uint32_t num_threads);
  295. ~job_pool();
  296. void add_job(const std::function<void()>& job);
  297. void add_job(std::function<void()>&& job);
  298. void wait_for_all();
  299. size_t get_total_threads() const { return 1 + m_threads.size(); }
  300. private:
  301. std::vector<std::thread> m_threads;
  302. std::vector<std::function<void()> > m_queue;
  303. std::mutex m_mutex;
  304. std::condition_variable m_has_work;
  305. std::condition_variable m_no_more_jobs;
  306. uint32_t m_num_active_jobs;
  307. std::atomic<bool> m_kill_flag;
  308. void job_thread(uint32_t index);
  309. };
  // Simple color classes: color_rgba_i16 (four signed 16-bit channels) and color_rgba (32-bit, four 8-bit channels)
  311. class color_rgba_i16
  312. {
  313. public:
  314. union
  315. {
  316. int16_t m_comps[4];
  317. struct
  318. {
  319. int16_t r;
  320. int16_t g;
  321. int16_t b;
  322. int16_t a;
  323. };
  324. };
  325. inline color_rgba_i16()
  326. {
  327. static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4");
  328. }
  329. inline color_rgba_i16(int sr, int sg, int sb, int sa)
  330. {
  331. set(sr, sg, sb, sa);
  332. }
  333. inline color_rgba_i16 &set(int sr, int sg, int sb, int sa)
  334. {
  335. m_comps[0] = (int16_t)clamp<int>(sr, INT16_MIN, INT16_MAX);
  336. m_comps[1] = (int16_t)clamp<int>(sg, INT16_MIN, INT16_MAX);
  337. m_comps[2] = (int16_t)clamp<int>(sb, INT16_MIN, INT16_MAX);
  338. m_comps[3] = (int16_t)clamp<int>(sa, INT16_MIN, INT16_MAX);
  339. return *this;
  340. }
  341. };
  342. class color_rgba
  343. {
  344. public:
  345. union
  346. {
  347. uint8_t m_comps[4];
  348. struct
  349. {
  350. uint8_t r;
  351. uint8_t g;
  352. uint8_t b;
  353. uint8_t a;
  354. };
  355. };
  356. inline color_rgba()
  357. {
  358. static_assert(sizeof(*this) == 4, "sizeof(*this) != 4");
  359. }
  360. inline color_rgba(int y)
  361. {
  362. set(y);
  363. }
  364. inline color_rgba(int y, int na)
  365. {
  366. set(y, na);
  367. }
  368. inline color_rgba(int sr, int sg, int sb, int sa)
  369. {
  370. set(sr, sg, sb, sa);
  371. }
  372. inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa)
  373. {
  374. set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa);
  375. }
  376. inline color_rgba& set_noclamp_y(int y)
  377. {
  378. m_comps[0] = (uint8_t)y;
  379. m_comps[1] = (uint8_t)y;
  380. m_comps[2] = (uint8_t)y;
  381. m_comps[3] = (uint8_t)255;
  382. return *this;
  383. }
  384. inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa)
  385. {
  386. m_comps[0] = (uint8_t)sr;
  387. m_comps[1] = (uint8_t)sg;
  388. m_comps[2] = (uint8_t)sb;
  389. m_comps[3] = (uint8_t)sa;
  390. return *this;
  391. }
  392. inline color_rgba &set(int y)
  393. {
  394. m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
  395. m_comps[1] = m_comps[0];
  396. m_comps[2] = m_comps[0];
  397. m_comps[3] = 255;
  398. return *this;
  399. }
  400. inline color_rgba &set(int y, int na)
  401. {
  402. m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
  403. m_comps[1] = m_comps[0];
  404. m_comps[2] = m_comps[0];
  405. m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255));
  406. return *this;
  407. }
  408. inline color_rgba &set(int sr, int sg, int sb, int sa)
  409. {
  410. m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
  411. m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
  412. m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
  413. m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255));
  414. return *this;
  415. }
  416. inline color_rgba &set_rgb(int sr, int sg, int sb)
  417. {
  418. m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
  419. m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
  420. m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
  421. return *this;
  422. }
  423. inline color_rgba &set_rgb(const color_rgba &other)
  424. {
  425. r = other.r;
  426. g = other.g;
  427. b = other.b;
  428. return *this;
  429. }
  430. inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; }
  431. inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; }
  432. inline void clear()
  433. {
  434. m_comps[0] = 0;
  435. m_comps[1] = 0;
  436. m_comps[2] = 0;
  437. m_comps[3] = 0;
  438. }
  439. inline bool operator== (const color_rgba &rhs) const
  440. {
  441. if (m_comps[0] != rhs.m_comps[0]) return false;
  442. if (m_comps[1] != rhs.m_comps[1]) return false;
  443. if (m_comps[2] != rhs.m_comps[2]) return false;
  444. if (m_comps[3] != rhs.m_comps[3]) return false;
  445. return true;
  446. }
  447. inline bool operator!= (const color_rgba &rhs) const
  448. {
  449. return !(*this == rhs);
  450. }
  451. inline bool operator<(const color_rgba &rhs) const
  452. {
  453. for (int i = 0; i < 4; i++)
  454. {
  455. if (m_comps[i] < rhs.m_comps[i])
  456. return true;
  457. else if (m_comps[i] != rhs.m_comps[i])
  458. return false;
  459. }
  460. return false;
  461. }
  462. inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; }
  463. inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; }
  464. inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); }
  465. };
  466. typedef std::vector<color_rgba> color_rgba_vec;
  467. const color_rgba g_black_color(0, 0, 0, 255);
  468. const color_rgba g_white_color(255, 255, 255, 255);
  // Squared Euclidean distance between two RGB triples.
  inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1)
  {
    const int delta_r = r0 - r1;
    const int delta_g = g0 - g1;
    const int delta_b = b0 - b1;
    return (delta_r * delta_r) + (delta_g * delta_g) + (delta_b * delta_b);
  }
  // Squared Euclidean distance between two RGBA quadruples.
  inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1)
  {
    const int delta_r = r0 - r1;
    const int delta_g = g0 - g1;
    const int delta_b = b0 - b1;
    const int delta_a = a0 - a1;
    return (delta_r * delta_r) + (delta_g * delta_g) + (delta_b * delta_b) + (delta_a * delta_a);
  }
  479. inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha)
  480. {
  481. if (alpha)
  482. return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a);
  483. else
  484. return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b);
  485. }
  486. // TODO: Allow user to control channel weightings.
  487. inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha)
  488. {
  489. if (perceptual)
  490. {
  491. const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f;
  492. const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f;
  493. const float cr1 = e1.r - l1;
  494. const float cr2 = e2.r - l2;
  495. const float cb1 = e1.b - l1;
  496. const float cb2 = e2.b - l2;
  497. const float dl = l1 - l2;
  498. const float dcr = cr1 - cr2;
  499. const float dcb = cb1 - cb2;
  500. uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb);
  501. if (alpha)
  502. {
  503. int da = static_cast<int>(e1.a) - static_cast<int>(e2.a);
  504. d += static_cast<uint32_t>(128.0f*da*da);
  505. }
  506. return d;
  507. }
  508. else
  509. return color_distance(e1, e2, alpha);
  510. }
  511. // String helpers
  // Returns the index of the last occurrence of c in filename, or -1 when c is absent.
  inline int string_find_right(const std::string& filename, char c)
  {
    const std::string::size_type pos = filename.rfind(c);
    if (pos == std::string::npos)
      return -1;
    return (int)pos;
  }
  // Returns the text after the last '.' of the final path component (without the dot),
  // or an empty string when that component contains no '.'.
  //
  // On Windows both '\\' and '/' count as directory separators and the LAST separator of
  // either kind delimits the final component. Previously a '/' was only considered when
  // the path held no '\\', so "C:\\dir.x/file" wrongly yielded "x/file".
  inline std::string string_get_extension(const std::string &filename)
  {
  #ifdef _WIN32
    const std::string::size_type sep = filename.find_last_of("/\\");
  #else
    const std::string::size_type sep = filename.find_last_of('/');
  #endif
    const std::string::size_type dot = filename.find_last_of('.');

    if (dot == std::string::npos)
      return "";

    // A dot that sits before the last separator belongs to a directory name.
    if ((sep != std::string::npos) && (dot < sep))
      return "";

    return filename.substr(dot + 1);
  }
  // Strips the extension (the last '.' and everything after it) from the final path
  // component of filename, in place. Returns false and leaves filename untouched when
  // that component contains no '.'.
  //
  // On Windows both '\\' and '/' count as directory separators and the LAST separator of
  // either kind delimits the final component. Previously a '/' was only considered when
  // the path held no '\\', so "C:\\dir.x/file" was wrongly cut to "C:\\dir".
  inline bool string_remove_extension(std::string &filename)
  {
  #ifdef _WIN32
    const std::string::size_type sep = filename.find_last_of("/\\");
  #else
    const std::string::size_type sep = filename.find_last_of('/');
  #endif
    const std::string::size_type dot = filename.find_last_of('.');

    if (dot == std::string::npos)
      return false;

    // A dot that sits before the last separator belongs to a directory name.
    if ((sep != std::string::npos) && (dot < sep))
      return false;

    filename.resize(dot);
    return true;
  }
  // printf-style formatting into a std::string.
  //
  // Output of any length is supported: a 2048-byte stack buffer serves the common case and
  // a heap buffer of the exact required size is used when the text does not fit.
  // (Previously long output was silently truncated to 2047 characters, or rejected by
  // vsprintf_s on Windows.) Returns an empty string if the formatting call reports an
  // error. As before, the result ends at the first NUL character of the formatted text.
  //
  // NOTE(review): relies on C99 vsnprintf semantics (return value = required length) on
  // every platform, including Windows - confirm the oldest supported MSVC provides this.
  inline std::string string_format(const char* pFmt, ...)
  {
    char stack_buf[2048];

    va_list args;
    va_start(args, pFmt);

    // A va_list may be consumed only once, so keep a copy for a possible second pass.
    va_list args_retry;
    va_copy(args_retry, args);

    const int needed = vsnprintf(stack_buf, sizeof(stack_buf), pFmt, args);
    va_end(args);

    std::string result;
    if (needed >= 0)
    {
      if (static_cast<size_t>(needed) < sizeof(stack_buf))
        result = stack_buf;
      else
      {
        std::vector<char> heap_buf(static_cast<size_t>(needed) + 1);
        if (vsnprintf(&heap_buf[0], heap_buf.size(), pFmt, args_retry) >= 0)
          result = &heap_buf[0];
      }
    }

    va_end(args_retry);
    return result;
  }
  // Returns a copy of s with every character passed through tolower().
  inline std::string string_tolower(const std::string& s)
  {
    std::string result(s);
    for (char &ch : result)
    {
      // tolower() requires a value representable as unsigned char (or EOF); passing a
      // negative plain char is undefined behavior, so convert through unsigned char.
      ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
    }
    return result;
  }
  // Copies the C string pSrc into the dst_len-byte buffer pDst, truncating if needed so
  // that the result is always NUL terminated. Returns pDst.
  // All arguments are asserted to be non-null / non-zero; with asserts disabled a zero
  // dst_len returns pDst without writing anything.
  inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc)
  {
    assert(pDst && pSrc && dst_len);
    if (dst_len == 0)
      return pDst;

    // Number of characters (terminator excluded) that fit in the destination.
    const size_t src_chars = strlen(pSrc);
    const size_t chars_to_copy = (src_chars < dst_len) ? src_chars : (dst_len - 1);

    if (chars_to_copy > 0)
      memcpy(pDst, pSrc, chars_to_copy);
    pDst[chars_to_copy] = '\0';

    return pDst;
  }
  // True when s is non-empty and its last character is c.
  inline bool string_ends_with(const std::string& s, char c)
  {
    if (s.empty())
      return false;
    return s[s.size() - 1] == c;
  }
  587. inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt)
  588. {
  589. #ifdef _MSC_VER
  590. char drive_buf[_MAX_DRIVE] = { 0 };
  591. char dir_buf[_MAX_DIR] = { 0 };
  592. char fname_buf[_MAX_FNAME] = { 0 };
  593. char ext_buf[_MAX_EXT] = { 0 };
  594. errno_t error = _splitpath_s(p,
  595. pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0,
  596. pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0,
  597. pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0,
  598. pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0);
  599. if (error != 0)
  600. return false;
  601. if (pDrive) *pDrive = drive_buf;
  602. if (pDir) *pDir = dir_buf;
  603. if (pFilename) *pFilename = fname_buf;
  604. if (pExt) *pExt = ext_buf;
  605. return true;
  606. #else
  607. char dirtmp[1024], nametmp[1024];
  608. strcpy_safe(dirtmp, sizeof(dirtmp), p);
  609. strcpy_safe(nametmp, sizeof(nametmp), p);
  610. if (pDrive)
  611. pDrive->resize(0);
  612. const char *pDirName = dirname(dirtmp);
  613. const char* pBaseName = basename(nametmp);
  614. if ((!pDirName) || (!pBaseName))
  615. return false;
  616. if (pDir)
  617. {
  618. *pDir = pDirName;
  619. if ((pDir->size()) && (pDir->back() != '/'))
  620. *pDir += "/";
  621. }
  622. if (pFilename)
  623. {
  624. *pFilename = pBaseName;
  625. string_remove_extension(*pFilename);
  626. }
  627. if (pExt)
  628. {
  629. *pExt = pBaseName;
  630. *pExt = string_get_extension(*pExt);
  631. if (pExt->size())
  632. *pExt = "." + *pExt;
  633. }
  634. return true;
  635. #endif
  636. }
  // True when `c` separates path components: '/' everywhere, plus '\\' when
  // building for _WIN32.
  inline bool is_path_separator(char c)
  {
      bool is_separator = (c == '/');
  #ifdef _WIN32
      is_separator = is_separator || (c == '\\');
  #endif
      return is_separator;
  }
  // True when `c` is the drive separator ':'. Only _WIN32 builds have one;
  // every other build always answers false.
  inline bool is_drive_separator(char c)
  {
  #ifndef _WIN32
      (void)c;
      return false;
  #else
      return c == ':';
  #endif
  }
  654. inline void string_combine_path(std::string &dst, const char *p, const char *q)
  655. {
  656. std::string temp(p);
  657. if (temp.size() && !is_path_separator(q[0]))
  658. {
  659. if (!is_path_separator(temp.back()))
  660. temp.append(1, BASISU_PATH_SEPERATOR_CHAR);
  661. }
  662. temp += q;
  663. dst.swap(temp);
  664. }
  665. inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r)
  666. {
  667. string_combine_path(dst, p, q);
  668. string_combine_path(dst, dst.c_str(), r);
  669. }
  670. inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt)
  671. {
  672. string_combine_path(dst, p, q, r);
  673. if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.'))
  674. dst.append(1, '.');
  675. dst.append(pExt);
  676. }
  677. inline bool string_get_pathname(const char *p, std::string &path)
  678. {
  679. std::string temp_drive, temp_path;
  680. if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL))
  681. return false;
  682. string_combine_path(path, temp_drive.c_str(), temp_path.c_str());
  683. return true;
  684. }
  685. inline bool string_get_filename(const char *p, std::string &filename)
  686. {
  687. std::string temp_ext;
  688. if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext))
  689. return false;
  690. filename += temp_ext;
  691. return true;
  692. }
  // Small convenience wrapper around std::mt19937.
  //
  // A default-constructed instance uses the engine's default seed; use the
  // seeding constructor or seed() for a specific sequence.
  class rand
  {
      std::mt19937 m_mt;

  public:
      rand() { }

      rand(uint32_t s) { seed(s); }

      void seed(uint32_t s) { m_mt.seed(s); }

      // Uniform integer in the closed range [l,h].
      int irand(int l, int h)
      {
          std::uniform_int_distribution<int> dist(l, h);
          return dist(m_mt);
      }

      // Uniform 32-bit value, drawn over the whole int range and reinterpreted
      // as unsigned.
      uint32_t urand32()
      {
          const int full_range = irand(INT32_MIN, INT32_MAX);
          return static_cast<uint32_t>(full_range);
      }

      // Single random bit.
      bool bit()
      {
          return irand(0, 1) == 1;
      }

      // Low 8 bits of a urand32() draw.
      uint8_t byte()
      {
          return static_cast<uint8_t>(urand32());
      }

      // Uniform float in the half-open range [l,h).
      float frand(float l, float h)
      {
          std::uniform_real_distribution<float> dist(l, h);
          return dist(m_mt);
      }

      // Normally distributed float with the given mean and standard deviation.
      float gaussian(float mean, float stddev)
      {
          std::normal_distribution<float> dist(mean, stddev);
          return dist(m_mt);
      }
  };
  // Binary max-heap of (index, priority) pairs: the entry with the largest
  // priority is always on top.
  //
  // Entries are stored 1-based in m_heap (slot 0 is never used), so the
  // children of slot k are slots 2k and 2k+1.
  class priority_queue
  {
  public:
      priority_queue() :
          m_size(0)
      {
      }

      // Removes all entries and releases nothing but the logical content.
      void clear()
      {
          m_heap.clear();
          m_size = 0;
      }

      // Resets the queue to hold exactly one entry and sizes the storage for
      // max_entries entries. add_heap() grows the storage when needed, so
      // max_entries is only a sizing hint.
      void init(uint32_t max_entries, uint32_t first_index, float first_priority)
      {
          // The first entry is written to slot 1, so at least two slots are
          // required even when max_entries is 0. The sum is done in size_t so
          // that max_entries + 1 cannot wrap around.
          const size_t num_slots = static_cast<size_t>(max_entries ? max_entries : 1) + 1;
          m_heap.resize(num_slots);

          m_heap[1].m_index = first_index;
          m_heap[1].m_priority = first_priority;
          m_size = 1;
      }

      inline uint32_t size() const { return m_size; }

      // Index/priority of the top entry. Only meaningful while size() > 0.
      inline uint32_t get_top_index() const { return m_heap[1].m_index; }
      inline float get_top_priority() const { return m_heap[1].m_priority; }

      // Removes the top entry. The queue must not be empty.
      inline void delete_top()
      {
          assert(m_size > 0);

          // Move the last entry to the root and sift it down.
          m_heap[1] = m_heap[m_size];
          m_size--;
          if (m_size)
              down_heap(1);
      }

      // Inserts a new entry, growing the storage if necessary.
      inline void add_heap(uint32_t index, float priority)
      {
          m_size++;

          uint32_t k = m_size;

          if (m_size >= m_heap.size())
              m_heap.resize(m_size + 1);

          // Sift up: shift parents down until one with a strictly larger
          // priority (or the root) is reached.
          for (;;)
          {
              uint32_t parent_index = k >> 1;
              if ((!parent_index) || (m_heap[parent_index].m_priority > priority))
                  break;
              m_heap[k] = m_heap[parent_index];
              k = parent_index;
          }

          m_heap[k].m_index = index;
          m_heap[k].m_priority = priority;
      }

  private:
      struct entry
      {
          uint32_t m_index;
          float m_priority;
      };

      std::vector<entry> m_heap;
      uint32_t m_size;

      // Push down entry at index
      inline void down_heap(uint32_t heap_index)
      {
          uint32_t orig_index = m_heap[heap_index].m_index;
          const float orig_priority = m_heap[heap_index].m_priority;

          uint32_t child_index;
          while ((child_index = (heap_index << 1)) <= m_size)
          {
              // Descend towards the child with the larger priority.
              if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index;
              if (orig_priority > m_heap[child_index].m_priority)
                  break;
              m_heap[heap_index] = m_heap[child_index];
              heap_index = child_index;
          }

          m_heap[heap_index].m_index = orig_index;
          m_heap[heap_index].m_priority = orig_priority;
      }
  };
  782. // Tree structured vector quantization (TSVQ)
  783. template <typename TrainingVectorType>
  784. class tree_vector_quant
  785. {
  786. public:
  787. typedef TrainingVectorType training_vec_type;
  788. typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight;
  789. typedef std::vector< training_vec_with_weight > array_of_weighted_training_vecs;
  790. tree_vector_quant() :
  791. m_next_codebook_index(0)
  792. {
  793. }
  794. void clear()
  795. {
  796. clear_vector(m_training_vecs);
  797. clear_vector(m_nodes);
  798. m_next_codebook_index = 0;
  799. }
  800. void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); }
  801. size_t get_total_training_vecs() const { return m_training_vecs.size(); }
  802. const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; }
  803. array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }
  804. void retrieve(std::vector< std::vector<uint32_t> > &codebook) const
  805. {
  806. for (uint32_t i = 0; i < m_nodes.size(); i++)
  807. {
  808. const tsvq_node &n = m_nodes[i];
  809. if (!n.is_leaf())
  810. continue;
  811. codebook.resize(codebook.size() + 1);
  812. codebook.back() = n.m_training_vecs;
  813. }
  814. }
  815. void retrieve(std::vector<TrainingVectorType> &codebook) const
  816. {
  817. for (uint32_t i = 0; i < m_nodes.size(); i++)
  818. {
  819. const tsvq_node &n = m_nodes[i];
  820. if (!n.is_leaf())
  821. continue;
  822. codebook.resize(codebook.size() + 1);
  823. codebook.back() = n.m_origin;
  824. }
  825. }
  826. void retrieve(uint32_t max_clusters, std::vector<uint_vec> &codebook) const
  827. {
  828. uint_vec node_stack;
  829. node_stack.reserve(512);
  830. codebook.resize(0);
  831. codebook.reserve(max_clusters);
  832. uint32_t node_index = 0;
  833. while (true)
  834. {
  835. const tsvq_node& cur = m_nodes[node_index];
  836. if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
  837. {
  838. codebook.resize(codebook.size() + 1);
  839. codebook.back() = cur.m_training_vecs;
  840. if (node_stack.empty())
  841. break;
  842. node_index = node_stack.back();
  843. node_stack.pop_back();
  844. continue;
  845. }
  846. node_stack.push_back(cur.m_right_index);
  847. node_index = cur.m_left_index;
  848. }
  849. }
  850. bool generate(uint32_t max_size)
  851. {
  852. if (!m_training_vecs.size())
  853. return false;
  854. m_next_codebook_index = 0;
  855. clear_vector(m_nodes);
  856. m_nodes.reserve(max_size * 2 + 1);
  857. m_nodes.push_back(prepare_root());
  858. priority_queue var_heap;
  859. var_heap.init(max_size, 0, m_nodes[0].m_var);
  860. std::vector<uint32_t> l_children, r_children;
  861. // Now split the worst nodes
  862. l_children.reserve(m_training_vecs.size() + 1);
  863. r_children.reserve(m_training_vecs.size() + 1);
  864. uint32_t total_leaf_nodes = 1;
  865. while ((var_heap.size()) && (total_leaf_nodes < max_size))
  866. {
  867. const uint32_t node_index = var_heap.get_top_index();
  868. const tsvq_node &node = m_nodes[node_index];
  869. assert(node.m_var == var_heap.get_top_priority());
  870. assert(node.is_leaf());
  871. var_heap.delete_top();
  872. if (node.m_training_vecs.size() > 1)
  873. {
  874. if (split_node(node_index, var_heap, l_children, r_children))
  875. {
  876. // This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total.
  877. total_leaf_nodes += 1;
  878. }
  879. }
  880. }
  881. return true;
  882. }
  883. private:
  884. class tsvq_node
  885. {
  886. public:
  887. inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { }
  888. // vecs is erased
  889. inline void set(const TrainingVectorType &org, uint64_t weight, float var, std::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); }
  890. inline bool is_leaf() const { return m_left_index < 0; }
  891. float m_var;
  892. uint64_t m_weight;
  893. TrainingVectorType m_origin;
  894. int32_t m_left_index, m_right_index;
  895. std::vector<uint32_t> m_training_vecs;
  896. int m_codebook_index;
  897. };
  898. typedef std::vector<tsvq_node> tsvq_node_vec;
  899. tsvq_node_vec m_nodes;
  900. array_of_weighted_training_vecs m_training_vecs;
  901. uint32_t m_next_codebook_index;
  902. tsvq_node prepare_root() const
  903. {
  904. double ttsum = 0.0f;
  905. // Prepare root node containing all training vectors
  906. tsvq_node root;
  907. root.m_training_vecs.reserve(m_training_vecs.size());
  908. for (uint32_t i = 0; i < m_training_vecs.size(); i++)
  909. {
  910. const TrainingVectorType &v = m_training_vecs[i].first;
  911. const uint64_t weight = m_training_vecs[i].second;
  912. root.m_training_vecs.push_back(i);
  913. root.m_origin += (v * static_cast<float>(weight));
  914. root.m_weight += weight;
  915. ttsum += v.dot(v) * weight;
  916. }
  917. root.m_var = static_cast<float>(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight));
  918. root.m_origin *= (1.0f / root.m_weight);
  919. return root;
  920. }
  921. bool split_node(uint32_t node_index, priority_queue &var_heap, std::vector<uint32_t> &l_children, std::vector<uint32_t> &r_children)
  922. {
  923. TrainingVectorType l_child_org, r_child_org;
  924. uint64_t l_weight = 0, r_weight = 0;
  925. float l_var = 0.0f, r_var = 0.0f;
  926. // Compute initial left/right child origins
  927. if (!prep_split(m_nodes[node_index], l_child_org, r_child_org))
  928. return false;
  929. // Use k-means iterations to refine these children vectors
  930. if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children))
  931. return false;
  932. // Create children
  933. const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1;
  934. m_nodes[node_index].m_left_index = l_child_index;
  935. m_nodes[node_index].m_right_index = r_child_index;
  936. m_nodes[node_index].m_codebook_index = m_next_codebook_index;
  937. m_next_codebook_index++;
  938. m_nodes.resize(m_nodes.size() + 2);
  939. tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index];
  940. l_child.set(l_child_org, l_weight, l_var, l_children);
  941. r_child.set(r_child_org, r_weight, r_var, r_children);
  942. if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1))
  943. {
  944. TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first);
  945. for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++)
  946. {
  947. if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first))
  948. {
  949. l_child.m_var = 1e-4f;
  950. break;
  951. }
  952. }
  953. }
  954. if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1))
  955. {
  956. TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first);
  957. for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++)
  958. {
  959. if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first))
  960. {
  961. r_child.m_var = 1e-4f;
  962. break;
  963. }
  964. }
  965. }
  966. if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1))
  967. var_heap.add_heap(l_child_index, l_var);
  968. if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1))
  969. var_heap.add_heap(r_child_index, r_var);
  970. return true;
  971. }
  972. TrainingVectorType compute_split_axis(const tsvq_node &node) const
  973. {
  974. const uint32_t N = TrainingVectorType::num_elements;
  975. matrix<N, N, float> cmatrix(cZero);
  976. // Compute covariance matrix from weighted input vectors
  977. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  978. {
  979. const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin);
  980. const TrainingVectorType w(static_cast<float>(m_training_vecs[node.m_training_vecs[i]].second) * v);
  981. for (uint32_t x = 0; x < N; x++)
  982. for (uint32_t y = x; y < N; y++)
  983. cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y];
  984. }
  985. const float renorm_scale = 1.0f / node.m_weight;
  986. for (uint32_t x = 0; x < N; x++)
  987. for (uint32_t y = x; y < N; y++)
  988. cmatrix[x][y] *= renorm_scale;
  989. // Diagonal flip
  990. for (uint32_t x = 0; x < (N - 1); x++)
  991. for (uint32_t y = x + 1; y < N; y++)
  992. cmatrix[y][x] = cmatrix[x][y];
  993. return compute_pca_from_covar<N, TrainingVectorType>(cmatrix);
  994. }
  995. bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const
  996. {
  997. const uint32_t N = TrainingVectorType::num_elements;
  998. if (2 == node.m_training_vecs.size())
  999. {
  1000. l_child_result = m_training_vecs[node.m_training_vecs[0]].first;
  1001. r_child_result = m_training_vecs[node.m_training_vecs[1]].first;
  1002. return true;
  1003. }
  1004. TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f);
  1005. double l_weight = 0.0f, r_weight = 0.0f;
  1006. // Compute initial left/right children
  1007. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1008. {
  1009. const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
  1010. const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
  1011. double t = (v - node.m_origin).dot(axis);
  1012. if (t >= 0.0f)
  1013. {
  1014. r_child += v * weight;
  1015. r_weight += weight;
  1016. }
  1017. else
  1018. {
  1019. l_child += v * weight;
  1020. l_weight += weight;
  1021. }
  1022. }
  1023. if ((l_weight > 0.0f) && (r_weight > 0.0f))
  1024. {
  1025. l_child_result = l_child * static_cast<float>(1.0f / l_weight);
  1026. r_child_result = r_child * static_cast<float>(1.0f / r_weight);
  1027. }
  1028. else
  1029. {
  1030. TrainingVectorType l(1e+20f);
  1031. TrainingVectorType h(-1e+20f);
  1032. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1033. {
  1034. const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
  1035. l = TrainingVectorType::component_min(l, v);
  1036. h = TrainingVectorType::component_max(h, v);
  1037. }
  1038. TrainingVectorType r(h - l);
  1039. float largest_axis_v = 0.0f;
  1040. int largest_axis_index = -1;
  1041. for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++)
  1042. {
  1043. if (r[i] > largest_axis_v)
  1044. {
  1045. largest_axis_v = r[i];
  1046. largest_axis_index = i;
  1047. }
  1048. }
  1049. if (largest_axis_index < 0)
  1050. return false;
  1051. std::vector<float> keys(node.m_training_vecs.size());
  1052. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1053. keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index];
  1054. uint_vec indices(node.m_training_vecs.size());
  1055. indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]);
  1056. l_child.set_zero();
  1057. l_weight = 0;
  1058. r_child.set_zero();
  1059. r_weight = 0;
  1060. const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2;
  1061. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1062. {
  1063. const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
  1064. const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
  1065. if (i < half_index)
  1066. {
  1067. l_child += v * weight;
  1068. l_weight += weight;
  1069. }
  1070. else
  1071. {
  1072. r_child += v * weight;
  1073. r_weight += weight;
  1074. }
  1075. }
  1076. if ((l_weight > 0.0f) && (r_weight > 0.0f))
  1077. {
  1078. l_child_result = l_child * static_cast<float>(1.0f / l_weight);
  1079. r_child_result = r_child * static_cast<float>(1.0f / r_weight);
  1080. }
  1081. else
  1082. {
  1083. l_child_result = l;
  1084. r_child_result = h;
  1085. }
  1086. }
  1087. return true;
  1088. }
  1089. bool refine_split(const tsvq_node &node,
  1090. TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, std::vector<uint32_t> &l_children,
  1091. TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, std::vector<uint32_t> &r_children) const
  1092. {
  1093. l_children.reserve(node.m_training_vecs.size());
  1094. r_children.reserve(node.m_training_vecs.size());
  1095. float prev_total_variance = 1e+10f;
  1096. // Refine left/right children locations using k-means iterations
  1097. const uint32_t cMaxIters = 6;
  1098. for (uint32_t iter = 0; iter < cMaxIters; iter++)
  1099. {
  1100. l_children.resize(0);
  1101. r_children.resize(0);
  1102. TrainingVectorType new_l_child(cZero), new_r_child(cZero);
  1103. double l_ttsum = 0.0f, r_ttsum = 0.0f;
  1104. l_weight = 0;
  1105. r_weight = 0;
  1106. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1107. {
  1108. const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
  1109. const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;
  1110. double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v);
  1111. if (left_dist2 >= right_dist2)
  1112. {
  1113. new_r_child += (v * static_cast<float>(weight));
  1114. r_weight += weight;
  1115. r_ttsum += weight * v.dot(v);
  1116. r_children.push_back(node.m_training_vecs[i]);
  1117. }
  1118. else
  1119. {
  1120. new_l_child += (v * static_cast<float>(weight));
  1121. l_weight += weight;
  1122. l_ttsum += weight * v.dot(v);
  1123. l_children.push_back(node.m_training_vecs[i]);
  1124. }
  1125. }
  1126. if ((!l_weight) || (!r_weight))
  1127. {
  1128. TrainingVectorType firstVec;
  1129. for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
  1130. {
  1131. const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
  1132. const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;
  1133. if ((!i) || (v == firstVec))
  1134. {
  1135. firstVec = v;
  1136. new_r_child += (v * static_cast<float>(weight));
  1137. r_weight += weight;
  1138. r_ttsum += weight * v.dot(v);
  1139. r_children.push_back(node.m_training_vecs[i]);
  1140. }
  1141. else
  1142. {
  1143. new_l_child += (v * static_cast<float>(weight));
  1144. l_weight += weight;
  1145. l_ttsum += weight * v.dot(v);
  1146. l_children.push_back(node.m_training_vecs[i]);
  1147. }
  1148. }
  1149. if (!l_weight)
  1150. return false;
  1151. }
  1152. l_var = static_cast<float>(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));
  1153. r_var = static_cast<float>(r_ttsum - (new_r_child.dot(new_r_child) / r_weight));
  1154. new_l_child *= (1.0f / l_weight);
  1155. new_r_child *= (1.0f / r_weight);
  1156. l_child = new_l_child;
  1157. r_child = new_r_child;
  1158. float total_var = l_var + r_var;
  1159. const float cGiveupVariance = .00001f;
  1160. if (total_var < cGiveupVariance)
  1161. break;
  1162. // Check to see if the variance has settled
  1163. const float cVarianceDeltaThresh = .00125f;
  1164. if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh)
  1165. break;
  1166. prev_total_variance = total_var;
  1167. }
  1168. return true;
  1169. }
  1170. };
  1171. struct weighted_block_group
  1172. {
  1173. uint64_t m_total_weight;
  1174. uint_vec m_indices;
  1175. };
  1176. template<typename Quantizer>
  1177. bool generate_hierarchical_codebook_threaded_internal(Quantizer& q,
  1178. uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
  1179. std::vector<uint_vec>& codebook,
  1180. std::vector<uint_vec>& parent_codebook,
  1181. uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool)
  1182. {
  1183. codebook.resize(0);
  1184. parent_codebook.resize(0);
  1185. if ((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16))
  1186. {
  1187. if (!q.generate(max_codebook_size))
  1188. return false;
  1189. q.retrieve(codebook);
  1190. if (max_parent_codebook_size)
  1191. q.retrieve(max_parent_codebook_size, parent_codebook);
  1192. return true;
  1193. }
  1194. const uint32_t cMaxThreads = 16;
  1195. if (max_threads > cMaxThreads)
  1196. max_threads = cMaxThreads;
  1197. if (!q.generate(max_threads))
  1198. return false;
  1199. std::vector<uint_vec> initial_codebook;
  1200. q.retrieve(initial_codebook);
  1201. if (initial_codebook.size() < max_threads)
  1202. {
  1203. codebook = initial_codebook;
  1204. if (max_parent_codebook_size)
  1205. q.retrieve(max_parent_codebook_size, parent_codebook);
  1206. return true;
  1207. }
  1208. Quantizer quantizers[cMaxThreads];
  1209. bool success_flags[cMaxThreads];
  1210. clear_obj(success_flags);
  1211. std::vector<uint_vec> local_clusters[cMaxThreads];
  1212. std::vector<uint_vec> local_parent_clusters[cMaxThreads];
  1213. for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
  1214. {
  1215. pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] {
  1216. Quantizer& lq = quantizers[thread_iter];
  1217. uint_vec& cluster_indices = initial_codebook[thread_iter];
  1218. uint_vec local_to_global(cluster_indices.size());
  1219. for (uint32_t i = 0; i < cluster_indices.size(); i++)
  1220. {
  1221. const uint32_t global_training_vec_index = cluster_indices[i];
  1222. local_to_global[i] = global_training_vec_index;
  1223. lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second);
  1224. }
  1225. const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs();
  1226. success_flags[thread_iter] = lq.generate(max_clusters);
  1227. if (success_flags[thread_iter])
  1228. {
  1229. lq.retrieve(local_clusters[thread_iter]);
  1230. for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++)
  1231. {
  1232. for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++)
  1233. local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]];
  1234. }
  1235. if (max_parent_codebook_size)
  1236. {
  1237. lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]);
  1238. for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++)
  1239. {
  1240. for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++)
  1241. local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]];
  1242. }
  1243. }
  1244. }
  1245. } );
  1246. } // thread_iter
  1247. pJob_pool->wait_for_all();
  1248. uint32_t total_clusters = 0, total_parent_clusters = 0;
  1249. for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++)
  1250. {
  1251. if (!success_flags[thread_iter])
  1252. return false;
  1253. total_clusters += (uint32_t)local_clusters[thread_iter].size();
  1254. total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size();
  1255. }
  1256. codebook.reserve(total_clusters);
  1257. parent_codebook.reserve(total_parent_clusters);
  1258. for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
  1259. {
  1260. for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++)
  1261. {
  1262. codebook.resize(codebook.size() + 1);
  1263. codebook.back().swap(local_clusters[thread_iter][j]);
  1264. }
  1265. for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++)
  1266. {
  1267. parent_codebook.resize(parent_codebook.size() + 1);
  1268. parent_codebook.back().swap(local_parent_clusters[thread_iter][j]);
  1269. }
  1270. }
  1271. return true;
  1272. }
  1273. template<typename Quantizer>
  1274. bool generate_hierarchical_codebook_threaded(Quantizer& q,
  1275. uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
  1276. std::vector<uint_vec>& codebook,
  1277. std::vector<uint_vec>& parent_codebook,
  1278. uint32_t max_threads, job_pool *pJob_pool)
  1279. {
  1280. typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher;
  1281. typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group,
  1282. training_vec_bit_hasher> group_hash;
  1283. group_hash unique_vecs;
  1284. weighted_block_group g;
  1285. g.m_indices.resize(1);
  1286. for (uint32_t i = 0; i < q.get_training_vecs().size(); i++)
  1287. {
  1288. g.m_total_weight = q.get_training_vecs()[i].second;
  1289. g.m_indices[0] = i;
  1290. auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));
  1291. if (!ins_res.second)
  1292. {
  1293. (ins_res.first)->second.m_total_weight += g.m_total_weight;
  1294. (ins_res.first)->second.m_indices.push_back(i);
  1295. }
  1296. }
  1297. debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size());
  1298. Quantizer group_quant;
  1299. typedef typename group_hash::const_iterator group_hash_const_iter;
  1300. std::vector<group_hash_const_iter> unique_vec_iters;
  1301. unique_vec_iters.reserve(unique_vecs.size());
  1302. for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter)
  1303. {
  1304. group_quant.add_training_vec(iter->first, iter->second.m_total_weight);
  1305. unique_vec_iters.push_back(iter);
  1306. }
  1307. bool limit_clusterizers = true;
  1308. if (unique_vecs.size() <= max_codebook_size)
  1309. limit_clusterizers = false;
  1310. debug_printf("Limit clusterizers: %u\n", limit_clusterizers);
  1311. std::vector<uint_vec> group_codebook, group_parent_codebook;
  1312. bool status = generate_hierarchical_codebook_threaded_internal(group_quant,
  1313. max_codebook_size, max_parent_codebook_size,
  1314. group_codebook,
  1315. group_parent_codebook,
  1316. (unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool);
  1317. if (!status)
  1318. return false;
  1319. codebook.resize(0);
  1320. for (uint32_t i = 0; i < group_codebook.size(); i++)
  1321. {
  1322. codebook.resize(codebook.size() + 1);
  1323. for (uint32_t j = 0; j < group_codebook[i].size(); j++)
  1324. {
  1325. const uint32_t group_index = group_codebook[i][j];
  1326. typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
  1327. const uint_vec& training_vec_indices = group_iter->second.m_indices;
  1328. append_vector(codebook.back(), training_vec_indices);
  1329. }
  1330. }
  1331. parent_codebook.resize(0);
  1332. for (uint32_t i = 0; i < group_parent_codebook.size(); i++)
  1333. {
  1334. parent_codebook.resize(parent_codebook.size() + 1);
  1335. for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++)
  1336. {
  1337. const uint32_t group_index = group_parent_codebook[i][j];
  1338. typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
  1339. const uint_vec& training_vec_indices = group_iter->second.m_indices;
  1340. append_vector(parent_codebook.back(), training_vec_indices);
  1341. }
  1342. }
  1343. return true;
  1344. }
  1345. // Canonical Huffman coding
  1346. class histogram
  1347. {
  1348. std::vector<uint32_t> m_hist;
  1349. public:
  1350. histogram(uint32_t size = 0) { init(size); }
  1351. void clear()
  1352. {
  1353. clear_vector(m_hist);
  1354. }
  1355. void init(uint32_t size)
  1356. {
  1357. m_hist.resize(0);
  1358. m_hist.resize(size);
  1359. }
  1360. inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); }
  1361. inline const uint32_t &operator[] (uint32_t index) const
  1362. {
  1363. return m_hist[index];
  1364. }
  1365. inline uint32_t &operator[] (uint32_t index)
  1366. {
  1367. return m_hist[index];
  1368. }
  1369. inline void inc(uint32_t index)
  1370. {
  1371. m_hist[index]++;
  1372. }
  1373. uint64_t get_total() const
  1374. {
  1375. uint64_t total = 0;
  1376. for (uint32_t i = 0; i < m_hist.size(); ++i)
  1377. total += m_hist[i];
  1378. return total;
  1379. }
  1380. double get_entropy() const
  1381. {
  1382. double total = static_cast<double>(get_total());
  1383. if (total == 0.0f)
  1384. return 0.0f;
  1385. const double inv_total = 1.0f / total;
  1386. const double neg_inv_log2 = -1.0f / log(2.0f);
  1387. double e = 0.0f;
  1388. for (uint32_t i = 0; i < m_hist.size(); i++)
  1389. if (m_hist[i])
  1390. e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]);
  1391. return e;
  1392. }
  1393. };
  // A sort key paired with the index of the symbol it belongs to; the element
  // type taken by the canonical_huffman_* helpers declared after this struct.
  struct sym_freq
  {
      uint16_t m_key;
      uint16_t m_sym_index;
  };
  1398. sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
  1399. void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms);
  1400. void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size);
  1401. class huffman_encoding_table
  1402. {
  1403. public:
  1404. huffman_encoding_table()
  1405. {
  1406. }
  1407. void clear()
  1408. {
  1409. clear_vector(m_codes);
  1410. clear_vector(m_code_sizes);
  1411. }
  1412. bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize)
  1413. {
  1414. return init(h.size(), &h[0], max_code_size);
  1415. }
  1416. bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size);
  1417. bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size);
  1418. inline const uint16_vec &get_codes() const { return m_codes; }
  1419. inline const uint8_vec &get_code_sizes() const { return m_code_sizes; }
  1420. uint32_t get_total_used_codes() const
  1421. {
  1422. for (int i = static_cast<int>(m_code_sizes.size()) - 1; i >= 0; i--)
  1423. if (m_code_sizes[i])
  1424. return i + 1;
  1425. return 0;
  1426. }
  1427. private:
  1428. uint16_vec m_codes;
  1429. uint8_vec m_code_sizes;
  1430. };
  1431. class bitwise_coder
  1432. {
  1433. public:
  1434. bitwise_coder() :
  1435. m_bit_buffer(0),
  1436. m_bit_buffer_size(0),
  1437. m_total_bits(0)
  1438. {
  1439. }
  1440. inline void clear()
  1441. {
  1442. clear_vector(m_bytes);
  1443. m_bit_buffer = 0;
  1444. m_bit_buffer_size = 0;
  1445. m_total_bits = 0;
  1446. }
  1447. inline const uint8_vec &get_bytes() const { return m_bytes; }
  1448. inline uint64_t get_total_bits() const { return m_total_bits; }
  1449. inline void clear_total_bits() { m_total_bits = 0; }
  1450. inline void init(uint32_t reserve_size = 1024)
  1451. {
  1452. m_bytes.reserve(reserve_size);
  1453. m_bytes.resize(0);
  1454. m_bit_buffer = 0;
  1455. m_bit_buffer_size = 0;
  1456. m_total_bits = 0;
  1457. }
  1458. inline uint32_t flush()
  1459. {
  1460. if (m_bit_buffer_size)
  1461. {
  1462. m_total_bits += 8;
  1463. append_byte(static_cast<uint8_t>(m_bit_buffer));
  1464. m_bit_buffer = 0;
  1465. m_bit_buffer_size = 0;
  1466. return 8;
  1467. }
  1468. return 0;
  1469. }
  1470. inline uint32_t put_bits(uint32_t bits, uint32_t num_bits)
  1471. {
  1472. assert(num_bits <= 32);
  1473. assert(bits < (1ULL << num_bits));
  1474. if (!num_bits)
  1475. return 0;
  1476. m_total_bits += num_bits;
  1477. uint64_t v = (static_cast<uint64_t>(bits) << m_bit_buffer_size) | m_bit_buffer;
  1478. m_bit_buffer_size += num_bits;
  1479. while (m_bit_buffer_size >= 8)
  1480. {
  1481. append_byte(static_cast<uint8_t>(v));
  1482. v >>= 8;
  1483. m_bit_buffer_size -= 8;
  1484. }
  1485. m_bit_buffer = static_cast<uint8_t>(v);
  1486. return num_bits;
  1487. }
  1488. inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab)
  1489. {
  1490. uint32_t code = tab.get_codes()[sym];
  1491. uint32_t code_size = tab.get_code_sizes()[sym];
  1492. assert(code_size >= 1);
  1493. put_bits(code, code_size);
  1494. return code_size;
  1495. }
  1496. inline uint32_t put_truncated_binary(uint32_t v, uint32_t n)
  1497. {
  1498. assert((n >= 2) && (v < n));
  1499. uint32_t k = floor_log2i(n);
  1500. uint32_t u = (1 << (k + 1)) - n;
  1501. if (v < u)
  1502. return put_bits(v, k);
  1503. uint32_t x = v + u;
  1504. assert((x >> 1) >= u);
  1505. put_bits(x >> 1, k);
  1506. put_bits(x & 1, 1);
  1507. return k + 1;
  1508. }
  1509. inline uint32_t put_rice(uint32_t v, uint32_t m)
  1510. {
  1511. assert(m);
  1512. const uint64_t start_bits = m_total_bits;
  1513. uint32_t q = v >> m, r = v & ((1 << m) - 1);
  1514. // rice coding sanity check
  1515. assert(q <= 64);
  1516. for (; q > 16; q -= 16)
  1517. put_bits(0xFFFF, 16);
  1518. put_bits((1 << q) - 1, q);
  1519. put_bits(r << 1, m + 1);
  1520. return (uint32_t)(m_total_bits - start_bits);
  1521. }
  1522. inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits)
  1523. {
  1524. assert(chunk_bits);
  1525. const uint32_t chunk_size = 1 << chunk_bits;
  1526. const uint32_t chunk_mask = chunk_size - 1;
  1527. uint32_t total_bits = 0;
  1528. for ( ; ; )
  1529. {
  1530. uint32_t next_v = v >> chunk_bits;
  1531. total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1);
  1532. if (!next_v)
  1533. break;
  1534. v = next_v;
  1535. }
  1536. return total_bits;
  1537. }
  1538. uint32_t emit_huffman_table(const huffman_encoding_table &tab);
  1539. private:
  1540. uint8_vec m_bytes;
  1541. uint32_t m_bit_buffer, m_bit_buffer_size;
  1542. uint64_t m_total_bits;
  1543. void append_byte(uint8_t c)
  1544. {
  1545. m_bytes.resize(m_bytes.size() + 1);
  1546. m_bytes.back() = c;
  1547. }
  1548. static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len);
  1549. static void end_zero_run(uint16_vec &syms, uint32_t &run_size);
  1550. };
  1551. class huff2D
  1552. {
  1553. public:
  1554. huff2D() { }
  1555. huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); }
  1556. inline const histogram &get_histogram() const { return m_histogram; }
  1557. inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; }
  1558. inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group)
  1559. {
  1560. assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1);
  1561. m_bits_per_sym = bits_per_sym;
  1562. m_total_syms_per_group = total_syms_per_group;
  1563. m_cur_sym_bits = 0;
  1564. m_cur_num_syms = 0;
  1565. m_decode_syms_remaining = 0;
  1566. m_next_decoder_group_index = 0;
  1567. m_histogram.init(1 << (bits_per_sym * total_syms_per_group));
  1568. }
  1569. inline void clear()
  1570. {
  1571. m_group_bits.clear();
  1572. m_cur_sym_bits = 0;
  1573. m_cur_num_syms = 0;
  1574. m_decode_syms_remaining = 0;
  1575. m_next_decoder_group_index = 0;
  1576. }
  1577. inline void emit(uint32_t sym)
  1578. {
  1579. m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym));
  1580. m_cur_num_syms++;
  1581. if (m_cur_num_syms == m_total_syms_per_group)
  1582. flush();
  1583. }
  1584. inline void flush()
  1585. {
  1586. if (m_cur_num_syms)
  1587. {
  1588. m_group_bits.push_back(m_cur_sym_bits);
  1589. m_histogram.inc(m_cur_sym_bits);
  1590. m_cur_sym_bits = 0;
  1591. m_cur_num_syms = 0;
  1592. }
  1593. }
  1594. inline bool start_encoding(uint32_t code_size_limit = 16)
  1595. {
  1596. flush();
  1597. if (!m_encoding_table.init(m_histogram, code_size_limit))
  1598. return false;
  1599. m_decode_syms_remaining = 0;
  1600. m_next_decoder_group_index = 0;
  1601. return true;
  1602. }
  1603. inline uint32_t emit_next_sym(bitwise_coder &c)
  1604. {
  1605. uint32_t bits = 0;
  1606. if (!m_decode_syms_remaining)
  1607. {
  1608. bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table);
  1609. m_decode_syms_remaining = m_total_syms_per_group;
  1610. }
  1611. m_decode_syms_remaining--;
  1612. return bits;
  1613. }
  1614. inline void emit_flush()
  1615. {
  1616. m_decode_syms_remaining = 0;
  1617. }
  1618. private:
  1619. uint_vec m_group_bits;
  1620. huffman_encoding_table m_encoding_table;
  1621. histogram m_histogram;
  1622. uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining;
  1623. };
  1624. bool huffman_test(int rand_seed);
  1625. // VQ index reordering
  1626. class palette_index_reorderer
  1627. {
  1628. public:
  1629. palette_index_reorderer()
  1630. {
  1631. }
  1632. void clear()
  1633. {
  1634. clear_vector(m_hist);
  1635. clear_vector(m_total_count_to_picked);
  1636. clear_vector(m_entries_picked);
  1637. clear_vector(m_entries_to_do);
  1638. clear_vector(m_remap_table);
  1639. }
  1640. // returns [0,1] distance of entry i to entry j
  1641. typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx);
  1642. void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
  1643. // Table remaps old to new symbol indices
  1644. inline const uint_vec &get_remap_table() const { return m_remap_table; }
  1645. private:
  1646. uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table;
  1647. inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? m_hist[j * n + i] : m_hist[i * n + j]; }
  1648. inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } }
  1649. void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices);
  1650. void find_initial(uint32_t num_syms);
  1651. void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
  1652. float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
  1653. };
  1654. // Simple 32-bit 2D image class
  1655. class image
  1656. {
  1657. public:
  1658. image() :
  1659. m_width(0), m_height(0), m_pitch(0)
  1660. {
  1661. }
  1662. image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
  1663. m_width(0), m_height(0), m_pitch(0)
  1664. {
  1665. resize(w, h, p);
  1666. }
  1667. image(const image &other) :
  1668. m_width(0), m_height(0), m_pitch(0)
  1669. {
  1670. *this = other;
  1671. }
  1672. image &swap(image &other)
  1673. {
  1674. std::swap(m_width, other.m_width);
  1675. std::swap(m_height, other.m_height);
  1676. std::swap(m_pitch, other.m_pitch);
  1677. m_pixels.swap(other.m_pixels);
  1678. return *this;
  1679. }
  1680. image &operator= (const image &rhs)
  1681. {
  1682. if (this != &rhs)
  1683. {
  1684. m_width = rhs.m_width;
  1685. m_height = rhs.m_height;
  1686. m_pitch = rhs.m_pitch;
  1687. m_pixels = rhs.m_pixels;
  1688. }
  1689. return *this;
  1690. }
  1691. image &clear()
  1692. {
  1693. m_width = 0;
  1694. m_height = 0;
  1695. m_pitch = 0;
  1696. clear_vector(m_pixels);
  1697. return *this;
  1698. }
  1699. image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color)
  1700. {
  1701. return crop(w, h, p, background);
  1702. }
  1703. image &set_all(const color_rgba &c)
  1704. {
  1705. for (uint32_t i = 0; i < m_pixels.size(); i++)
  1706. m_pixels[i] = c;
  1707. return *this;
  1708. }
  1709. image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c)
  1710. {
  1711. for (uint32_t iy = 0; iy < h; iy++)
  1712. for (uint32_t ix = 0; ix < w; ix++)
  1713. set_clipped(x + ix, y + iy, c);
  1714. return *this;
  1715. }
  1716. image &crop_dup_borders(uint32_t w, uint32_t h)
  1717. {
  1718. const uint32_t orig_w = m_width, orig_h = m_height;
  1719. crop(w, h);
  1720. if (orig_w && orig_h)
  1721. {
  1722. if (m_width > orig_w)
  1723. {
  1724. for (uint32_t x = orig_w; x < m_width; x++)
  1725. for (uint32_t y = 0; y < m_height; y++)
  1726. set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
  1727. }
  1728. if (m_height > orig_h)
  1729. {
  1730. for (uint32_t y = orig_h; y < m_height; y++)
  1731. for (uint32_t x = 0; x < m_width; x++)
  1732. set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
  1733. }
  1734. }
  1735. return *this;
  1736. }
  1737. image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color)
  1738. {
  1739. if (p == UINT32_MAX)
  1740. p = w;
  1741. if ((w == m_width) && (m_height == h) && (m_pitch == p))
  1742. return *this;
  1743. if ((!w) || (!h) || (!p))
  1744. {
  1745. clear();
  1746. return *this;
  1747. }
  1748. color_rgba_vec cur_state;
  1749. cur_state.swap(m_pixels);
  1750. m_pixels.resize(p * h);
  1751. for (uint32_t y = 0; y < h; y++)
  1752. {
  1753. for (uint32_t x = 0; x < w; x++)
  1754. {
  1755. if ((x < m_width) && (y < m_height))
  1756. m_pixels[x + y * p] = cur_state[x + y * m_pitch];
  1757. else
  1758. m_pixels[x + y * p] = background;
  1759. }
  1760. }
  1761. m_width = w;
  1762. m_height = h;
  1763. m_pitch = p;
  1764. return *this;
  1765. }
  1766. inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  1767. inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  1768. inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  1769. inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  1770. inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
  1771. {
  1772. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  1773. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  1774. return m_pixels[x + y * m_pitch];
  1775. }
  1776. inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
  1777. {
  1778. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  1779. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  1780. return m_pixels[x + y * m_pitch];
  1781. }
  1782. inline image &set_clipped(int x, int y, const color_rgba &c)
  1783. {
  1784. if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
  1785. (*this)(x, y) = c;
  1786. return *this;
  1787. }
  1788. // Very straightforward blit with full clipping. Not fast, but it works.
  1789. image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
  1790. {
  1791. for (int y = 0; y < src_h; y++)
  1792. {
  1793. const int sy = src_y + y;
  1794. if (sy < 0)
  1795. continue;
  1796. else if (sy >= (int)src.get_height())
  1797. break;
  1798. for (int x = 0; x < src_w; x++)
  1799. {
  1800. const int sx = src_x + x;
  1801. if (sx < 0)
  1802. continue;
  1803. else if (sx >= (int)src.get_height())
  1804. break;
  1805. set_clipped(dst_x + x, dst_y + y, src(sx, sy));
  1806. }
  1807. }
  1808. return *this;
  1809. }
  1810. const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
  1811. {
  1812. for (uint32_t y = 0; y < h; y++)
  1813. for (uint32_t x = 0; x < w; x++)
  1814. *pDst++ = get_clamped(src_x + x, src_y + y);
  1815. return *this;
  1816. }
  1817. image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
  1818. {
  1819. for (uint32_t y = 0; y < h; y++)
  1820. for (uint32_t x = 0; x < w; x++)
  1821. set_clipped(dst_x + x, dst_y + y, *pSrc++);
  1822. return *this;
  1823. }
  1824. inline uint32_t get_width() const { return m_width; }
  1825. inline uint32_t get_height() const { return m_height; }
  1826. inline uint32_t get_pitch() const { return m_pitch; }
  1827. inline uint32_t get_total_pixels() const { return m_width * m_height; }
  1828. inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
  1829. inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
  1830. inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }
  1831. inline const color_rgba_vec &get_pixels() const { return m_pixels; }
  1832. inline color_rgba_vec &get_pixels() { return m_pixels; }
  1833. inline const color_rgba *get_ptr() const { return &m_pixels[0]; }
  1834. inline color_rgba *get_ptr() { return &m_pixels[0]; }
  1835. bool has_alpha() const
  1836. {
  1837. for (uint32_t y = 0; y < m_height; ++y)
  1838. for (uint32_t x = 0; x < m_width; ++x)
  1839. if ((*this)(x, y).a < 255)
  1840. return true;
  1841. return false;
  1842. }
  1843. image &set_alpha(uint8_t a)
  1844. {
  1845. for (uint32_t y = 0; y < m_height; ++y)
  1846. for (uint32_t x = 0; x < m_width; ++x)
  1847. (*this)(x, y).a = a;
  1848. return *this;
  1849. }
  1850. image &flip_y()
  1851. {
  1852. for (uint32_t y = 0; y < m_height / 2; ++y)
  1853. for (uint32_t x = 0; x < m_width; ++x)
  1854. std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));
  1855. return *this;
  1856. }
  1857. // TODO: There are many ways to do this, not sure this is the best way.
  1858. image &renormalize_normal_map()
  1859. {
  1860. for (uint32_t y = 0; y < m_height; y++)
  1861. {
  1862. for (uint32_t x = 0; x < m_width; x++)
  1863. {
  1864. color_rgba &c = (*this)(x, y);
  1865. if ((c.r == 128) && (c.g == 128) && (c.b == 128))
  1866. continue;
  1867. vec3F v(c.r, c.g, c.b);
  1868. v = (v * (2.0f / 255.0f)) - vec3F(1.0f);
  1869. v.clamp(-1.0f, 1.0f);
  1870. float length = v.length();
  1871. const float cValidThresh = .077f;
  1872. if (length < cValidThresh)
  1873. {
  1874. c.set(128, 128, 128, c.a);
  1875. }
  1876. else if (fabs(length - 1.0f) > cValidThresh)
  1877. {
  1878. if (length)
  1879. v /= length;
  1880. for (uint32_t i = 0; i < 3; i++)
  1881. c[i] = static_cast<uint8_t>(clamp<float>(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f));
  1882. if ((c.g == 128) && (c.r == 128))
  1883. {
  1884. if (c.b < 128)
  1885. c.b = 0;
  1886. else
  1887. c.b = 255;
  1888. }
  1889. }
  1890. }
  1891. }
  1892. return *this;
  1893. }
  1894. private:
  1895. uint32_t m_width, m_height, m_pitch; // all in pixels
  1896. color_rgba_vec m_pixels;
  1897. };
  1898. // Float images
  1899. typedef std::vector<vec4F> vec4F_vec;
  1900. class imagef
  1901. {
  1902. public:
  1903. imagef() :
  1904. m_width(0), m_height(0), m_pitch(0)
  1905. {
  1906. }
  1907. imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
  1908. m_width(0), m_height(0), m_pitch(0)
  1909. {
  1910. resize(w, h, p);
  1911. }
  1912. imagef(const imagef &other) :
  1913. m_width(0), m_height(0), m_pitch(0)
  1914. {
  1915. *this = other;
  1916. }
  1917. imagef &swap(imagef &other)
  1918. {
  1919. std::swap(m_width, other.m_width);
  1920. std::swap(m_height, other.m_height);
  1921. std::swap(m_pitch, other.m_pitch);
  1922. m_pixels.swap(other.m_pixels);
  1923. return *this;
  1924. }
  1925. imagef &operator= (const imagef &rhs)
  1926. {
  1927. if (this != &rhs)
  1928. {
  1929. m_width = rhs.m_width;
  1930. m_height = rhs.m_height;
  1931. m_pitch = rhs.m_pitch;
  1932. m_pixels = rhs.m_pixels;
  1933. }
  1934. return *this;
  1935. }
  1936. imagef &clear()
  1937. {
  1938. m_width = 0;
  1939. m_height = 0;
  1940. m_pitch = 0;
  1941. clear_vector(m_pixels);
  1942. return *this;
  1943. }
  1944. imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0))
  1945. {
  1946. const uint32_t width = src.get_width();
  1947. const uint32_t height = src.get_height();
  1948. resize(width, height);
  1949. for (int y = 0; y < (int)height; y++)
  1950. {
  1951. for (uint32_t x = 0; x < width; x++)
  1952. {
  1953. const color_rgba &src_pixel = src(x, y);
  1954. (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]);
  1955. }
  1956. }
  1957. return *this;
  1958. }
  1959. imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
  1960. {
  1961. return resize(other.get_width(), other.get_height(), p, background);
  1962. }
  1963. imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
  1964. {
  1965. return crop(w, h, p, background);
  1966. }
  1967. imagef &set_all(const vec4F &c)
  1968. {
  1969. for (uint32_t i = 0; i < m_pixels.size(); i++)
  1970. m_pixels[i] = c;
  1971. return *this;
  1972. }
  1973. imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c)
  1974. {
  1975. for (uint32_t iy = 0; iy < h; iy++)
  1976. for (uint32_t ix = 0; ix < w; ix++)
  1977. set_clipped(x + ix, y + iy, c);
  1978. return *this;
  1979. }
  1980. imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1))
  1981. {
  1982. if (p == UINT32_MAX)
  1983. p = w;
  1984. if ((w == m_width) && (m_height == h) && (m_pitch == p))
  1985. return *this;
  1986. if ((!w) || (!h) || (!p))
  1987. {
  1988. clear();
  1989. return *this;
  1990. }
  1991. vec4F_vec cur_state;
  1992. cur_state.swap(m_pixels);
  1993. m_pixels.resize(p * h);
  1994. for (uint32_t y = 0; y < h; y++)
  1995. {
  1996. for (uint32_t x = 0; x < w; x++)
  1997. {
  1998. if ((x < m_width) && (y < m_height))
  1999. m_pixels[x + y * p] = cur_state[x + y * m_pitch];
  2000. else
  2001. m_pixels[x + y * p] = background;
  2002. }
  2003. }
  2004. m_width = w;
  2005. m_height = h;
  2006. m_pitch = p;
  2007. return *this;
  2008. }
  2009. inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  2010. inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
  2011. inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  2012. inline vec4F &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
  2013. inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
  2014. {
  2015. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  2016. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  2017. return m_pixels[x + y * m_pitch];
  2018. }
  2019. inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
  2020. {
  2021. x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
  2022. y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
  2023. return m_pixels[x + y * m_pitch];
  2024. }
  2025. inline imagef &set_clipped(int x, int y, const vec4F &c)
  2026. {
  2027. if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
  2028. (*this)(x, y) = c;
  2029. return *this;
  2030. }
  2031. // Very straightforward blit with full clipping. Not fast, but it works.
  2032. imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
  2033. {
  2034. for (int y = 0; y < src_h; y++)
  2035. {
  2036. const int sy = src_y + y;
  2037. if (sy < 0)
  2038. continue;
  2039. else if (sy >= (int)src.get_height())
  2040. break;
  2041. for (int x = 0; x < src_w; x++)
  2042. {
  2043. const int sx = src_x + x;
  2044. if (sx < 0)
  2045. continue;
  2046. else if (sx >= (int)src.get_height())
  2047. break;
  2048. set_clipped(dst_x + x, dst_y + y, src(sx, sy));
  2049. }
  2050. }
  2051. return *this;
  2052. }
  2053. const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
  2054. {
  2055. for (uint32_t y = 0; y < h; y++)
  2056. for (uint32_t x = 0; x < w; x++)
  2057. *pDst++ = get_clamped(src_x + x, src_y + y);
  2058. return *this;
  2059. }
  2060. imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
  2061. {
  2062. for (uint32_t y = 0; y < h; y++)
  2063. for (uint32_t x = 0; x < w; x++)
  2064. set_clipped(dst_x + x, dst_y + y, *pSrc++);
  2065. return *this;
  2066. }
  2067. inline uint32_t get_width() const { return m_width; }
  2068. inline uint32_t get_height() const { return m_height; }
  2069. inline uint32_t get_pitch() const { return m_pitch; }
  2070. inline uint32_t get_total_pixels() const { return m_width * m_height; }
  2071. inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
  2072. inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
  2073. inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }
  2074. inline const vec4F_vec &get_pixels() const { return m_pixels; }
  2075. inline vec4F_vec &get_pixels() { return m_pixels; }
  2076. inline const vec4F *get_ptr() const { return &m_pixels[0]; }
  2077. inline vec4F *get_ptr() { return &m_pixels[0]; }
  2078. private:
  2079. uint32_t m_width, m_height, m_pitch; // all in pixels
  2080. vec4F_vec m_pixels;
  2081. };
  2082. // Image metrics
  2083. class image_metrics
  2084. {
  2085. public:
  2086. // TODO: Add ssim
  2087. float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
  2088. image_metrics()
  2089. {
  2090. clear();
  2091. }
  2092. void clear()
  2093. {
  2094. m_max = 0;
  2095. m_mean = 0;
  2096. m_mean_squared = 0;
  2097. m_rms = 0;
  2098. m_psnr = 0;
  2099. m_ssim = 0;
  2100. }
  2101. void print(const char *pPrefix = nullptr) { printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); }
  2102. void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);
  2103. };
  2104. // Image saving/loading/resampling
  2105. bool load_png(const char* pFilename, image& img);
  2106. inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); }
  // Image save options; each value occupies its own bit so they can be OR'ed together.
  // NOTE(review): presumably passed as save_png()'s image_save_flags - confirm.
  enum
  {
      cImageSaveGrayscale = 1 << 0,
      cImageSaveIgnoreAlpha = 1 << 1
  };
  2112. bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0);
  2113. inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }
  2114. bool read_file_to_vec(const char* pFilename, uint8_vec& data);
  2115. bool write_data_to_file(const char* pFilename, const void* pData, size_t len);
  2116. inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }
  2117. float linear_to_srgb(float l);
  2118. float srgb_to_linear(float s);
  2119. bool image_resample(const image &src, image &dst, bool srgb = false,
  2120. const char *pFilter = "lanczos4", float filter_scale = 1.0f,
  2121. bool wrapping = false,
  2122. uint32_t first_comp = 0, uint32_t num_comps = 4);
  2123. // Timing
  2124. typedef uint64_t timer_ticks;
  2125. class interval_timer
  2126. {
  2127. public:
  2128. interval_timer();
  2129. void start();
  2130. void stop();
  2131. double get_elapsed_secs() const;
  2132. inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); }
  2133. static void init();
  2134. static inline timer_ticks get_ticks_per_sec() { return g_freq; }
  2135. static timer_ticks get_ticks();
  2136. static double ticks_to_secs(timer_ticks ticks);
  2137. static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; }
  2138. private:
  2139. static timer_ticks g_init_ticks, g_freq;
  2140. static double g_timer_freq;
  2141. timer_ticks m_start_time, m_stop_time;
  2142. bool m_started, m_stopped;
  2143. };
  2144. // 2D array
  2145. template<typename T>
  2146. class vector2D
  2147. {
  2148. typedef std::vector<T> TVec;
  2149. uint32_t m_width, m_height;
  2150. TVec m_values;
  2151. public:
  2152. vector2D() :
  2153. m_width(0),
  2154. m_height(0)
  2155. {
  2156. }
  2157. vector2D(uint32_t w, uint32_t h) :
  2158. m_width(0),
  2159. m_height(0)
  2160. {
  2161. resize(w, h);
  2162. }
  2163. vector2D(const vector2D &other)
  2164. {
  2165. *this = other;
  2166. }
  2167. vector2D &operator= (const vector2D &other)
  2168. {
  2169. if (this != &other)
  2170. {
  2171. m_width = other.m_width;
  2172. m_height = other.m_height;
  2173. m_values = other.m_values;
  2174. }
  2175. return *this;
  2176. }
  2177. inline bool operator== (const vector2D &rhs) const
  2178. {
  2179. return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values);
  2180. }
  2181. inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); }
  2182. inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
  2183. inline T &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
  2184. inline const T &operator[] (uint32_t i) const { return m_values[i]; }
  2185. inline T &operator[] (uint32_t i) { return m_values[i]; }
  2186. inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }
  2187. inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }
  2188. void clear()
  2189. {
  2190. m_width = 0;
  2191. m_height = 0;
  2192. m_values.clear();
  2193. }
  2194. void set_all(const T&val)
  2195. {
  2196. vector_set_all(m_values, val);
  2197. }
  2198. inline const T* get_ptr() const { return &m_values[0]; }
  2199. inline T* get_ptr() { return &m_values[0]; }
  2200. vector2D &resize(uint32_t new_width, uint32_t new_height)
  2201. {
  2202. if ((m_width == new_width) && (m_height == new_height))
  2203. return *this;
  2204. TVec oldVals(new_width * new_height);
  2205. oldVals.swap(m_values);
  2206. const uint32_t w = minimum(m_width, new_width);
  2207. const uint32_t h = minimum(m_height, new_height);
  2208. if ((w) && (h))
  2209. {
  2210. for (uint32_t y = 0; y < h; y++)
  2211. for (uint32_t x = 0; x < w; x++)
  2212. m_values[x + y * new_width] = oldVals[x + y * m_width];
  2213. }
  2214. m_width = new_width;
  2215. m_height = new_height;
  2216. return *this;
  2217. }
  2218. };
  // Opens pFilename with the given mode and returns the stream, or a null pointer
  // when the file cannot be opened. On Windows fopen_s() is used instead of fopen().
  inline FILE *fopen_safe(const char *pFilename, const char *pMode)
  {
  #ifdef _WIN32
      // Stays null when fopen_s() does not store a stream.
      FILE *pStream = nullptr;
      fopen_s(&pStream, pFilename, pMode);
      return pStream;
  #else
      FILE *pStream = fopen(pFilename, pMode);
      return pStream;
  #endif
  }
  2229. void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1);
  2230. } // namespace basisu