basisu_transcoder_internal.h 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056
  1. // basisu_transcoder_internal.h - Universal texture format transcoder library.
  2. // Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
  3. //
  4. // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License");
  7. // you may not use this file except in compliance with the License.
  8. // You may obtain a copy of the License at
  9. //
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. //
  12. // Unless required by applicable law or agreed to in writing, software
  13. // distributed under the License is distributed on an "AS IS" BASIS,
  14. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. // See the License for the specific language governing permissions and
  16. // limitations under the License.
  17. #pragma once
  18. #ifdef _MSC_VER
  19. #pragma warning (disable: 4127) // conditional expression is constant
  20. #endif
  21. // v1.50: Added UASTC HDR 4x4 support
  22. // v1.60: Added RDO ASTC HDR 6x6 and intermediate support
  23. #define BASISD_LIB_VERSION 160
  24. #define BASISD_VERSION_STRING "01.60"
  25. #ifdef _DEBUG
  26. #define BASISD_BUILD_DEBUG
  27. #else
  28. #define BASISD_BUILD_RELEASE
  29. #endif
  30. #include "basisu.h"
  31. #define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16))
  32. namespace basisu
  33. {
  34. extern bool g_debug_printf;
  35. }
  36. namespace basist
  37. {
  38. // Low-level formats directly supported by the transcoder (other supported texture formats are combinations of these low-level block formats).
  39. // You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices.
  40. enum class block_format
  41. {
  42. cETC1, // ETC1S RGB
  43. cETC2_RGBA, // full ETC2 EAC RGBA8 block
  44. cBC1, // DXT1 RGB
  45. cBC3, // BC4 block followed by a four color BC1 block
  46. cBC4, // DXT5A (alpha block only)
  47. cBC5, // two BC4 blocks
  48. cPVRTC1_4_RGB, // opaque-only PVRTC1 4bpp
  49. cPVRTC1_4_RGBA, // PVRTC1 4bpp RGBA
  50. cBC7, // Full BC7 block, any mode
  51. cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block)
  52. cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.)
  53. cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format)
  54. cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC
  55. // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking.
  56. cATC_RGB,
  57. cATC_RGBA_INTERPOLATED_ALPHA,
  58. cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size
  59. cPVRTC2_4_RGB,
  60. cPVRTC2_4_RGBA,
  61. cETC2_EAC_R11,
  62. cETC2_EAC_RG11,
  63. cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits)
  64. cRGB32, // Writes RGB components to 32bpp output pixels
  65. cRGBA32, // Writes RGB255 components to 32bpp output pixels
  66. cA32, // Writes alpha component to 32bpp output pixels
  67. cRGB565,
  68. cBGR565,
  69. cRGBA4444_COLOR,
  70. cRGBA4444_ALPHA,
  71. cRGBA4444_COLOR_OPAQUE,
  72. cRGBA4444,
  73. cRGBA_HALF,
  74. cRGB_HALF,
  75. cRGB_9E5,
  76. cUASTC_4x4, // LDR, universal
  77. cUASTC_HDR_4x4, // HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed
  78. cBC6H,
  79. cASTC_HDR_4x4,
  80. cASTC_HDR_6x6,
  81. cTotalBlockFormats
  82. };
  83. inline uint32_t get_block_width(block_format fmt)
  84. {
  85. switch (fmt)
  86. {
  87. case block_format::cFXT1_RGB:
  88. return 8;
  89. case block_format::cASTC_HDR_6x6:
  90. return 6;
  91. default:
  92. break;
  93. }
  94. return 4;
  95. }
  96. inline uint32_t get_block_height(block_format fmt)
  97. {
  98. switch (fmt)
  99. {
  100. case block_format::cASTC_HDR_6x6:
  101. return 6;
  102. default:
  103. break;
  104. }
  105. return 4;
  106. }
  107. const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31;
  108. const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21;
  109. const int COLOR5_PAL2_PREV_HI = 31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9;
  110. const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3, COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3;
  111. const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1;
  112. const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1;
  113. const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3;
  114. const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4;
  115. const uint32_t NUM_ENDPOINT_PREDS = 3;// BASISU_ARRAY_SIZE(g_endpoint_preds);
  116. const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1;
  117. const uint32_t NO_ENDPOINT_PRED_INDEX = 3;//NUM_ENDPOINT_PREDS;
  118. const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64;
  119. const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3;
  120. const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6;
  121. const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);
  122. uint16_t crc16(const void *r, size_t size, uint16_t crc);
  123. class huffman_decoding_table
  124. {
  125. friend class bitwise_decoder;
  126. public:
  127. huffman_decoding_table()
  128. {
  129. }
  130. void clear()
  131. {
  132. basisu::clear_vector(m_code_sizes);
  133. basisu::clear_vector(m_lookup);
  134. basisu::clear_vector(m_tree);
  135. }
  136. bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits)
  137. {
  138. if (!total_syms)
  139. {
  140. clear();
  141. return true;
  142. }
  143. m_code_sizes.resize(total_syms);
  144. memcpy(&m_code_sizes[0], pCode_sizes, total_syms);
  145. const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
  146. m_lookup.resize(0);
  147. m_lookup.resize(huffman_fast_lookup_size);
  148. m_tree.resize(0);
  149. m_tree.resize(total_syms * 2);
  150. uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];
  151. basisu::clear_obj(syms_using_codesize);
  152. for (uint32_t i = 0; i < total_syms; i++)
  153. {
  154. if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize)
  155. return false;
  156. syms_using_codesize[pCode_sizes[i]]++;
  157. }
  158. uint32_t next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];
  159. next_code[0] = next_code[1] = 0;
  160. uint32_t used_syms = 0, total = 0;
  161. for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++)
  162. {
  163. used_syms += syms_using_codesize[i];
  164. next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1));
  165. }
  166. if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms != 1U))
  167. return false;
  168. for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index)
  169. {
  170. uint32_t rev_code = 0, l, cur_code, code_size = pCode_sizes[sym_index];
  171. if (!code_size)
  172. continue;
  173. cur_code = next_code[code_size]++;
  174. for (l = code_size; l > 0; l--, cur_code >>= 1)
  175. rev_code = (rev_code << 1) | (cur_code & 1);
  176. if (code_size <= fast_lookup_bits)
  177. {
  178. uint32_t k = (code_size << 16) | sym_index;
  179. while (rev_code < huffman_fast_lookup_size)
  180. {
  181. if (m_lookup[rev_code] != 0)
  182. {
  183. // Supplied codesizes can't create a valid prefix code.
  184. return false;
  185. }
  186. m_lookup[rev_code] = k;
  187. rev_code += (1 << code_size);
  188. }
  189. continue;
  190. }
  191. int tree_cur;
  192. if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)]))
  193. {
  194. const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1);
  195. if (m_lookup[idx] != 0)
  196. {
  197. // Supplied codesizes can't create a valid prefix code.
  198. return false;
  199. }
  200. m_lookup[idx] = tree_next;
  201. tree_cur = tree_next;
  202. tree_next -= 2;
  203. }
  204. if (tree_cur >= 0)
  205. {
  206. // Supplied codesizes can't create a valid prefix code.
  207. return false;
  208. }
  209. rev_code >>= (fast_lookup_bits - 1);
  210. for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--)
  211. {
  212. tree_cur -= ((rev_code >>= 1) & 1);
  213. const int idx = -tree_cur - 1;
  214. if (idx < 0)
  215. return false;
  216. else if (idx >= (int)m_tree.size())
  217. m_tree.resize(idx + 1);
  218. if (!m_tree[idx])
  219. {
  220. m_tree[idx] = (int16_t)tree_next;
  221. tree_cur = tree_next;
  222. tree_next -= 2;
  223. }
  224. else
  225. {
  226. tree_cur = m_tree[idx];
  227. if (tree_cur >= 0)
  228. {
  229. // Supplied codesizes can't create a valid prefix code.
  230. return false;
  231. }
  232. }
  233. }
  234. tree_cur -= ((rev_code >>= 1) & 1);
  235. const int idx = -tree_cur - 1;
  236. if (idx < 0)
  237. return false;
  238. else if (idx >= (int)m_tree.size())
  239. m_tree.resize(idx + 1);
  240. if (m_tree[idx] != 0)
  241. {
  242. // Supplied codesizes can't create a valid prefix code.
  243. return false;
  244. }
  245. m_tree[idx] = (int16_t)sym_index;
  246. }
  247. return true;
  248. }
  249. const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }
  250. const basisu::int_vec &get_lookup() const { return m_lookup; }
  251. const basisu::int16_vec &get_tree() const { return m_tree; }
  252. bool is_valid() const { return m_code_sizes.size() > 0; }
  253. private:
  254. basisu::uint8_vec m_code_sizes;
  255. basisu::int_vec m_lookup;
  256. basisu::int16_vec m_tree;
  257. };
  258. class bitwise_decoder
  259. {
  260. public:
  261. bitwise_decoder() :
  262. m_buf_size(0),
  263. m_pBuf(nullptr),
  264. m_pBuf_start(nullptr),
  265. m_pBuf_end(nullptr),
  266. m_bit_buf(0),
  267. m_bit_buf_size(0)
  268. {
  269. }
  270. void clear()
  271. {
  272. m_buf_size = 0;
  273. m_pBuf = nullptr;
  274. m_pBuf_start = nullptr;
  275. m_pBuf_end = nullptr;
  276. m_bit_buf = 0;
  277. m_bit_buf_size = 0;
  278. }
  279. bool init(const uint8_t *pBuf, uint32_t buf_size)
  280. {
  281. if ((!pBuf) && (buf_size))
  282. return false;
  283. m_buf_size = buf_size;
  284. m_pBuf = pBuf;
  285. m_pBuf_start = pBuf;
  286. m_pBuf_end = pBuf + buf_size;
  287. m_bit_buf = 0;
  288. m_bit_buf_size = 0;
  289. return true;
  290. }
  291. void stop()
  292. {
  293. }
  294. inline uint32_t peek_bits(uint32_t num_bits)
  295. {
  296. if (!num_bits)
  297. return 0;
  298. assert(num_bits <= 25);
  299. while (m_bit_buf_size < num_bits)
  300. {
  301. uint32_t c = 0;
  302. if (m_pBuf < m_pBuf_end)
  303. c = *m_pBuf++;
  304. m_bit_buf |= (c << m_bit_buf_size);
  305. m_bit_buf_size += 8;
  306. assert(m_bit_buf_size <= 32);
  307. }
  308. return m_bit_buf & ((1 << num_bits) - 1);
  309. }
  310. void remove_bits(uint32_t num_bits)
  311. {
  312. assert(m_bit_buf_size >= num_bits);
  313. m_bit_buf >>= num_bits;
  314. m_bit_buf_size -= num_bits;
  315. }
  316. uint32_t get_bits(uint32_t num_bits)
  317. {
  318. if (num_bits > 25)
  319. {
  320. assert(num_bits <= 32);
  321. const uint32_t bits0 = peek_bits(25);
  322. m_bit_buf >>= 25;
  323. m_bit_buf_size -= 25;
  324. num_bits -= 25;
  325. const uint32_t bits = peek_bits(num_bits);
  326. m_bit_buf >>= num_bits;
  327. m_bit_buf_size -= num_bits;
  328. return bits0 | (bits << 25);
  329. }
  330. const uint32_t bits = peek_bits(num_bits);
  331. m_bit_buf >>= num_bits;
  332. m_bit_buf_size -= num_bits;
  333. return bits;
  334. }
  335. uint32_t decode_truncated_binary(uint32_t n)
  336. {
  337. assert(n >= 2);
  338. const uint32_t k = basisu::floor_log2i(n);
  339. const uint32_t u = (1 << (k + 1)) - n;
  340. uint32_t result = get_bits(k);
  341. if (result >= u)
  342. result = ((result << 1) | get_bits(1)) - u;
  343. return result;
  344. }
  345. uint32_t decode_rice(uint32_t m)
  346. {
  347. assert(m);
  348. uint32_t q = 0;
  349. for (;;)
  350. {
  351. uint32_t k = peek_bits(16);
  352. uint32_t l = 0;
  353. while (k & 1)
  354. {
  355. l++;
  356. k >>= 1;
  357. }
  358. q += l;
  359. remove_bits(l);
  360. if (l < 16)
  361. break;
  362. }
  363. return (q << m) + (get_bits(m + 1) >> 1);
  364. }
  365. inline uint32_t decode_vlc(uint32_t chunk_bits)
  366. {
  367. assert(chunk_bits);
  368. const uint32_t chunk_size = 1 << chunk_bits;
  369. const uint32_t chunk_mask = chunk_size - 1;
  370. uint32_t v = 0;
  371. uint32_t ofs = 0;
  372. for ( ; ; )
  373. {
  374. uint32_t s = get_bits(chunk_bits + 1);
  375. v |= ((s & chunk_mask) << ofs);
  376. ofs += chunk_bits;
  377. if ((s & chunk_size) == 0)
  378. break;
  379. if (ofs >= 32)
  380. {
  381. assert(0);
  382. break;
  383. }
  384. }
  385. return v;
  386. }
  387. inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits)
  388. {
  389. assert(ct.m_code_sizes.size());
  390. const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
  391. while (m_bit_buf_size < 16)
  392. {
  393. uint32_t c = 0;
  394. if (m_pBuf < m_pBuf_end)
  395. c = *m_pBuf++;
  396. m_bit_buf |= (c << m_bit_buf_size);
  397. m_bit_buf_size += 8;
  398. assert(m_bit_buf_size <= 32);
  399. }
  400. int code_len;
  401. int sym;
  402. if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0)
  403. {
  404. code_len = sym >> 16;
  405. sym &= 0xFFFF;
  406. }
  407. else
  408. {
  409. code_len = fast_lookup_bits;
  410. do
  411. {
  412. sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1
  413. } while (sym < 0);
  414. }
  415. m_bit_buf >>= code_len;
  416. m_bit_buf_size -= code_len;
  417. return sym;
  418. }
  419. bool read_huffman_table(huffman_decoding_table &ct)
  420. {
  421. ct.clear();
  422. const uint32_t total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2);
  423. if (!total_used_syms)
  424. return true;
  425. if (total_used_syms > basisu::cHuffmanMaxSyms)
  426. return false;
  427. uint8_t code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes];
  428. basisu::clear_obj(code_length_code_sizes);
  429. const uint32_t num_codelength_codes = get_bits(5);
  430. if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes))
  431. return false;
  432. for (uint32_t i = 0; i < num_codelength_codes; i++)
  433. code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast<uint8_t>(get_bits(3));
  434. huffman_decoding_table code_length_table;
  435. if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes))
  436. return false;
  437. if (!code_length_table.is_valid())
  438. return false;
  439. basisu::uint8_vec code_sizes(total_used_syms);
  440. uint32_t cur = 0;
  441. while (cur < total_used_syms)
  442. {
  443. int c = decode_huffman(code_length_table);
  444. if (c <= 16)
  445. code_sizes[cur++] = static_cast<uint8_t>(c);
  446. else if (c == basisu::cHuffmanSmallZeroRunCode)
  447. cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin;
  448. else if (c == basisu::cHuffmanBigZeroRunCode)
  449. cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin;
  450. else
  451. {
  452. if (!cur)
  453. return false;
  454. uint32_t l;
  455. if (c == basisu::cHuffmanSmallRepeatCode)
  456. l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin;
  457. else
  458. l = get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin;
  459. const uint8_t prev = code_sizes[cur - 1];
  460. if (prev == 0)
  461. return false;
  462. do
  463. {
  464. if (cur >= total_used_syms)
  465. return false;
  466. code_sizes[cur++] = prev;
  467. } while (--l > 0);
  468. }
  469. }
  470. if (cur != total_used_syms)
  471. return false;
  472. return ct.init(total_used_syms, &code_sizes[0]);
  473. }
  474. size_t get_bits_remaining() const
  475. {
  476. size_t total_bytes_remaining = m_pBuf_end - m_pBuf;
  477. return total_bytes_remaining * 8 + m_bit_buf_size;
  478. }
  479. private:
  480. uint32_t m_buf_size;
  481. const uint8_t *m_pBuf;
  482. const uint8_t *m_pBuf_start;
  483. const uint8_t *m_pBuf_end;
  484. uint32_t m_bit_buf;
  485. uint32_t m_bit_buf_size;
  486. };
  487. inline uint32_t basisd_rand(uint32_t seed)
  488. {
  489. if (!seed)
  490. seed++;
  491. uint32_t z = seed;
  492. BASISD_znew;
  493. return z;
  494. }
  495. // Returns random number in [0,limit). Max limit is 0xFFFF.
  496. inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit)
  497. {
  498. seed = basisd_rand(seed);
  499. return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16;
  500. }
  501. class approx_move_to_front
  502. {
  503. public:
  504. approx_move_to_front(uint32_t n)
  505. {
  506. init(n);
  507. }
  508. void init(uint32_t n)
  509. {
  510. m_values.resize(n);
  511. m_rover = n / 2;
  512. }
  513. const basisu::int_vec& get_values() const { return m_values; }
  514. basisu::int_vec& get_values() { return m_values; }
  515. uint32_t size() const { return (uint32_t)m_values.size(); }
  516. const int& operator[] (uint32_t index) const { return m_values[index]; }
  517. int operator[] (uint32_t index) { return m_values[index]; }
  518. void add(int new_value)
  519. {
  520. m_values[m_rover++] = new_value;
  521. if (m_rover == m_values.size())
  522. m_rover = (uint32_t)m_values.size() / 2;
  523. }
  524. void use(uint32_t index)
  525. {
  526. if (index)
  527. {
  528. //std::swap(m_values[index / 2], m_values[index]);
  529. int x = m_values[index / 2];
  530. int y = m_values[index];
  531. m_values[index / 2] = y;
  532. m_values[index] = x;
  533. }
  534. }
  535. // returns -1 if not found
  536. int find(int value) const
  537. {
  538. for (uint32_t i = 0; i < m_values.size(); i++)
  539. if (m_values[i] == value)
  540. return i;
  541. return -1;
  542. }
  543. void reset()
  544. {
  545. const uint32_t n = (uint32_t)m_values.size();
  546. m_values.clear();
  547. init(n);
  548. }
  549. private:
  550. basisu::int_vec m_values;
  551. uint32_t m_rover;
  552. };
  553. struct decoder_etc_block;
  554. inline uint8_t clamp255(int32_t i)
  555. {
  556. return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);
  557. }
  558. enum eNoClamp
  559. {
  560. cNoClamp = 0
  561. };
  562. struct color32
  563. {
  564. union
  565. {
  566. struct
  567. {
  568. uint8_t r;
  569. uint8_t g;
  570. uint8_t b;
  571. uint8_t a;
  572. };
  573. uint8_t c[4];
  574. uint32_t m;
  575. };
  576. color32() { }
  577. color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
  578. color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); }
  579. void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); c[3] = static_cast<uint8_t>(va); }
  580. void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); }
  581. void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
  582. void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); }
  583. uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; }
  584. uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; }
  585. bool operator== (const color32&rhs) const { return m == rhs.m; }
  586. static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }
  587. static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }
  588. };
  589. struct endpoint
  590. {
  591. color32 m_color5;
  592. uint8_t m_inten5;
  593. bool operator== (const endpoint& rhs) const
  594. {
  595. return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5);
  596. }
  597. bool operator!= (const endpoint& rhs) const { return !(*this == rhs); }
  598. };
  599. struct selector
  600. {
  601. // Plain selectors (2-bits per value)
  602. uint8_t m_selectors[4];
  603. // ETC1 selectors
  604. uint8_t m_bytes[4];
  605. uint8_t m_lo_selector, m_hi_selector;
  606. uint8_t m_num_unique_selectors;
  607. bool operator== (const selector& rhs) const
  608. {
  609. return (m_selectors[0] == rhs.m_selectors[0]) &&
  610. (m_selectors[1] == rhs.m_selectors[1]) &&
  611. (m_selectors[2] == rhs.m_selectors[2]) &&
  612. (m_selectors[3] == rhs.m_selectors[3]);
  613. }
  614. bool operator!= (const selector& rhs) const
  615. {
  616. return !(*this == rhs);
  617. }
  618. void init_flags()
  619. {
  620. uint32_t hist[4] = { 0, 0, 0, 0 };
  621. for (uint32_t y = 0; y < 4; y++)
  622. {
  623. for (uint32_t x = 0; x < 4; x++)
  624. {
  625. uint32_t s = get_selector(x, y);
  626. hist[s]++;
  627. }
  628. }
  629. m_lo_selector = 3;
  630. m_hi_selector = 0;
  631. m_num_unique_selectors = 0;
  632. for (uint32_t i = 0; i < 4; i++)
  633. {
  634. if (hist[i])
  635. {
  636. m_num_unique_selectors++;
  637. if (i < m_lo_selector) m_lo_selector = static_cast<uint8_t>(i);
  638. if (i > m_hi_selector) m_hi_selector = static_cast<uint8_t>(i);
  639. }
  640. }
  641. }
  642. // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
  643. inline uint32_t get_selector(uint32_t x, uint32_t y) const
  644. {
  645. assert((x < 4) && (y < 4));
  646. return (m_selectors[y] >> (x * 2)) & 3;
  647. }
  648. void set_selector(uint32_t x, uint32_t y, uint32_t val)
  649. {
  650. static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 };
  651. assert((x | y | val) < 4);
  652. m_selectors[y] &= ~(3 << (x * 2));
  653. m_selectors[y] |= (val << (x * 2));
  654. const uint32_t etc1_bit_index = x * 4 + y;
  655. uint8_t *p = &m_bytes[3 - (etc1_bit_index >> 3)];
  656. const uint32_t byte_bit_ofs = etc1_bit_index & 7;
  657. const uint32_t mask = 1 << byte_bit_ofs;
  658. const uint32_t etc1_val = s_selector_index_to_etc1[val];
  659. const uint32_t lsb = etc1_val & 1;
  660. const uint32_t msb = etc1_val >> 1;
  661. p[0] &= ~mask;
  662. p[0] |= (lsb << byte_bit_ofs);
  663. p[-2] &= ~mask;
  664. p[-2] |= (msb << byte_bit_ofs);
  665. }
  666. };
  667. bool basis_block_format_is_uncompressed(block_format tex_type);
  668. //------------------------------------
  669. typedef uint16_t half_float;
  670. const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number
  671. const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number
  672. const double MAX_HALF_FLOAT = 65504.0; // largest normal number
  673. const uint32_t MAX_HALF_FLOAT_AS_INT_BITS = 0x7BFF; // the half float rep for 65504.0
  674. inline uint32_t get_bits(uint32_t val, int low, int high)
  675. {
  676. const int num_bits = (high - low) + 1;
  677. assert((num_bits >= 1) && (num_bits <= 32));
  678. val >>= low;
  679. if (num_bits != 32)
  680. val &= ((1u << num_bits) - 1);
  681. return val;
  682. }
  683. inline bool is_half_inf_or_nan(half_float v)
  684. {
  685. return get_bits(v, 10, 14) == 31;
  686. }
  687. inline bool is_half_denorm(half_float v)
  688. {
  689. int e = (v >> 10) & 31;
  690. return !e;
  691. }
  692. inline int get_half_exp(half_float v)
  693. {
  694. int e = ((v >> 10) & 31);
  695. return e ? (e - 15) : -14;
  696. }
  697. inline int get_half_mantissa(half_float v)
  698. {
  699. if (is_half_denorm(v))
  700. return v & 0x3FF;
  701. return (v & 0x3FF) | 0x400;
  702. }
  703. inline float get_half_mantissaf(half_float v)
  704. {
  705. return ((float)get_half_mantissa(v)) / 1024.0f;
  706. }
  707. inline int get_half_sign(half_float v)
  708. {
  709. return v ? ((v & 0x8000) ? -1 : 1) : 0;
  710. }
  711. inline bool half_is_signed(half_float v)
  712. {
  713. return (v & 0x8000) != 0;
  714. }
  715. #if 0
  716. int hexp = get_half_exp(Cf);
  717. float hman = get_half_mantissaf(Cf);
  718. int hsign = get_half_sign(Cf);
  719. float k = powf(2.0f, hexp) * hman * hsign;
  720. if (is_half_inf_or_nan(Cf))
  721. k = std::numeric_limits<float>::quiet_NaN();
  722. #endif
  723. half_float float_to_half(float val);
  724. inline float half_to_float(half_float hval)
  725. {
  726. union { float f; uint32_t u; } x = { 0 };
  727. uint32_t s = ((uint32_t)hval >> 15) & 1;
  728. uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
  729. uint32_t m = (uint32_t)hval & 0x3FF;
  730. if (!e)
  731. {
  732. if (!m)
  733. {
  734. // +- 0
  735. x.u = s << 31;
  736. return x.f;
  737. }
  738. else
  739. {
  740. // denormalized
  741. while (!(m & 0x00000400))
  742. {
  743. m <<= 1;
  744. --e;
  745. }
  746. ++e;
  747. m &= ~0x00000400;
  748. }
  749. }
  750. else if (e == 31)
  751. {
  752. if (m == 0)
  753. {
  754. // +/- INF
  755. x.u = (s << 31) | 0x7f800000;
  756. return x.f;
  757. }
  758. else
  759. {
  760. // +/- NaN
  761. x.u = (s << 31) | 0x7f800000 | (m << 13);
  762. return x.f;
  763. }
  764. }
  765. e = e + (127 - 15);
  766. m = m << 13;
  767. assert(s <= 1);
  768. assert(m <= 0x7FFFFF);
  769. assert(e <= 255);
  770. x.u = m | (e << 23) | (s << 31);
  771. return x.f;
  772. }
  773. // Originally from bc6h_enc.h
  774. void bc6h_enc_init();
  775. const uint32_t MAX_BLOG16_VAL = 0xFFFF;
  776. // BC6H internals
  777. const uint32_t NUM_BC6H_MODES = 14;
  778. const uint32_t BC6H_LAST_MODE_INDEX = 13;
  779. const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10; // in the MS docs, this is "mode 11" (where the first mode is 1), 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights
  780. const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32;
  781. extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b
  782. struct bc6h_bit_layout
  783. {
  784. int8_t m_comp; // R=0,G=1,B=2,D=3 (D=partition index)
  785. int8_t m_index; // 0-3, 0-1 Low/High subset 1, 2-3 Low/High subset 2, -1=partition index (d)
  786. int8_t m_last_bit;
  787. int8_t m_first_bit; // may be -1 if a single bit, may be >m_last_bit if reversed
  788. };
  789. const uint32_t MAX_BC6H_LAYOUT_INDEX = 25;
  790. extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX];
  791. extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x]
  792. extern const uint8_t g_bc6h_weight3[8];
  793. extern const uint8_t g_bc6h_weight4[16];
  794. extern const int8_t g_bc6h_mode_lookup[32];
  795. // Converts b16 to half float
  796. inline half_float bc6h_blog16_to_half(uint32_t comp)
  797. {
  798. assert(comp <= 0xFFFF);
  799. // scale the magnitude by 31/64
  800. comp = (comp * 31u) >> 6u;
  801. return (half_float)comp;
  802. }
  803. const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF;
  804. // Inverts bc6h_blog16_to_half().
  805. // Returns the nearest blog16 given a half value.
  806. inline uint32_t bc6h_half_to_blog16(half_float h)
  807. {
  808. assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
  809. return (h * 64 + 30) / 31;
  810. }
  811. // Suboptimal, but very close.
  812. inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
  813. {
  814. assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
  815. return (h * 64 + 30) / (31 * (1 << (16 - num_bits)));
  816. }
  817. struct bc6h_block
  818. {
  819. uint8_t m_bytes[16];
  820. };
  821. void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
  822. void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
  823. void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
  824. void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
  825. void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
  826. void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
  827. bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]);
  828. struct bc6h_logical_block
  829. {
  830. uint32_t m_mode;
  831. uint32_t m_partition_pattern; // must be 0 if 1 subset
  832. uint32_t m_endpoints[3][4]; // [comp][subset*2+lh_index] - must be already properly packed
  833. uint8_t m_weights[16]; // weights must be of the proper size, taking into account skipped MSB's which must be 0
  834. void clear()
  835. {
  836. basisu::clear_obj(*this);
  837. }
  838. };
  839. void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk);
  840. namespace bc7_mode_5_encoder
  841. {
  842. void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode);
  843. }
  844. } // namespace basist