basisu_transcoder.cpp 688 KB


  1. // basisu_transcoder.cpp
  2. // Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. #include "basisu_transcoder.h"
  16. #include <limits.h>
  17. #include "basisu_containers_impl.h"
  18. #define BASISU_ASTC_HELPERS_IMPLEMENTATION
  19. #include "basisu_astc_helpers.h"
  20. #include "basisu_astc_hdr_core.h"
  21. #ifndef BASISD_IS_BIG_ENDIAN
  22. // TODO: This doesn't work on OSX. How can this be so difficult?
  23. //#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
  24. // #define BASISD_IS_BIG_ENDIAN (1)
  25. //#else
  26. #define BASISD_IS_BIG_ENDIAN (0)
  27. //#endif
  28. #endif
  29. #ifndef BASISD_USE_UNALIGNED_WORD_READS
  30. #ifdef __EMSCRIPTEN__
  31. // Can't use unaligned loads/stores with WebAssembly.
  32. #define BASISD_USE_UNALIGNED_WORD_READS (0)
  33. #elif defined(_M_AMD64) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)
  34. #define BASISD_USE_UNALIGNED_WORD_READS (1)
  35. #else
  36. #define BASISD_USE_UNALIGNED_WORD_READS (0)
  37. #endif
  38. #endif
  39. // Using unaligned loads and stores causes errors when using UBSan. Jam it off.
  40. #if defined(__has_feature)
  41. #if __has_feature(undefined_behavior_sanitizer)
  42. #undef BASISD_USE_UNALIGNED_WORD_READS
  43. #define BASISD_USE_UNALIGNED_WORD_READS 0
  44. #endif
  45. #endif
  46. #define BASISD_SUPPORTED_BASIS_VERSION (0x13)
  47. #ifndef BASISD_SUPPORT_KTX2
  48. #error Must have defined BASISD_SUPPORT_KTX2
  49. #endif
  50. #ifndef BASISD_SUPPORT_KTX2_ZSTD
  51. #error Must have defined BASISD_SUPPORT_KTX2_ZSTD
  52. #endif
  53. // Set to 1 for fuzz testing. This will disable all CRC16 checks on headers and compressed data.
  54. #ifndef BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS
  55. #define BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS 0
  56. #endif
  57. #ifndef BASISD_SUPPORT_DXT1
  58. #define BASISD_SUPPORT_DXT1 1
  59. #endif
  60. #ifndef BASISD_SUPPORT_DXT5A
  61. #define BASISD_SUPPORT_DXT5A 1
  62. #endif
  63. // Disable all BC7 transcoders if necessary (useful when cross compiling to Javascript)
  64. #if defined(BASISD_SUPPORT_BC7) && !BASISD_SUPPORT_BC7
  65. #ifndef BASISD_SUPPORT_BC7_MODE5
  66. #define BASISD_SUPPORT_BC7_MODE5 0
  67. #endif
  68. #endif // !BASISD_SUPPORT_BC7
  69. // BC7 mode 5 supports both opaque and opaque+alpha textures, and uses less memory than BC1.
  70. #ifndef BASISD_SUPPORT_BC7_MODE5
  71. #define BASISD_SUPPORT_BC7_MODE5 1
  72. #endif
  73. #ifndef BASISD_SUPPORT_PVRTC1
  74. #define BASISD_SUPPORT_PVRTC1 1
  75. #endif
  76. #ifndef BASISD_SUPPORT_ETC2_EAC_A8
  77. #define BASISD_SUPPORT_ETC2_EAC_A8 1
  78. #endif
  79. // Set BASISD_SUPPORT_UASTC to 0 to completely disable support for transcoding UASTC files.
  80. #ifndef BASISD_SUPPORT_UASTC
  81. #define BASISD_SUPPORT_UASTC 1
  82. #endif
  83. #ifndef BASISD_SUPPORT_ASTC
  84. #define BASISD_SUPPORT_ASTC 1
  85. #endif
  86. // Note that if BASISD_SUPPORT_ATC is enabled, BASISD_SUPPORT_DXT5A should also be enabled for alpha support.
  87. #ifndef BASISD_SUPPORT_ATC
  88. #define BASISD_SUPPORT_ATC 1
  89. #endif
  90. // Support for ETC2 EAC R11 and ETC2 EAC RG11
  91. #ifndef BASISD_SUPPORT_ETC2_EAC_RG11
  92. #define BASISD_SUPPORT_ETC2_EAC_RG11 1
  93. #endif
  94. // If BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY is 1, opaque blocks will be transcoded to ASTC at slightly higher quality (higher than BC1), but the transcoder tables will be 2x as large.
  95. // This impacts grayscale and grayscale+alpha textures the most.
  96. #ifndef BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
  97. #ifdef __EMSCRIPTEN__
  98. // Let's assume size matters more than quality when compiling with emscripten.
  99. #define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 0
  100. #else
  101. // Compiling native, so an extra 64K lookup table is probably acceptable.
  102. #define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 1
  103. #endif
  104. #endif
  105. #ifndef BASISD_SUPPORT_FXT1
  106. #define BASISD_SUPPORT_FXT1 1
  107. #endif
  108. #ifndef BASISD_SUPPORT_PVRTC2
  109. #define BASISD_SUPPORT_PVRTC2 1
  110. #endif
  111. #if BASISD_SUPPORT_PVRTC2
  112. #if !BASISD_SUPPORT_ATC
  113. #error BASISD_SUPPORT_ATC must be 1 if BASISD_SUPPORT_PVRTC2 is 1
  114. #endif
  115. #endif
  116. #if BASISD_SUPPORT_ATC
  117. #if !BASISD_SUPPORT_DXT5A
  118. #error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1
  119. #endif
  120. #endif
  121. #ifndef BASISD_SUPPORT_UASTC_HDR
  122. #define BASISD_SUPPORT_UASTC_HDR 1
  123. #endif
  124. #define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0
  125. #define BASISD_WRITE_NEW_DXT1_TABLES 0
  126. #define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0
  127. #define BASISD_WRITE_NEW_ASTC_TABLES 0
  128. #define BASISD_WRITE_NEW_ATC_TABLES 0
  129. #define BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES 0
  130. #ifndef BASISD_ENABLE_DEBUG_FLAGS
  131. #define BASISD_ENABLE_DEBUG_FLAGS 0
  132. #endif
  133. // If KTX2 support is enabled, we may need Zstd for decompression of supercompressed UASTC files. Include this header.
  134. #if BASISD_SUPPORT_KTX2
  135. // If BASISD_SUPPORT_KTX2_ZSTD is 0, UASTC files compressed with Zstd cannot be loaded.
  136. #if BASISD_SUPPORT_KTX2_ZSTD
  137. // We only use two Zstd APIs: ZSTD_decompress() and ZSTD_isError()
  138. #include <zstd.h>
  139. #endif
  140. #endif
  141. namespace basisu
  142. {
  143. bool g_debug_printf;
  144. void enable_debug_printf(bool enabled)
  145. {
  146. g_debug_printf = enabled;
  147. }
  148. void debug_printf(const char* pFmt, ...)
  149. {
  150. #if BASISU_FORCE_DEVEL_MESSAGES
  151. g_debug_printf = true;
  152. #endif
  153. if (g_debug_printf)
  154. {
  155. va_list args;
  156. va_start(args, pFmt);
  157. vprintf(pFmt, args);
  158. va_end(args);
  159. }
  160. }
  161. } // namespace basisu
  162. namespace basist
  163. {
  164. #if BASISD_ENABLE_DEBUG_FLAGS
  165. static uint32_t g_debug_flags = 0;
  166. #endif
  167. uint32_t get_debug_flags()
  168. {
  169. #if BASISD_ENABLE_DEBUG_FLAGS
  170. return g_debug_flags;
  171. #else
  172. return 0;
  173. #endif
  174. }
	// Stores f as the current debug flags. When BASISD_ENABLE_DEBUG_FLAGS is 0 the
	// value is discarded and get_debug_flags() keeps returning 0.
	void set_debug_flags(uint32_t f)
	{
		// Presumably silences the unused-parameter warning when the assignment below is compiled out.
		BASISU_NOTE_UNUSED(f);
#if BASISD_ENABLE_DEBUG_FLAGS
		g_debug_flags = f;
#endif
	}
  182. inline uint16_t byteswap_uint16(uint16_t v)
  183. {
  184. return static_cast<uint16_t>((v >> 8) | (v << 8));
  185. }
  186. static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; }
  187. static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; }
  188. static inline float saturate(float value) { return clampf(value, 0, 1.0f); }
  189. static inline uint8_t mul_8(uint32_t v, uint32_t q) { v = v * q + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
  190. uint16_t crc16(const void* r, size_t size, uint16_t crc)
  191. {
  192. crc = ~crc;
  193. const uint8_t* p = static_cast<const uint8_t*>(r);
  194. for (; size; --size)
  195. {
  196. const uint16_t q = *p++ ^ (crc >> 8);
  197. uint16_t k = (q >> 4) ^ q;
  198. crc = (((crc << 8) ^ k) ^ (k << 5)) ^ (k << 12);
  199. }
  200. return static_cast<uint16_t>(~crc);
  201. }
  202. enum etc_constants
  203. {
  204. cETC1BytesPerBlock = 8U,
  205. cETC1SelectorBits = 2U,
  206. cETC1SelectorValues = 1U << cETC1SelectorBits,
  207. cETC1SelectorMask = cETC1SelectorValues - 1U,
  208. cETC1BlockShift = 2U,
  209. cETC1BlockSize = 1U << cETC1BlockShift,
  210. cETC1LSBSelectorIndicesBitOffset = 0,
  211. cETC1MSBSelectorIndicesBitOffset = 16,
  212. cETC1FlipBitOffset = 32,
  213. cETC1DiffBitOffset = 33,
  214. cETC1IntenModifierNumBits = 3,
  215. cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits,
  216. cETC1RightIntenModifierTableBitOffset = 34,
  217. cETC1LeftIntenModifierTableBitOffset = 37,
  218. // Base+Delta encoding (5 bit bases, 3 bit delta)
  219. cETC1BaseColorCompNumBits = 5,
  220. cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits,
  221. cETC1DeltaColorCompNumBits = 3,
  222. cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits,
  223. cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,
  224. cETC1BaseColor5RBitOffset = 59,
  225. cETC1BaseColor5GBitOffset = 51,
  226. cETC1BaseColor5BBitOffset = 43,
  227. cETC1DeltaColor3RBitOffset = 56,
  228. cETC1DeltaColor3GBitOffset = 48,
  229. cETC1DeltaColor3BBitOffset = 40,
  230. // Absolute (non-delta) encoding (two 4-bit per component bases)
  231. cETC1AbsColorCompNumBits = 4,
  232. cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,
  233. cETC1AbsColor4R1BitOffset = 60,
  234. cETC1AbsColor4G1BitOffset = 52,
  235. cETC1AbsColor4B1BitOffset = 44,
  236. cETC1AbsColor4R2BitOffset = 56,
  237. cETC1AbsColor4G2BitOffset = 48,
  238. cETC1AbsColor4B2BitOffset = 40,
  239. cETC1ColorDeltaMin = -4,
  240. cETC1ColorDeltaMax = 3,
  241. // Delta3:
  242. // 0 1 2 3 4 5 6 7
  243. // 000 001 010 011 100 101 110 111
  244. // 0 1 2 3 -4 -3 -2 -1
  245. };
  246. #define DECLARE_ETC1_INTEN_TABLE(name, N) \
  247. static const int name[cETC1IntenModifierValues][cETC1SelectorValues] = \
  248. { \
  249. { N * -8, N * -2, N * 2, N * 8 },{ N * -17, N * -5, N * 5, N * 17 },{ N * -29, N * -9, N * 9, N * 29 },{ N * -42, N * -13, N * 13, N * 42 }, \
  250. { N * -60, N * -18, N * 18, N * 60 },{ N * -80, N * -24, N * 24, N * 80 },{ N * -106, N * -33, N * 33, N * 106 },{ N * -183, N * -47, N * 47, N * 183 } \
  251. };
  252. DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables, 1);
  253. DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables16, 16);
  254. DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables48, 3 * 16);
	//const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };

	// Maps a selector index (0-3) to a raw 2-bit ETC1 selector code. Holds the same values as the
	// local table decoder_etc_block::set_selector() uses to convert a direct index into
	// g_etc1_inten_tables into the bits stored in the block.
	const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };
  257. static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 };
  258. struct decoder_etc_block
  259. {
  260. // big endian uint64:
  261. // bit ofs: 56 48 40 32 24 16 8 0
  262. // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
  263. union
  264. {
  265. uint64_t m_uint64;
  266. uint32_t m_uint32[2];
  267. uint8_t m_bytes[8];
  268. struct
  269. {
  270. signed m_dred2 : 3;
  271. uint32_t m_red1 : 5;
  272. signed m_dgreen2 : 3;
  273. uint32_t m_green1 : 5;
  274. signed m_dblue2 : 3;
  275. uint32_t m_blue1 : 5;
  276. uint32_t m_flip : 1;
  277. uint32_t m_diff : 1;
  278. uint32_t m_cw2 : 3;
  279. uint32_t m_cw1 : 3;
  280. uint32_t m_selectors;
  281. } m_differential;
  282. };
  283. inline void clear()
  284. {
  285. assert(sizeof(*this) == 8);
  286. basisu::clear_obj(*this);
  287. }
  288. inline void set_byte_bits(uint32_t ofs, uint32_t num, uint32_t bits)
  289. {
  290. assert((ofs + num) <= 64U);
  291. assert(num && (num < 32U));
  292. assert((ofs >> 3) == ((ofs + num - 1) >> 3));
  293. assert(bits < (1U << num));
  294. const uint32_t byte_ofs = 7 - (ofs >> 3);
  295. const uint32_t byte_bit_ofs = ofs & 7;
  296. const uint32_t mask = (1 << num) - 1;
  297. m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
  298. m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
  299. }
  300. inline void set_flip_bit(bool flip)
  301. {
  302. m_bytes[3] &= ~1;
  303. m_bytes[3] |= static_cast<uint8_t>(flip);
  304. }
  305. inline void set_diff_bit(bool diff)
  306. {
  307. m_bytes[3] &= ~2;
  308. m_bytes[3] |= (static_cast<uint32_t>(diff) << 1);
  309. }
  310. // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
  311. inline void set_inten_table(uint32_t subblock_id, uint32_t t)
  312. {
  313. assert(subblock_id < 2);
  314. assert(t < 8);
  315. const uint32_t ofs = subblock_id ? 2 : 5;
  316. m_bytes[3] &= ~(7 << ofs);
  317. m_bytes[3] |= (t << ofs);
  318. }
  319. // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
  320. inline void set_selector(uint32_t x, uint32_t y, uint32_t val)
  321. {
  322. assert((x | y | val) < 4);
  323. const uint32_t bit_index = x * 4 + y;
  324. uint8_t* p = &m_bytes[7 - (bit_index >> 3)];
  325. const uint32_t byte_bit_ofs = bit_index & 7;
  326. const uint32_t mask = 1 << byte_bit_ofs;
  327. static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 };
  328. const uint32_t etc1_val = s_selector_index_to_etc1[val];
  329. const uint32_t lsb = etc1_val & 1;
  330. const uint32_t msb = etc1_val >> 1;
  331. p[0] &= ~mask;
  332. p[0] |= (lsb << byte_bit_ofs);
  333. p[-2] &= ~mask;
  334. p[-2] |= (msb << byte_bit_ofs);
  335. }
  336. // Returned encoded selector value ranges from 0-3 (this is NOT a direct index into g_etc1_inten_tables, see get_selector())
  337. inline uint32_t get_raw_selector(uint32_t x, uint32_t y) const
  338. {
  339. assert((x | y) < 4);
  340. const uint32_t bit_index = x * 4 + y;
  341. const uint32_t byte_bit_ofs = bit_index & 7;
  342. const uint8_t* p = &m_bytes[7 - (bit_index >> 3)];
  343. const uint32_t lsb = (p[0] >> byte_bit_ofs) & 1;
  344. const uint32_t msb = (p[-2] >> byte_bit_ofs) & 1;
  345. const uint32_t val = lsb | (msb << 1);
  346. return val;
  347. }
  348. // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
  349. inline uint32_t get_selector(uint32_t x, uint32_t y) const
  350. {
  351. static const uint8_t s_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
  352. return s_etc1_to_selector_index[get_raw_selector(x, y)];
  353. }
  354. inline void set_raw_selector_bits(uint32_t bits)
  355. {
  356. m_bytes[4] = static_cast<uint8_t>(bits);
  357. m_bytes[5] = static_cast<uint8_t>(bits >> 8);
  358. m_bytes[6] = static_cast<uint8_t>(bits >> 16);
  359. m_bytes[7] = static_cast<uint8_t>(bits >> 24);
  360. }
  361. inline bool are_all_selectors_the_same() const
  362. {
  363. uint32_t v = *reinterpret_cast<const uint32_t*>(&m_bytes[4]);
  364. if ((v == 0xFFFFFFFF) || (v == 0xFFFF) || (!v) || (v == 0xFFFF0000))
  365. return true;
  366. return false;
  367. }
  368. inline void set_raw_selector_bits(uint8_t byte0, uint8_t byte1, uint8_t byte2, uint8_t byte3)
  369. {
  370. m_bytes[4] = byte0;
  371. m_bytes[5] = byte1;
  372. m_bytes[6] = byte2;
  373. m_bytes[7] = byte3;
  374. }
  375. inline uint32_t get_raw_selector_bits() const
  376. {
  377. return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24);
  378. }
  379. inline void set_base4_color(uint32_t idx, uint16_t c)
  380. {
  381. if (idx)
  382. {
  383. set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
  384. set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
  385. set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
  386. }
  387. else
  388. {
  389. set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
  390. set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
  391. set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
  392. }
  393. }
  394. inline void set_base5_color(uint16_t c)
  395. {
  396. set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
  397. set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
  398. set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
  399. }
  400. void set_delta3_color(uint16_t c)
  401. {
  402. set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
  403. set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
  404. set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
  405. }
  406. void set_block_color4(const color32& c0_unscaled, const color32& c1_unscaled)
  407. {
  408. set_diff_bit(false);
  409. set_base4_color(0, pack_color4(c0_unscaled, false));
  410. set_base4_color(1, pack_color4(c1_unscaled, false));
  411. }
  412. void set_block_color5(const color32& c0_unscaled, const color32& c1_unscaled)
  413. {
  414. set_diff_bit(true);
  415. set_base5_color(pack_color5(c0_unscaled, false));
  416. int dr = c1_unscaled.r - c0_unscaled.r;
  417. int dg = c1_unscaled.g - c0_unscaled.g;
  418. int db = c1_unscaled.b - c0_unscaled.b;
  419. set_delta3_color(pack_delta3(dr, dg, db));
  420. }
  421. bool set_block_color5_check(const color32& c0_unscaled, const color32& c1_unscaled)
  422. {
  423. set_diff_bit(true);
  424. set_base5_color(pack_color5(c0_unscaled, false));
  425. int dr = c1_unscaled.r - c0_unscaled.r;
  426. int dg = c1_unscaled.g - c0_unscaled.g;
  427. int db = c1_unscaled.b - c0_unscaled.b;
  428. if (((dr < cETC1ColorDeltaMin) || (dr > cETC1ColorDeltaMax)) ||
  429. ((dg < cETC1ColorDeltaMin) || (dg > cETC1ColorDeltaMax)) ||
  430. ((db < cETC1ColorDeltaMin) || (db > cETC1ColorDeltaMax)))
  431. return false;
  432. set_delta3_color(pack_delta3(dr, dg, db));
  433. return true;
  434. }
  435. inline uint32_t get_byte_bits(uint32_t ofs, uint32_t num) const
  436. {
  437. assert((ofs + num) <= 64U);
  438. assert(num && (num <= 8U));
  439. assert((ofs >> 3) == ((ofs + num - 1) >> 3));
  440. const uint32_t byte_ofs = 7 - (ofs >> 3);
  441. const uint32_t byte_bit_ofs = ofs & 7;
  442. return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
  443. }
  444. inline uint16_t get_base5_color() const
  445. {
  446. const uint32_t r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
  447. const uint32_t g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
  448. const uint32_t b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
  449. return static_cast<uint16_t>(b | (g << 5U) | (r << 10U));
  450. }
  451. inline uint16_t get_base4_color(uint32_t idx) const
  452. {
  453. uint32_t r, g, b;
  454. if (idx)
  455. {
  456. r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
  457. g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
  458. b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
  459. }
  460. else
  461. {
  462. r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
  463. g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
  464. b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
  465. }
  466. return static_cast<uint16_t>(b | (g << 4U) | (r << 8U));
  467. }
  468. inline color32 get_base5_color_unscaled() const
  469. {
  470. return color32(m_differential.m_red1, m_differential.m_green1, m_differential.m_blue1, 255);
  471. }
  472. inline bool get_flip_bit() const
  473. {
  474. return (m_bytes[3] & 1) != 0;
  475. }
  476. inline bool get_diff_bit() const
  477. {
  478. return (m_bytes[3] & 2) != 0;
  479. }
  480. inline uint32_t get_inten_table(uint32_t subblock_id) const
  481. {
  482. assert(subblock_id < 2);
  483. const uint32_t ofs = subblock_id ? 2 : 5;
  484. return (m_bytes[3] >> ofs) & 7;
  485. }
  486. inline uint16_t get_delta3_color() const
  487. {
  488. const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
  489. const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
  490. const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
  491. return static_cast<uint16_t>(b | (g << 3U) | (r << 6U));
  492. }
  493. void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const
  494. {
  495. color32 b;
  496. if (get_diff_bit())
  497. {
  498. if (subblock_index)
  499. unpack_color5(b, get_base5_color(), get_delta3_color(), true, 255);
  500. else
  501. unpack_color5(b, get_base5_color(), true);
  502. }
  503. else
  504. {
  505. b = unpack_color4(get_base4_color(subblock_index), true, 255);
  506. }
  507. const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)];
  508. pBlock_colors[0].set_noclamp_rgba(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255);
  509. pBlock_colors[1].set_noclamp_rgba(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255);
  510. pBlock_colors[2].set_noclamp_rgba(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255);
  511. pBlock_colors[3].set_noclamp_rgba(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255);
  512. }
  513. static uint16_t pack_color4(const color32& color, bool scaled, uint32_t bias = 127U)
  514. {
  515. return pack_color4(color.r, color.g, color.b, scaled, bias);
  516. }
  517. static uint16_t pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U)
  518. {
  519. if (scaled)
  520. {
  521. r = (r * 15U + bias) / 255U;
  522. g = (g * 15U + bias) / 255U;
  523. b = (b * 15U + bias) / 255U;
  524. }
  525. r = basisu::minimum(r, 15U);
  526. g = basisu::minimum(g, 15U);
  527. b = basisu::minimum(b, 15U);
  528. return static_cast<uint16_t>(b | (g << 4U) | (r << 8U));
  529. }
  530. static uint16_t pack_color5(const color32& color, bool scaled, uint32_t bias = 127U)
  531. {
  532. return pack_color5(color.r, color.g, color.b, scaled, bias);
  533. }
  534. static uint16_t pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U)
  535. {
  536. if (scaled)
  537. {
  538. r = (r * 31U + bias) / 255U;
  539. g = (g * 31U + bias) / 255U;
  540. b = (b * 31U + bias) / 255U;
  541. }
  542. r = basisu::minimum(r, 31U);
  543. g = basisu::minimum(g, 31U);
  544. b = basisu::minimum(b, 31U);
  545. return static_cast<uint16_t>(b | (g << 5U) | (r << 10U));
  546. }
  547. uint16_t pack_delta3(const color32& color)
  548. {
  549. return pack_delta3(color.r, color.g, color.b);
  550. }
  551. uint16_t pack_delta3(int r, int g, int b)
  552. {
  553. assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
  554. assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
  555. assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
  556. if (r < 0) r += 8;
  557. if (g < 0) g += 8;
  558. if (b < 0) b += 8;
  559. return static_cast<uint16_t>(b | (g << 3) | (r << 6));
  560. }
  561. static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3)
  562. {
  563. r = (packed_delta3 >> 6) & 7;
  564. g = (packed_delta3 >> 3) & 7;
  565. b = packed_delta3 & 7;
  566. if (r >= 4) r -= 8;
  567. if (g >= 4) g -= 8;
  568. if (b >= 4) b -= 8;
  569. }
  570. static color32 unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha)
  571. {
  572. uint32_t b = packed_color5 & 31U;
  573. uint32_t g = (packed_color5 >> 5U) & 31U;
  574. uint32_t r = (packed_color5 >> 10U) & 31U;
  575. if (scaled)
  576. {
  577. b = (b << 3U) | (b >> 2U);
  578. g = (g << 3U) | (g >> 2U);
  579. r = (r << 3U) | (r >> 2U);
  580. }
  581. assert(alpha <= 255);
  582. return color32(cNoClamp, r, g, b, alpha);
  583. }
  584. static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled)
  585. {
  586. color32 c(unpack_color5(packed_color5, scaled, 0));
  587. r = c.r;
  588. g = c.g;
  589. b = c.b;
  590. }
  591. static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled)
  592. {
  593. result = unpack_color5(packed_color5, scaled, 255);
  594. }
  595. static bool unpack_color5(color32& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha)
  596. {
  597. int dr, dg, db;
  598. unpack_delta3(dr, dg, db, packed_delta3);
  599. int r = ((packed_color5 >> 10U) & 31U) + dr;
  600. int g = ((packed_color5 >> 5U) & 31U) + dg;
  601. int b = (packed_color5 & 31U) + db;
  602. bool success = true;
  603. if (static_cast<uint32_t>(r | g | b) > 31U)
  604. {
  605. success = false;
  606. r = basisu::clamp<int>(r, 0, 31);
  607. g = basisu::clamp<int>(g, 0, 31);
  608. b = basisu::clamp<int>(b, 0, 31);
  609. }
  610. if (scaled)
  611. {
  612. b = (b << 3U) | (b >> 2U);
  613. g = (g << 3U) | (g >> 2U);
  614. r = (r << 3U) | (r >> 2U);
  615. }
  616. result.set_noclamp_rgba(r, g, b, basisu::minimum(alpha, 255U));
  617. return success;
  618. }
  619. static color32 unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha)
  620. {
  621. uint32_t b = packed_color4 & 15U;
  622. uint32_t g = (packed_color4 >> 4U) & 15U;
  623. uint32_t r = (packed_color4 >> 8U) & 15U;
  624. if (scaled)
  625. {
  626. b = (b << 4U) | b;
  627. g = (g << 4U) | g;
  628. r = (r << 4U) | r;
  629. }
  630. return color32(cNoClamp, r, g, b, basisu::minimum(alpha, 255U));
  631. }
  632. static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled)
  633. {
  634. color32 c(unpack_color4(packed_color4, scaled, 0));
  635. r = c.r;
  636. g = c.g;
  637. b = c.b;
  638. }
  639. static void get_diff_subblock_colors(color32* pDst, uint16_t packed_color5, uint32_t table_idx)
  640. {
  641. assert(table_idx < cETC1IntenModifierValues);
  642. const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
  643. uint32_t r, g, b;
  644. unpack_color5(r, g, b, packed_color5, true);
  645. const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
  646. const int y0 = pInten_modifer_table[0];
  647. pDst[0].set(clamp255(ir + y0), clamp255(ig + y0), clamp255(ib + y0), 255);
  648. const int y1 = pInten_modifer_table[1];
  649. pDst[1].set(clamp255(ir + y1), clamp255(ig + y1), clamp255(ib + y1), 255);
  650. const int y2 = pInten_modifer_table[2];
  651. pDst[2].set(clamp255(ir + y2), clamp255(ig + y2), clamp255(ib + y2), 255);
  652. const int y3 = pInten_modifer_table[3];
  653. pDst[3].set(clamp255(ir + y3), clamp255(ig + y3), clamp255(ib + y3), 255);
  654. }
  655. static int clamp255(int x)
  656. {
  657. if (x & 0xFFFFFF00)
  658. {
  659. if (x < 0)
  660. x = 0;
  661. else if (x > 255)
  662. x = 255;
  663. }
  664. return x;
  665. }
  666. static void get_block_colors5(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table)
  667. {
  668. color32 b(base_color5);
  669. b.r = (b.r << 3) | (b.r >> 2);
  670. b.g = (b.g << 3) | (b.g >> 2);
  671. b.b = (b.b << 3) | (b.b >> 2);
  672. const int* pInten_table = g_etc1_inten_tables[inten_table];
  673. pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255);
  674. pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255);
  675. pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255);
  676. pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255);
  677. }
  678. static void get_block_color5(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t& r, uint32_t &g, uint32_t &b)
  679. {
  680. assert(index < 4);
  681. uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2);
  682. uint32_t bg = (base_color5.g << 3) | (base_color5.g >> 2);
  683. uint32_t bb = (base_color5.b << 3) | (base_color5.b >> 2);
  684. const int* pInten_table = g_etc1_inten_tables[inten_table];
  685. r = clamp255(br + pInten_table[index]);
  686. g = clamp255(bg + pInten_table[index]);
  687. b = clamp255(bb + pInten_table[index]);
  688. }
  689. static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r)
  690. {
  691. assert(index < 4);
  692. uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2);
  693. const int* pInten_table = g_etc1_inten_tables[inten_table];
  694. r = clamp255(br + pInten_table[index]);
  695. }
  696. static void get_block_colors5_g(int* pBlock_colors, const color32& base_color5, uint32_t inten_table)
  697. {
  698. const int g = (base_color5.g << 3) | (base_color5.g >> 2);
  699. const int* pInten_table = g_etc1_inten_tables[inten_table];
  700. pBlock_colors[0] = clamp255(g + pInten_table[0]);
  701. pBlock_colors[1] = clamp255(g + pInten_table[1]);
  702. pBlock_colors[2] = clamp255(g + pInten_table[2]);
  703. pBlock_colors[3] = clamp255(g + pInten_table[3]);
  704. }
  705. static void get_block_colors5_bounds(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3)
  706. {
  707. color32 b(base_color5);
  708. b.r = (b.r << 3) | (b.r >> 2);
  709. b.g = (b.g << 3) | (b.g >> 2);
  710. b.b = (b.b << 3) | (b.b >> 2);
  711. const int* pInten_table = g_etc1_inten_tables[inten_table];
  712. pBlock_colors[0].set(clamp255(b.r + pInten_table[l]), clamp255(b.g + pInten_table[l]), clamp255(b.b + pInten_table[l]), 255);
  713. pBlock_colors[1].set(clamp255(b.r + pInten_table[h]), clamp255(b.g + pInten_table[h]), clamp255(b.b + pInten_table[h]), 255);
  714. }
  715. static void get_block_colors5_bounds_g(uint32_t* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3)
  716. {
  717. color32 b(base_color5);
  718. b.g = (b.g << 3) | (b.g >> 2);
  719. const int* pInten_table = g_etc1_inten_tables[inten_table];
  720. pBlock_colors[0] = clamp255(b.g + pInten_table[l]);
  721. pBlock_colors[1] = clamp255(b.g + pInten_table[h]);
  722. }
  723. };
  724. enum dxt_constants
  725. {
  726. cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U,
  727. cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U,
  728. };
	// Lookup table of 2-bit values extracted from a byte:
	//   g_etc1_x_selector_unpack[i][b] == ((b >> i) & 1) | (((b >> (i + 4)) & 1) << 1)
	// for i in 0-3 and b in 0-255, i.e. bit i of the byte becomes bit 0 of the result and
	// bit (i + 4) becomes bit 1.
	// NOTE(review): presumably used to pull one pixel's selector out of packed ETC1 selector
	// bytes - confirm against the callers.
	static const uint8_t g_etc1_x_selector_unpack[4][256] =
	{
		{
			0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
			0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
			0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
			0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
			0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
			0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
			0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
			0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
		},
		{
			0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
			2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
			0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
			2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
			0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
			2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
			0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
			2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
		},
		{
			0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
			0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
			2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
			2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
			0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
			0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
			2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
			2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
		},
		{
			0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
			0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
			0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
			0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
			2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
			2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
			2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
			2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
		}
	};
  772. struct dxt1_block
  773. {
  774. enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
  775. uint8_t m_low_color[cTotalEndpointBytes];
  776. uint8_t m_high_color[cTotalEndpointBytes];
  777. uint8_t m_selectors[cTotalSelectorBytes];
  778. inline void clear() { basisu::clear_obj(*this); }
  779. inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
  780. inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
  781. inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast<uint8_t>(c & 0xFF); m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
  782. inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast<uint8_t>(c & 0xFF); m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
  783. inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; }
  784. inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); }
  785. static uint16_t pack_color(const color32& color, bool scaled, uint32_t bias = 127U)
  786. {
  787. uint32_t r = color.r, g = color.g, b = color.b;
  788. if (scaled)
  789. {
  790. r = (r * 31U + bias) / 255U;
  791. g = (g * 63U + bias) / 255U;
  792. b = (b * 31U + bias) / 255U;
  793. }
  794. return static_cast<uint16_t>(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U));
  795. }
  796. static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
  797. };
  // Pair of selector indices describing a range of selectors.
  // The DXT1 table-generation loops in this file visit every selector s with m_low <= s <= m_high,
  // i.e. both ends are inclusive.
  798. struct dxt_selector_range
  799. {
  800. uint32_t m_low; // first selector of the range (inclusive)
  801. uint32_t m_high; // last selector of the range (inclusive)
  802. };
  // One precomputed entry of the g_etc1_to_dxt_5 / g_etc1_to_dxt_6 tables.
  // The table generators in this file emit each entry as "{best_lo,best_hi,best_err}",
  // so the member order below must stay in sync with the generated .inc files.
  803. struct etc1_to_dxt1_56_solution
  804. {
  805. uint8_t m_lo; // low endpoint level chosen by the generator
  806. uint8_t m_hi; // high endpoint level chosen by the generator
  807. uint16_t m_err; // summed squared error of the chosen endpoints (generators assert it fits in 16 bits)
  808. };
  809. #if BASISD_SUPPORT_DXT1
  // Inclusive [m_low, m_high] selector ranges for which precomputed endpoint solutions are stored.
  810. static dxt_selector_range g_etc1_to_dxt1_selector_ranges[] =
  811. {
  812. { 0, 3 },
  813. { 1, 3 },
  814. { 0, 2 },
  815. { 1, 2 },
  816. { 2, 3 },
  817. { 0, 1 },
  818. };
  // Number of entries in g_etc1_to_dxt1_selector_ranges (derived from the array size).
  819. const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_RANGES = sizeof(g_etc1_to_dxt1_selector_ranges) / sizeof(g_etc1_to_dxt1_selector_ranges[0]);
  // Not initialized here. Presumably filled at init time to map a (low, high) selector pair to its
  // index in g_etc1_to_dxt1_selector_ranges - TODO confirm against the initialization code.
  820. static uint32_t g_etc1_to_dxt1_selector_range_index[4][4];
  821. const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS = 10;
  // Each row maps a source selector s (0..3) to an index into the 4-entry color ramp the table
  // generators build from a lo/hi endpoint pair: ramp[0] = lo, ramp[3] = hi,
  // ramp[1] = (2*lo + hi) / 3 and ramp[2] = (2*hi + lo) / 3.
  822. static const uint8_t g_etc1_to_dxt1_selector_mappings[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][4] =
  823. {
  824. { 0, 0, 1, 1 },
  825. { 0, 0, 1, 2 },
  826. { 0, 0, 1, 3 },
  827. { 0, 0, 2, 3 },
  828. { 0, 1, 1, 1 },
  829. { 0, 1, 2, 2 },
  830. { 0, 1, 2, 3 },
  831. { 0, 2, 3, 3 },
  832. { 1, 2, 2, 2 },
  833. { 1, 2, 3, 3 },
  834. };
  // Non-const, per-mapping 256-entry tables with no initializer here.
  // Presumably expanded from g_etc1_to_dxt1_selector_mappings at init time - TODO confirm.
  835. static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];
  836. static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];
  // Precomputed endpoint solutions with 6-bit (0..63) lo/hi levels.
  // Entry order as written by create_etc1_to_dxt1_6_conversion_table(): intensity table (8) outermost,
  // then 5-bit base level (32), then selector range, then selector mapping innermost.
  837. static const etc1_to_dxt1_56_solution g_etc1_to_dxt_6[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = {
  838. #include "basisu_transcoder_tables_dxt1_6.inc"
  839. };
  // Same layout as g_etc1_to_dxt_6, but with 5-bit (0..31) lo/hi levels
  // (written by create_etc1_to_dxt1_5_conversion_table()).
  840. static const etc1_to_dxt1_56_solution g_etc1_to_dxt_5[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = {
  841. #include "basisu_transcoder_tables_dxt1_5.inc"
  842. };
  843. #endif // BASISD_SUPPORT_DXT1
  844. #if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC
  845. // First saw the idea for optimal BC1 single-color block encoding using lookup tables in ryg_dxt.
  // Endpoint pair selected for one 8-bit target value.
  // prepare_bc1_single_color_table() stores the winning hi/lo loop indices here, i.e. indices
  // into its pExpand array rather than expanded 8-bit values.
  846. struct bc1_match_entry
  847. {
  848. uint8_t m_hi; // index of the best high endpoint
  849. uint8_t m_lo; // index of the best low endpoint
  850. };
  // One entry per 8-bit target value (256 each). Presumably filled by
  // prepare_bc1_single_color_table() during initialization - TODO confirm against the init code.
  851. static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; // selector 1, allow equals hi/lo
  852. static bc1_match_entry g_bc1_match5_equals_0[256], g_bc1_match6_equals_0[256]; // selector 0, allow equals hi/lo
  853. static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size0, int size1, int sel)
  854. {
  855. for (int i = 0; i < 256; i++)
  856. {
  857. int lowest_e = 256;
  858. for (int lo = 0; lo < size0; lo++)
  859. {
  860. for (int hi = 0; hi < size1; hi++)
  861. {
  862. const int lo_e = pExpand[lo], hi_e = pExpand[hi];
  863. int e;
  864. if (sel == 1)
  865. {
  866. // Selector 1
  867. e = basisu::iabs(((hi_e * 2 + lo_e) / 3) - i);
  868. e += (basisu::iabs(hi_e - lo_e) * 3) / 100;
  869. }
  870. else
  871. {
  872. assert(sel == 0);
  873. // Selector 0
  874. e = basisu::iabs(hi_e - i);
  875. }
  876. if (e < lowest_e)
  877. {
  878. pTable[i].m_hi = static_cast<uint8_t>(hi);
  879. pTable[i].m_lo = static_cast<uint8_t>(lo);
  880. lowest_e = e;
  881. }
  882. } // hi
  883. } // lo
  884. }
  885. }
  886. #endif
  887. #if BASISD_WRITE_NEW_DXT1_TABLES
  888. static void create_etc1_to_dxt1_5_conversion_table()
  889. {
  890. FILE* pFile = nullptr;
  891. fopen_s(&pFile, "basisu_transcoder_tables_dxt1_5.inc", "w");
  892. uint32_t n = 0;
  893. for (int inten = 0; inten < 8; inten++)
  894. {
  895. for (uint32_t g = 0; g < 32; g++)
  896. {
  897. color32 block_colors[4];
  898. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  899. for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
  900. {
  901. const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
  902. const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;
  903. for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
  904. {
  905. uint32_t best_lo = 0;
  906. uint32_t best_hi = 0;
  907. uint64_t best_err = UINT64_MAX;
  908. for (uint32_t hi = 0; hi <= 31; hi++)
  909. {
  910. for (uint32_t lo = 0; lo <= 31; lo++)
  911. {
  912. //if (lo == hi) continue;
  913. uint32_t colors[4];
  914. colors[0] = (lo << 3) | (lo >> 2);
  915. colors[3] = (hi << 3) | (hi >> 2);
  916. colors[1] = (colors[0] * 2 + colors[3]) / 3;
  917. colors[2] = (colors[3] * 2 + colors[0]) / 3;
  918. uint64_t total_err = 0;
  919. for (uint32_t s = low_selector; s <= high_selector; s++)
  920. {
  921. int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];
  922. total_err += err * err;
  923. }
  924. if (total_err < best_err)
  925. {
  926. best_err = total_err;
  927. best_lo = lo;
  928. best_hi = hi;
  929. }
  930. }
  931. }
  932. assert(best_err <= 0xFFFF);
  933. //table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
  934. //table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
  935. //table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);
  936. //assert(best_lo != best_hi);
  937. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
  938. n++;
  939. if ((n & 31) == 31)
  940. fprintf(pFile, "\n");
  941. } // m
  942. } // sr
  943. } // g
  944. } // inten
  945. fclose(pFile);
  946. }
  947. static void create_etc1_to_dxt1_6_conversion_table()
  948. {
  949. FILE* pFile = nullptr;
  950. fopen_s(&pFile, "basisu_transcoder_tables_dxt1_6.inc", "w");
  951. uint32_t n = 0;
  952. for (int inten = 0; inten < 8; inten++)
  953. {
  954. for (uint32_t g = 0; g < 32; g++)
  955. {
  956. color32 block_colors[4];
  957. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  958. for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
  959. {
  960. const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
  961. const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;
  962. for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
  963. {
  964. uint32_t best_lo = 0;
  965. uint32_t best_hi = 0;
  966. uint64_t best_err = UINT64_MAX;
  967. for (uint32_t hi = 0; hi <= 63; hi++)
  968. {
  969. for (uint32_t lo = 0; lo <= 63; lo++)
  970. {
  971. //if (lo == hi) continue;
  972. uint32_t colors[4];
  973. colors[0] = (lo << 2) | (lo >> 4);
  974. colors[3] = (hi << 2) | (hi >> 4);
  975. colors[1] = (colors[0] * 2 + colors[3]) / 3;
  976. colors[2] = (colors[3] * 2 + colors[0]) / 3;
  977. uint64_t total_err = 0;
  978. for (uint32_t s = low_selector; s <= high_selector; s++)
  979. {
  980. int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];
  981. total_err += err * err;
  982. }
  983. if (total_err < best_err)
  984. {
  985. best_err = total_err;
  986. best_lo = lo;
  987. best_hi = hi;
  988. }
  989. }
  990. }
  991. assert(best_err <= 0xFFFF);
  992. //table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
  993. //table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
  994. //table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);
  995. //assert(best_lo != best_hi);
  996. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
  997. n++;
  998. if ((n & 31) == 31)
  999. fprintf(pFile, "\n");
  1000. } // m
  1001. } // sr
  1002. } // g
  1003. } // inten
  1004. fclose(pFile);
  1005. }
  1006. #endif
  1007. #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
  // Signed modifier values, indexed as g_eac_modifier_table[table][selector] (16 tables x 8 selectors).
  // pack_eac_a8_exhaustive() in this file decodes a value as
  // clamp(multiplier * g_eac_modifier_table[table][selector] + base, 0, 255).
  1008. static const int8_t g_eac_modifier_table[16][8] =
  1009. {
  1010. { -3, -6, -9, -15, 2, 5, 8, 14 },
  1011. { -3, -7, -10, -13, 2, 6, 9, 12 },
  1012. { -2, -5, -8, -13, 1, 4, 7, 12 },
  1013. { -2, -4, -6, -13, 1, 3, 5, 12 },
  1014. { -3, -6, -8, -12, 2, 5, 7, 11 },
  1015. { -3, -7, -9, -11, 2, 6, 8, 10 },
  1016. { -4, -7, -8, -11, 3, 6, 7, 10 },
  1017. { -3, -5, -8, -11, 2, 4, 7, 10 },
  1018. { -2, -6, -8, -10, 1, 5, 7, 9 },
  1019. { -2, -5, -8, -10, 1, 4, 7, 9 },
  1020. { -2, -4, -8, -10, 1, 3, 7, 9 },
  1021. { -2, -5, -7, -10, 1, 4, 6, 9 },
  1022. { -3, -4, -7, -10, 2, 3, 6, 9 },
  1023. { -1, -2, -3, -10, 0, 1, 2, 9 }, // entry 13
  1024. { -4, -6, -8, -9, 3, 5, 7, 8 },
  1025. { -3, -5, -7, -9, 2, 4, 6, 8 }
  1026. };
  // Used by ETC2 EAC A8 and ETC2 EAC R11/RG11.
  // Holds the base/table/multiplier header bitfields plus 48 selector bits: sixteen 3-bit selectors
  // packed into m_selectors with the most significant byte first.
  struct eac_block
  {
    uint16_t m_base : 8;
    uint16_t m_table : 4;
    uint16_t m_multiplier : 4;

    uint8_t m_selectors[6];

    // Returns the 3-bit selector of pixel (x, y); both coordinates must be < 4.
    // Pixel (0, 0) occupies the top three bits of the 48-bit field and the index advances as x * 4 + y.
    uint32_t get_selector(uint32_t x, uint32_t y) const
    {
      assert((x < 4) && (y < 4));

      const uint32_t shift = 45 - 3 * (x * 4 + y);
      return static_cast<uint32_t>((get_selector_bits() >> shift) & 7);
    }

    // Replaces the 3-bit selector of pixel (x, y) with s (s must be < 8).
    void set_selector(uint32_t x, uint32_t y, uint32_t s)
    {
      assert((x < 4) && (y < 4) && (s < 8));

      const uint32_t shift = 45 - 3 * (x * 4 + y);
      const uint64_t kept_bits = get_selector_bits() & ~(7ULL << shift);
      set_selector_bits(kept_bits | (static_cast<uint64_t>(s) << shift));
    }

    // Assembles the six selector bytes into one 48-bit value, m_selectors[0] being the most significant byte.
    uint64_t get_selector_bits() const
    {
      uint64_t bits = 0;
      for (uint32_t i = 0; i < 6; i++)
        bits = (bits << 8) | m_selectors[i];
      return bits;
    }

    // Stores the low 48 bits of pixels into the six selector bytes, most significant byte first.
    void set_selector_bits(uint64_t pixels)
    {
      for (int i = 5; i >= 0; i--)
      {
        m_selectors[i] = static_cast<uint8_t>(pixels);
        pixels >>= 8;
      }
    }
  };
  1067. #endif // #if BASISD_SUPPORT_UASTC BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
  1068. #if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
  // Selector ranges for which ETC1 -> EAC conversion entries are stored (one conversion entry per range).
  // Presumably inclusive [m_low, m_high], like the DXT1 selector ranges - TODO confirm against the users.
  1069. static const dxt_selector_range s_etc2_eac_selector_ranges[] =
  1070. {
  1071. { 0, 3 },
  1072. { 1, 3 },
  1073. { 0, 2 },
  1074. { 1, 2 },
  1075. };
  // Number of entries in s_etc2_eac_selector_ranges (derived from the array size).
  1076. const uint32_t NUM_ETC2_EAC_SELECTOR_RANGES = sizeof(s_etc2_eac_selector_ranges) / sizeof(s_etc2_eac_selector_ranges[0]);
  // One precomputed conversion entry; the conversion tables initialize it as { m_base, m_table_mul, m_trans }.
  1077. struct etc1_g_to_eac_conversion
  1078. {
  1079. uint8_t m_base;
  1080. uint8_t m_table_mul; // mul*16+table
  1081. uint16_t m_trans; // translates ETC1 selectors to ETC2_EAC_A8
  1082. };
  1083. #endif // BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
  1084. #if BASISD_SUPPORT_ETC2_EAC_A8
  1085. #if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
  // Output of pack_eac_a8_exhaustive(): the best base/table/multiplier found and the per-pixel selectors for it.
  1086. struct pack_eac_a8_results
  1087. {
  1088. uint32_t m_base; // best base value (searched over 0..255)
  1089. uint32_t m_table; // best modifier table index (searched over 0..15)
  1090. uint32_t m_multiplier; // best multiplier (searched over 1..15)
  1091. basisu::vector<uint8_t> m_selectors; // one selector (0..7) per input pixel for the best solution
  1092. basisu::vector<uint8_t> m_selectors_temp; // scratch selectors of the candidate being evaluated; swapped into m_selectors on improvement
  1093. };
  1094. static uint64_t pack_eac_a8_exhaustive(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels)
  1095. {
  1096. results.m_selectors.resize(num_pixels);
  1097. results.m_selectors_temp.resize(num_pixels);
  1098. uint64_t best_err = UINT64_MAX;
  1099. for (uint32_t base_color = 0; base_color < 256; base_color++)
  1100. {
  1101. for (uint32_t multiplier = 1; multiplier < 16; multiplier++)
  1102. {
  1103. for (uint32_t table = 0; table < 16; table++)
  1104. {
  1105. uint64_t total_err = 0;
  1106. for (uint32_t i = 0; i < num_pixels; i++)
  1107. {
  1108. const int a = pPixels[i];
  1109. uint32_t best_s_err = UINT32_MAX;
  1110. uint32_t best_s = 0;
  1111. for (uint32_t s = 0; s < 8; s++)
  1112. {
  1113. int v = (int)multiplier * g_eac_modifier_table[table][s] + (int)base_color;
  1114. if (v < 0)
  1115. v = 0;
  1116. else if (v > 255)
  1117. v = 255;
  1118. uint32_t err = abs(a - v);
  1119. if (err < best_s_err)
  1120. {
  1121. best_s_err = err;
  1122. best_s = s;
  1123. }
  1124. }
  1125. results.m_selectors_temp[i] = static_cast<uint8_t>(best_s);
  1126. total_err += best_s_err * best_s_err;
  1127. if (total_err >= best_err)
  1128. break;
  1129. }
  1130. if (total_err < best_err)
  1131. {
  1132. best_err = total_err;
  1133. results.m_base = base_color;
  1134. results.m_multiplier = multiplier;
  1135. results.m_table = table;
  1136. results.m_selectors.swap(results.m_selectors_temp);
  1137. }
  1138. } // table
  1139. } // multiplier
  1140. } // base_color
  1141. return best_err;
  1142. }
  1143. #endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
  1144. static
  1145. #if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
  1146. const
  1147. #endif
  1148. etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =
  1149. {
  1150. { { 0,1,3328 },{ 0,1,3328 },{ 0,1,256 },{ 0,1,256 } },
  1151. { { 0,226,3936 },{ 0,226,3936 },{ 0,81,488 },{ 0,81,488 } },
  1152. { { 6,178,4012 },{ 6,178,4008 },{ 0,146,501 },{ 0,130,496 } },
  1153. { { 14,178,4012 },{ 14,178,4008 },{ 8,146,501 },{ 6,82,496 } },
  1154. { { 23,178,4012 },{ 23,178,4008 },{ 17,146,501 },{ 3,228,496 } },
  1155. { { 31,178,4012 },{ 31,178,4008 },{ 25,146,501 },{ 11,228,496 } },
  1156. { { 39,178,4012 },{ 39,178,4008 },{ 33,146,501 },{ 19,228,496 } },
  1157. { { 47,178,4012 },{ 47,178,4008 },{ 41,146,501 },{ 27,228,496 } },
  1158. { { 56,178,4012 },{ 56,178,4008 },{ 50,146,501 },{ 36,228,496 } },
  1159. { { 64,178,4012 },{ 64,178,4008 },{ 58,146,501 },{ 44,228,496 } },
  1160. { { 72,178,4012 },{ 72,178,4008 },{ 66,146,501 },{ 52,228,496 } },
  1161. { { 80,178,4012 },{ 80,178,4008 },{ 74,146,501 },{ 60,228,496 } },
  1162. { { 89,178,4012 },{ 89,178,4008 },{ 83,146,501 },{ 69,228,496 } },
  1163. { { 97,178,4012 },{ 97,178,4008 },{ 91,146,501 },{ 77,228,496 } },
  1164. { { 105,178,4012 },{ 105,178,4008 },{ 99,146,501 },{ 85,228,496 } },
  1165. { { 113,178,4012 },{ 113,178,4008 },{ 107,146,501 },{ 93,228,496 } },
  1166. { { 122,178,4012 },{ 122,178,4008 },{ 116,146,501 },{ 102,228,496 } },
  1167. { { 130,178,4012 },{ 130,178,4008 },{ 124,146,501 },{ 110,228,496 } },
  1168. { { 138,178,4012 },{ 138,178,4008 },{ 132,146,501 },{ 118,228,496 } },
  1169. { { 146,178,4012 },{ 146,178,4008 },{ 140,146,501 },{ 126,228,496 } },
  1170. { { 155,178,4012 },{ 155,178,4008 },{ 149,146,501 },{ 135,228,496 } },
  1171. { { 163,178,4012 },{ 163,178,4008 },{ 157,146,501 },{ 143,228,496 } },
  1172. { { 171,178,4012 },{ 171,178,4008 },{ 165,146,501 },{ 151,228,496 } },
  1173. { { 179,178,4012 },{ 179,178,4008 },{ 173,146,501 },{ 159,228,496 } },
  1174. { { 188,178,4012 },{ 188,178,4008 },{ 182,146,501 },{ 168,228,496 } },
  1175. { { 196,178,4012 },{ 196,178,4008 },{ 190,146,501 },{ 176,228,496 } },
  1176. { { 204,178,4012 },{ 204,178,4008 },{ 198,146,501 },{ 184,228,496 } },
  1177. { { 212,178,4012 },{ 212,178,4008 },{ 206,146,501 },{ 192,228,496 } },
  1178. { { 221,178,4012 },{ 221,178,4008 },{ 215,146,501 },{ 201,228,496 } },
  1179. { { 229,178,4012 },{ 229,178,4008 },{ 223,146,501 },{ 209,228,496 } },
  1180. { { 235,66,4012 },{ 221,100,4008 },{ 231,146,501 },{ 217,228,496 } },
  1181. { { 211,102,4085 },{ 118,31,4080 },{ 211,102,501 },{ 118,31,496 } },
  1182. { { 1,2,3328 },{ 1,2,3328 },{ 0,1,320 },{ 0,1,320 } },
  1183. { { 7,162,3905 },{ 7,162,3904 },{ 1,17,480 },{ 1,17,480 } },
  1184. { { 15,162,3906 },{ 15,162,3904 },{ 1,117,352 },{ 1,117,352 } },
  1185. { { 23,162,3906 },{ 23,162,3904 },{ 5,34,500 },{ 4,53,424 } },
  1186. { { 32,162,3906 },{ 32,162,3904 },{ 14,34,500 },{ 3,69,424 } },
  1187. { { 40,162,3906 },{ 40,162,3904 },{ 22,34,500 },{ 1,133,496 } },
  1188. { { 48,162,3906 },{ 48,162,3904 },{ 30,34,500 },{ 4,85,496 } },
  1189. { { 56,162,3906 },{ 56,162,3904 },{ 38,34,500 },{ 12,85,496 } },
  1190. { { 65,162,3906 },{ 65,162,3904 },{ 47,34,500 },{ 1,106,424 } },
  1191. { { 73,162,3906 },{ 73,162,3904 },{ 55,34,500 },{ 9,106,424 } },
  1192. { { 81,162,3906 },{ 81,162,3904 },{ 63,34,500 },{ 7,234,496 } },
  1193. { { 89,162,3906 },{ 89,162,3904 },{ 71,34,500 },{ 15,234,496 } },
  1194. { { 98,162,3906 },{ 98,162,3904 },{ 80,34,500 },{ 24,234,496 } },
  1195. { { 106,162,3906 },{ 106,162,3904 },{ 88,34,500 },{ 32,234,496 } },
  1196. { { 114,162,3906 },{ 114,162,3904 },{ 96,34,500 },{ 40,234,496 } },
  1197. { { 122,162,3906 },{ 122,162,3904 },{ 104,34,500 },{ 48,234,496 } },
  1198. { { 131,162,3906 },{ 131,162,3904 },{ 113,34,500 },{ 57,234,496 } },
  1199. { { 139,162,3906 },{ 139,162,3904 },{ 121,34,500 },{ 65,234,496 } },
  1200. { { 147,162,3906 },{ 147,162,3904 },{ 129,34,500 },{ 73,234,496 } },
  1201. { { 155,162,3906 },{ 155,162,3904 },{ 137,34,500 },{ 81,234,496 } },
  1202. { { 164,162,3906 },{ 164,162,3904 },{ 146,34,500 },{ 90,234,496 } },
  1203. { { 172,162,3906 },{ 172,162,3904 },{ 154,34,500 },{ 98,234,496 } },
  1204. { { 180,162,3906 },{ 180,162,3904 },{ 162,34,500 },{ 106,234,496 } },
  1205. { { 188,162,3906 },{ 188,162,3904 },{ 170,34,500 },{ 114,234,496 } },
  1206. { { 197,162,3906 },{ 197,162,3904 },{ 179,34,500 },{ 123,234,496 } },
  1207. { { 205,162,3906 },{ 205,162,3904 },{ 187,34,500 },{ 131,234,496 } },
  1208. { { 213,162,3906 },{ 213,162,3904 },{ 195,34,500 },{ 139,234,496 } },
  1209. { { 221,162,3906 },{ 221,162,3904 },{ 203,34,500 },{ 147,234,496 } },
  1210. { { 230,162,3906 },{ 230,162,3904 },{ 212,34,500 },{ 156,234,496 } },
  1211. { { 238,162,3906 },{ 174,106,4008 },{ 220,34,500 },{ 164,234,496 } },
  1212. { { 240,178,4001 },{ 182,106,4008 },{ 228,34,500 },{ 172,234,496 } },
  1213. { { 166,108,4085 },{ 115,31,4080 },{ 166,108,501 },{ 115,31,496 } },
  1214. { { 1,68,3328 },{ 1,68,3328 },{ 0,17,384 },{ 0,17,384 } },
  1215. { { 1,148,3904 },{ 1,148,3904 },{ 1,2,384 },{ 1,2,384 } },
  1216. { { 21,18,3851 },{ 21,18,3848 },{ 1,50,488 },{ 1,50,488 } },
  1217. { { 27,195,3851 },{ 29,18,3848 },{ 0,67,488 },{ 0,67,488 } },
  1218. { { 34,195,3907 },{ 38,18,3848 },{ 20,66,482 },{ 0,3,496 } },
  1219. { { 42,195,3907 },{ 46,18,3848 },{ 28,66,482 },{ 2,6,424 } },
  1220. { { 50,195,3907 },{ 54,18,3848 },{ 36,66,482 },{ 4,22,424 } },
  1221. { { 58,195,3907 },{ 62,18,3848 },{ 44,66,482 },{ 3,73,424 } },
  1222. { { 67,195,3907 },{ 71,18,3848 },{ 53,66,482 },{ 3,22,496 } },
  1223. { { 75,195,3907 },{ 79,18,3848 },{ 61,66,482 },{ 2,137,496 } },
  1224. { { 83,195,3907 },{ 87,18,3848 },{ 69,66,482 },{ 1,89,496 } },
  1225. { { 91,195,3907 },{ 95,18,3848 },{ 77,66,482 },{ 9,89,496 } },
  1226. { { 100,195,3907 },{ 104,18,3848 },{ 86,66,482 },{ 18,89,496 } },
  1227. { { 108,195,3907 },{ 112,18,3848 },{ 94,66,482 },{ 26,89,496 } },
  1228. { { 116,195,3907 },{ 120,18,3848 },{ 102,66,482 },{ 34,89,496 } },
  1229. { { 124,195,3907 },{ 128,18,3848 },{ 110,66,482 },{ 42,89,496 } },
  1230. { { 133,195,3907 },{ 137,18,3848 },{ 119,66,482 },{ 51,89,496 } },
  1231. { { 141,195,3907 },{ 145,18,3848 },{ 127,66,482 },{ 59,89,496 } },
  1232. { { 149,195,3907 },{ 153,18,3848 },{ 135,66,482 },{ 67,89,496 } },
  1233. { { 157,195,3907 },{ 161,18,3848 },{ 143,66,482 },{ 75,89,496 } },
  1234. { { 166,195,3907 },{ 170,18,3848 },{ 152,66,482 },{ 84,89,496 } },
  1235. { { 174,195,3907 },{ 178,18,3848 },{ 160,66,482 },{ 92,89,496 } },
  1236. { { 182,195,3907 },{ 186,18,3848 },{ 168,66,482 },{ 100,89,496 } },
  1237. { { 190,195,3907 },{ 194,18,3848 },{ 176,66,482 },{ 108,89,496 } },
  1238. { { 199,195,3907 },{ 203,18,3848 },{ 185,66,482 },{ 117,89,496 } },
  1239. { { 207,195,3907 },{ 211,18,3848 },{ 193,66,482 },{ 125,89,496 } },
  1240. { { 215,195,3907 },{ 219,18,3848 },{ 201,66,482 },{ 133,89,496 } },
  1241. { { 223,195,3907 },{ 227,18,3848 },{ 209,66,482 },{ 141,89,496 } },
  1242. { { 231,195,3907 },{ 168,89,4008 },{ 218,66,482 },{ 150,89,496 } },
  1243. { { 236,18,3907 },{ 176,89,4008 },{ 226,66,482 },{ 158,89,496 } },
  1244. { { 158,90,4085 },{ 103,31,4080 },{ 158,90,501 },{ 103,31,496 } },
  1245. { { 166,90,4085 },{ 111,31,4080 },{ 166,90,501 },{ 111,31,496 } },
  1246. { { 0,70,3328 },{ 0,70,3328 },{ 0,45,256 },{ 0,45,256 } },
  1247. { { 0,117,3904 },{ 0,117,3904 },{ 0,35,384 },{ 0,35,384 } },
  1248. { { 13,165,3905 },{ 13,165,3904 },{ 3,221,416 },{ 3,221,416 } },
  1249. { { 21,165,3906 },{ 21,165,3904 },{ 11,221,416 },{ 11,221,416 } },
  1250. { { 30,165,3906 },{ 30,165,3904 },{ 7,61,352 },{ 7,61,352 } },
  1251. { { 38,165,3906 },{ 38,165,3904 },{ 2,125,352 },{ 2,125,352 } },
  1252. { { 46,165,3906 },{ 46,165,3904 },{ 2,37,500 },{ 10,125,352 } },
  1253. { { 54,165,3906 },{ 54,165,3904 },{ 10,37,500 },{ 5,61,424 } },
  1254. { { 63,165,3906 },{ 63,165,3904 },{ 19,37,500 },{ 1,189,424 } },
  1255. { { 4,254,4012 },{ 71,165,3904 },{ 27,37,500 },{ 9,189,424 } },
  1256. { { 12,254,4012 },{ 79,165,3904 },{ 35,37,500 },{ 4,77,424 } },
  1257. { { 20,254,4012 },{ 87,165,3904 },{ 43,37,500 },{ 12,77,424 } },
  1258. { { 29,254,4012 },{ 96,165,3904 },{ 52,37,500 },{ 8,93,424 } },
  1259. { { 37,254,4012 },{ 104,165,3904 },{ 60,37,500 },{ 3,141,496 } },
  1260. { { 45,254,4012 },{ 112,165,3904 },{ 68,37,500 },{ 11,141,496 } },
  1261. { { 53,254,4012 },{ 120,165,3904 },{ 76,37,500 },{ 6,93,496 } },
  1262. { { 62,254,4012 },{ 129,165,3904 },{ 85,37,500 },{ 15,93,496 } },
  1263. { { 70,254,4012 },{ 137,165,3904 },{ 93,37,500 },{ 23,93,496 } },
  1264. { { 78,254,4012 },{ 145,165,3904 },{ 101,37,500 },{ 31,93,496 } },
  1265. { { 86,254,4012 },{ 153,165,3904 },{ 109,37,500 },{ 39,93,496 } },
  1266. { { 95,254,4012 },{ 162,165,3904 },{ 118,37,500 },{ 48,93,496 } },
  1267. { { 103,254,4012 },{ 170,165,3904 },{ 126,37,500 },{ 56,93,496 } },
  1268. { { 111,254,4012 },{ 178,165,3904 },{ 134,37,500 },{ 64,93,496 } },
  1269. { { 119,254,4012 },{ 186,165,3904 },{ 142,37,500 },{ 72,93,496 } },
  1270. { { 128,254,4012 },{ 195,165,3904 },{ 151,37,500 },{ 81,93,496 } },
  1271. { { 136,254,4012 },{ 203,165,3904 },{ 159,37,500 },{ 89,93,496 } },
  1272. { { 212,165,3906 },{ 136,77,4008 },{ 167,37,500 },{ 97,93,496 } },
  1273. { { 220,165,3394 },{ 131,93,4008 },{ 175,37,500 },{ 105,93,496 } },
  1274. { { 214,181,4001 },{ 140,93,4008 },{ 184,37,500 },{ 114,93,496 } },
  1275. { { 222,181,4001 },{ 148,93,4008 },{ 192,37,500 },{ 122,93,496 } },
  1276. { { 114,95,4085 },{ 99,31,4080 },{ 114,95,501 },{ 99,31,496 } },
  1277. { { 122,95,4085 },{ 107,31,4080 },{ 122,95,501 },{ 107,31,496 } },
  1278. { { 0,102,3840 },{ 0,102,3840 },{ 0,18,384 },{ 0,18,384 } },
  1279. { { 5,167,3904 },{ 5,167,3904 },{ 0,13,256 },{ 0,13,256 } },
  1280. { { 4,54,3968 },{ 4,54,3968 },{ 1,67,448 },{ 1,67,448 } },
  1281. { { 30,198,3850 },{ 30,198,3848 },{ 0,3,480 },{ 0,3,480 } },
  1282. { { 39,198,3850 },{ 39,198,3848 },{ 3,52,488 },{ 3,52,488 } },
  1283. { { 47,198,3851 },{ 47,198,3848 },{ 3,4,488 },{ 3,4,488 } },
  1284. { { 55,198,3851 },{ 55,198,3848 },{ 1,70,488 },{ 1,70,488 } },
  1285. { { 54,167,3906 },{ 63,198,3848 },{ 3,22,488 },{ 3,22,488 } },
  1286. { { 62,167,3906 },{ 72,198,3848 },{ 24,118,488 },{ 0,6,496 } },
  1287. { { 70,167,3906 },{ 80,198,3848 },{ 32,118,488 },{ 2,89,488 } },
  1288. { { 78,167,3906 },{ 88,198,3848 },{ 40,118,488 },{ 1,73,496 } },
  1289. { { 86,167,3906 },{ 96,198,3848 },{ 48,118,488 },{ 0,28,424 } },
  1290. { { 95,167,3906 },{ 105,198,3848 },{ 57,118,488 },{ 9,28,424 } },
  1291. { { 103,167,3906 },{ 113,198,3848 },{ 65,118,488 },{ 5,108,496 } },
  1292. { { 111,167,3906 },{ 121,198,3848 },{ 73,118,488 },{ 13,108,496 } },
  1293. { { 119,167,3906 },{ 129,198,3848 },{ 81,118,488 },{ 21,108,496 } },
  1294. { { 128,167,3906 },{ 138,198,3848 },{ 90,118,488 },{ 6,28,496 } },
  1295. { { 136,167,3906 },{ 146,198,3848 },{ 98,118,488 },{ 14,28,496 } },
  1296. { { 144,167,3906 },{ 154,198,3848 },{ 106,118,488 },{ 22,28,496 } },
  1297. { { 152,167,3906 },{ 162,198,3848 },{ 114,118,488 },{ 30,28,496 } },
  1298. { { 161,167,3906 },{ 171,198,3848 },{ 123,118,488 },{ 39,28,496 } },
  1299. { { 169,167,3906 },{ 179,198,3848 },{ 131,118,488 },{ 47,28,496 } },
  1300. { { 177,167,3906 },{ 187,198,3848 },{ 139,118,488 },{ 55,28,496 } },
  1301. { { 185,167,3906 },{ 195,198,3848 },{ 147,118,488 },{ 63,28,496 } },
  1302. { { 194,167,3906 },{ 120,12,4008 },{ 156,118,488 },{ 72,28,496 } },
  1303. { { 206,198,3907 },{ 116,28,4008 },{ 164,118,488 },{ 80,28,496 } },
  1304. { { 214,198,3907 },{ 124,28,4008 },{ 172,118,488 },{ 88,28,496 } },
  1305. { { 222,198,3395 },{ 132,28,4008 },{ 180,118,488 },{ 96,28,496 } },
  1306. { { 207,134,4001 },{ 141,28,4008 },{ 189,118,488 },{ 105,28,496 } },
  1307. { { 95,30,4085 },{ 86,31,4080 },{ 95,30,501 },{ 86,31,496 } },
  1308. { { 103,30,4085 },{ 94,31,4080 },{ 103,30,501 },{ 94,31,496 } },
  1309. { { 111,30,4085 },{ 102,31,4080 },{ 111,30,501 },{ 102,31,496 } },
  1310. { { 0,104,3840 },{ 0,104,3840 },{ 0,18,448 },{ 0,18,448 } },
  1311. { { 4,39,3904 },{ 4,39,3904 },{ 0,4,384 },{ 0,4,384 } },
  1312. { { 0,56,3968 },{ 0,56,3968 },{ 0,84,448 },{ 0,84,448 } },
  1313. { { 6,110,3328 },{ 6,110,3328 },{ 0,20,448 },{ 0,20,448 } },
  1314. { { 41,200,3850 },{ 41,200,3848 },{ 1,4,480 },{ 1,4,480 } },
  1315. { { 49,200,3850 },{ 49,200,3848 },{ 1,8,416 },{ 1,8,416 } },
  1316. { { 57,200,3851 },{ 57,200,3848 },{ 1,38,488 },{ 1,38,488 } },
  1317. { { 65,200,3851 },{ 65,200,3848 },{ 1,120,488 },{ 1,120,488 } },
  1318. { { 74,200,3851 },{ 74,200,3848 },{ 2,72,488 },{ 2,72,488 } },
  1319. { { 69,6,3907 },{ 82,200,3848 },{ 2,24,488 },{ 2,24,488 } },
  1320. { { 77,6,3907 },{ 90,200,3848 },{ 26,120,488 },{ 10,24,488 } },
  1321. { { 97,63,3330 },{ 98,200,3848 },{ 34,120,488 },{ 2,8,496 } },
  1322. { { 106,63,3330 },{ 107,200,3848 },{ 43,120,488 },{ 3,92,488 } },
  1323. { { 114,63,3330 },{ 115,200,3848 },{ 51,120,488 },{ 11,92,488 } },
  1324. { { 122,63,3330 },{ 123,200,3848 },{ 59,120,488 },{ 7,76,496 } },
  1325. { { 130,63,3330 },{ 131,200,3848 },{ 67,120,488 },{ 15,76,496 } },
  1326. { { 139,63,3330 },{ 140,200,3848 },{ 76,120,488 },{ 24,76,496 } },
  1327. { { 147,63,3330 },{ 148,200,3848 },{ 84,120,488 },{ 32,76,496 } },
  1328. { { 155,63,3330 },{ 156,200,3848 },{ 92,120,488 },{ 40,76,496 } },
  1329. { { 163,63,3330 },{ 164,200,3848 },{ 100,120,488 },{ 48,76,496 } },
  1330. { { 172,63,3330 },{ 173,200,3848 },{ 109,120,488 },{ 57,76,496 } },
  1331. { { 184,6,3851 },{ 181,200,3848 },{ 117,120,488 },{ 65,76,496 } },
  1332. { { 192,6,3851 },{ 133,28,3936 },{ 125,120,488 },{ 73,76,496 } },
  1333. { { 189,200,3907 },{ 141,28,3936 },{ 133,120,488 },{ 81,76,496 } },
  1334. { { 198,200,3907 },{ 138,108,4000 },{ 142,120,488 },{ 90,76,496 } },
  1335. { { 206,200,3907 },{ 146,108,4000 },{ 150,120,488 },{ 98,76,496 } },
  1336. { { 214,200,3395 },{ 154,108,4000 },{ 158,120,488 },{ 106,76,496 } },
  1337. { { 190,136,4001 },{ 162,108,4000 },{ 166,120,488 },{ 114,76,496 } },
  1338. { { 123,30,4076 },{ 87,15,4080 },{ 123,30,492 },{ 87,15,496 } },
  1339. { { 117,110,4084 },{ 80,31,4080 },{ 117,110,500 },{ 80,31,496 } },
  1340. { { 125,110,4084 },{ 88,31,4080 },{ 125,110,500 },{ 88,31,496 } },
  1341. { { 133,110,4084 },{ 96,31,4080 },{ 133,110,500 },{ 96,31,496 } },
  1342. { { 9,56,3904 },{ 9,56,3904 },{ 0,67,448 },{ 0,67,448 } },
  1343. { { 1,8,3904 },{ 1,8,3904 },{ 1,84,448 },{ 1,84,448 } },
  1344. { { 1,124,3904 },{ 1,124,3904 },{ 0,39,384 },{ 0,39,384 } },
  1345. { { 9,124,3904 },{ 9,124,3904 },{ 1,4,448 },{ 1,4,448 } },
  1346. { { 6,76,3904 },{ 6,76,3904 },{ 0,70,448 },{ 0,70,448 } },
  1347. { { 62,6,3859 },{ 62,6,3856 },{ 2,38,480 },{ 2,38,480 } },
  1348. { { 70,6,3859 },{ 70,6,3856 },{ 5,43,416 },{ 5,43,416 } },
  1349. { { 78,6,3859 },{ 78,6,3856 },{ 2,11,416 },{ 2,11,416 } },
  1350. { { 87,6,3859 },{ 87,6,3856 },{ 0,171,488 },{ 0,171,488 } },
  1351. { { 67,8,3906 },{ 95,6,3856 },{ 8,171,488 },{ 8,171,488 } },
  1352. { { 75,8,3907 },{ 103,6,3856 },{ 5,123,488 },{ 5,123,488 } },
  1353. { { 83,8,3907 },{ 111,6,3856 },{ 2,75,488 },{ 2,75,488 } },
  1354. { { 92,8,3907 },{ 120,6,3856 },{ 0,27,488 },{ 0,27,488 } },
  1355. { { 100,8,3907 },{ 128,6,3856 },{ 8,27,488 },{ 8,27,488 } },
  1356. { { 120,106,3843 },{ 136,6,3856 },{ 100,6,387 },{ 16,27,488 } },
  1357. { { 128,106,3843 },{ 144,6,3856 },{ 108,6,387 },{ 2,11,496 } },
  1358. { { 137,106,3843 },{ 153,6,3856 },{ 117,6,387 },{ 11,11,496 } },
  1359. { { 145,106,3843 },{ 161,6,3856 },{ 125,6,387 },{ 19,11,496 } },
  1360. { { 163,8,3851 },{ 137,43,3904 },{ 133,6,387 },{ 27,11,496 } },
  1361. { { 171,8,3851 },{ 101,11,4000 },{ 141,6,387 },{ 35,11,496 } },
  1362. { { 180,8,3851 },{ 110,11,4000 },{ 150,6,387 },{ 44,11,496 } },
  1363. { { 188,8,3851 },{ 118,11,4000 },{ 158,6,387 },{ 52,11,496 } },
  1364. { { 172,72,3907 },{ 126,11,4000 },{ 166,6,387 },{ 60,11,496 } },
  1365. { { 174,6,3971 },{ 134,11,4000 },{ 174,6,387 },{ 68,11,496 } },
  1366. { { 183,6,3971 },{ 143,11,4000 },{ 183,6,387 },{ 77,11,496 } },
  1367. { { 191,6,3971 },{ 151,11,4000 },{ 191,6,387 },{ 85,11,496 } },
  1368. { { 199,6,3971 },{ 159,11,4000 },{ 199,6,387 },{ 93,11,496 } },
  1369. { { 92,12,4084 },{ 69,15,4080 },{ 92,12,500 },{ 69,15,496 } },
  1370. { { 101,12,4084 },{ 78,15,4080 },{ 101,12,500 },{ 78,15,496 } },
  1371. { { 109,12,4084 },{ 86,15,4080 },{ 109,12,500 },{ 86,15,496 } },
  1372. { { 117,12,4084 },{ 79,31,4080 },{ 117,12,500 },{ 79,31,496 } },
  1373. { { 125,12,4084 },{ 87,31,4080 },{ 125,12,500 },{ 87,31,496 } },
  1374. { { 71,8,3602 },{ 71,8,3600 },{ 2,21,384 },{ 2,21,384 } },
  1375. { { 79,8,3611 },{ 79,8,3608 },{ 0,69,448 },{ 0,69,448 } },
  1376. { { 87,8,3611 },{ 87,8,3608 },{ 0,23,384 },{ 0,23,384 } },
  1377. { { 95,8,3611 },{ 95,8,3608 },{ 1,5,448 },{ 1,5,448 } },
  1378. { { 104,8,3611 },{ 104,8,3608 },{ 0,88,448 },{ 0,88,448 } },
  1379. { { 112,8,3611 },{ 112,8,3608 },{ 0,72,448 },{ 0,72,448 } },
  1380. { { 120,8,3611 },{ 121,8,3608 },{ 36,21,458 },{ 36,21,456 } },
  1381. { { 133,47,3091 },{ 129,8,3608 },{ 44,21,458 },{ 44,21,456 } },
  1382. { { 142,47,3091 },{ 138,8,3608 },{ 53,21,459 },{ 53,21,456 } },
  1383. { { 98,12,3850 },{ 98,12,3848 },{ 61,21,459 },{ 61,21,456 } },
  1384. { { 106,12,3850 },{ 106,12,3848 },{ 10,92,480 },{ 69,21,456 } },
  1385. { { 114,12,3851 },{ 114,12,3848 },{ 18,92,480 },{ 77,21,456 } },
  1386. { { 87,12,3906 },{ 87,12,3904 },{ 3,44,488 },{ 86,21,456 } },
  1387. { { 95,12,3906 },{ 95,12,3904 },{ 11,44,488 },{ 94,21,456 } },
  1388. { { 103,12,3906 },{ 103,12,3904 },{ 19,44,488 },{ 102,21,456 } },
  1389. { { 111,12,3907 },{ 111,12,3904 },{ 27,44,489 },{ 110,21,456 } },
  1390. { { 120,12,3907 },{ 120,12,3904 },{ 36,44,489 },{ 119,21,456 } },
  1391. { { 128,12,3907 },{ 128,12,3904 },{ 44,44,489 },{ 127,21,456 } },
  1392. { { 136,12,3907 },{ 136,12,3904 },{ 52,44,489 },{ 135,21,456 } },
  1393. { { 144,12,3907 },{ 144,12,3904 },{ 60,44,489 },{ 143,21,456 } },
  1394. { { 153,12,3907 },{ 153,12,3904 },{ 69,44,490 },{ 152,21,456 } },
  1395. { { 161,12,3395 },{ 149,188,3968 },{ 77,44,490 },{ 160,21,456 } },
  1396. { { 169,12,3395 },{ 198,21,3928 },{ 85,44,490 },{ 168,21,456 } },
  1397. { { 113,95,4001 },{ 201,69,3992 },{ 125,8,483 },{ 176,21,456 } },
  1398. { { 122,95,4001 },{ 200,21,3984 },{ 134,8,483 },{ 185,21,456 } },
  1399. { { 142,8,4067 },{ 208,21,3984 },{ 142,8,483 },{ 193,21,456 } },
  1400. { { 151,8,4067 },{ 47,15,4080 },{ 151,8,483 },{ 47,15,496 } },
  1401. { { 159,8,4067 },{ 55,15,4080 },{ 159,8,483 },{ 55,15,496 } },
  1402. { { 168,8,4067 },{ 64,15,4080 },{ 168,8,483 },{ 64,15,496 } },
  1403. { { 160,40,4075 },{ 72,15,4080 },{ 160,40,491 },{ 72,15,496 } },
  1404. { { 168,40,4075 },{ 80,15,4080 },{ 168,40,491 },{ 80,15,496 } },
  1405. { { 144,8,4082 },{ 88,15,4080 },{ 144,8,498 },{ 88,15,496 } }
  1406. };
  1407. #endif // BASISD_SUPPORT_ETC2_EAC_A8
  1408. #if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
  1409. static void create_etc2_eac_a8_conversion_table()
  1410. {
  1411. FILE* pFile = fopen("basisu_decoder_tables_etc2_eac_a8.inc", "w");
  1412. for (uint32_t inten = 0; inten < 8; inten++)
  1413. {
  1414. for (uint32_t base = 0; base < 32; base++)
  1415. {
  1416. color32 block_colors[4];
  1417. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);
  1418. fprintf(pFile, "{");
  1419. for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
  1420. {
  1421. const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
  1422. const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;
  1423. // We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
  1424. // Now find the best ETC2 EAC A8 base/table/multiplier that fits these colors.
  1425. uint8_t pixels[4];
  1426. uint32_t num_pixels = 0;
  1427. for (uint32_t s = low_selector; s <= high_selector; s++)
  1428. pixels[num_pixels++] = block_colors[s].g;
  1429. pack_eac_a8_results pack_results;
  1430. pack_eac_a8_exhaustive(pack_results, pixels, num_pixels);
  1431. etc1_g_to_eac_conversion& c = s_etc1_g_to_etc2_a8[base + inten * 32][sel_range];
  1432. c.m_base = pack_results.m_base;
  1433. c.m_table_mul = pack_results.m_table * 16 + pack_results.m_multiplier;
  1434. c.m_trans = 0;
  1435. for (uint32_t s = 0; s < 4; s++)
  1436. {
  1437. if ((s < low_selector) || (s > high_selector))
  1438. continue;
  1439. uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];
  1440. c.m_trans |= (etc2_selector << (s * 3));
  1441. }
  1442. fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
  1443. if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
  1444. fprintf(pFile, ",");
  1445. }
  1446. fprintf(pFile, "},\n");
  1447. }
  1448. }
  1449. fclose(pFile);
  1450. }
  1451. #endif
  1452. #if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
  1453. struct pack_eac_r11_results
  1454. {
  1455. uint32_t m_base;
  1456. uint32_t m_table;
  1457. uint32_t m_multiplier;
  1458. basisu::vector<uint8_t> m_selectors;
  1459. basisu::vector<uint8_t> m_selectors_temp;
  1460. };
  1461. static uint64_t pack_eac_r11_exhaustive(pack_eac_r11_results& results, const uint8_t* pPixels, uint32_t num_pixels)
  1462. {
  1463. results.m_selectors.resize(num_pixels);
  1464. results.m_selectors_temp.resize(num_pixels);
  1465. uint64_t best_err = UINT64_MAX;
  1466. for (uint32_t base_color = 0; base_color < 256; base_color++)
  1467. {
  1468. for (uint32_t multiplier = 0; multiplier < 16; multiplier++)
  1469. {
  1470. for (uint32_t table = 0; table < 16; table++)
  1471. {
  1472. uint64_t total_err = 0;
  1473. for (uint32_t i = 0; i < num_pixels; i++)
  1474. {
  1475. // Convert 8-bit input to 11-bits
  1476. const int a = (pPixels[i] * 2047 + 128) / 255;
  1477. uint32_t best_s_err = UINT32_MAX;
  1478. uint32_t best_s = 0;
  1479. for (uint32_t s = 0; s < 8; s++)
  1480. {
  1481. int v = (int)(multiplier ? (multiplier * 8) : 1) * g_eac_modifier_table[table][s] + (int)base_color * 8 + 4;
  1482. if (v < 0)
  1483. v = 0;
  1484. else if (v > 2047)
  1485. v = 2047;
  1486. uint32_t err = abs(a - v);
  1487. if (err < best_s_err)
  1488. {
  1489. best_s_err = err;
  1490. best_s = s;
  1491. }
  1492. }
  1493. results.m_selectors_temp[i] = static_cast<uint8_t>(best_s);
  1494. total_err += best_s_err * best_s_err;
  1495. if (total_err >= best_err)
  1496. break;
  1497. }
  1498. if (total_err < best_err)
  1499. {
  1500. best_err = total_err;
  1501. results.m_base = base_color;
  1502. results.m_multiplier = multiplier;
  1503. results.m_table = table;
  1504. results.m_selectors.swap(results.m_selectors_temp);
  1505. }
  1506. } // table
  1507. } // multiplier
  1508. } // base_color
  1509. return best_err;
  1510. }
  1511. static void create_etc2_eac_r11_conversion_table()
  1512. {
  1513. FILE* pFile = nullptr;
  1514. fopen_s(&pFile, "basisu_decoder_tables_etc2_eac_r11.inc", "w");
  1515. for (uint32_t inten = 0; inten < 8; inten++)
  1516. {
  1517. for (uint32_t base = 0; base < 32; base++)
  1518. {
  1519. color32 block_colors[4];
  1520. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);
  1521. fprintf(pFile, "{");
  1522. for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
  1523. {
  1524. const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
  1525. const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;
  1526. // We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
  1527. // Now find the best ETC2 EAC R11 base/table/multiplier that fits these colors.
  1528. uint8_t pixels[4];
  1529. uint32_t num_pixels = 0;
  1530. for (uint32_t s = low_selector; s <= high_selector; s++)
  1531. pixels[num_pixels++] = block_colors[s].g;
  1532. pack_eac_r11_results pack_results;
  1533. pack_eac_r11_exhaustive(pack_results, pixels, num_pixels);
  1534. etc1_g_to_eac_conversion c;
  1535. c.m_base = (uint8_t)pack_results.m_base;
  1536. c.m_table_mul = (uint8_t)(pack_results.m_table * 16 + pack_results.m_multiplier);
  1537. c.m_trans = 0;
  1538. for (uint32_t s = 0; s < 4; s++)
  1539. {
  1540. if ((s < low_selector) || (s > high_selector))
  1541. continue;
  1542. uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];
  1543. c.m_trans |= (etc2_selector << (s * 3));
  1544. }
  1545. fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
  1546. if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
  1547. fprintf(pFile, ",");
  1548. }
  1549. fprintf(pFile, "},\n");
  1550. }
  1551. }
  1552. fclose(pFile);
  1553. }
  1554. #endif // BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
  // Forward declarations of the per-format initializers and developer-only table generators
  // that basisu_transcoder_init() calls. Each one is only declared when its feature switch is on.

  // ASTC
  #if BASISD_WRITE_NEW_ASTC_TABLES
  static void create_etc1_to_astc_conversion_table_0_47();
  static void create_etc1_to_astc_conversion_table_0_255();
  #endif

  #if BASISD_SUPPORT_ASTC
  static void transcoder_init_astc();
  #endif

  // BC7 mode 5
  #if BASISD_WRITE_NEW_BC7_MODE5_TABLES
  static void create_etc1_to_bc7_m5_color_conversion_table();
  static void create_etc1_to_bc7_m5_alpha_conversion_table();
  #endif

  #if BASISD_SUPPORT_BC7_MODE5
  static void transcoder_init_bc7_mode5();
  #endif

  // ATC
  #if BASISD_WRITE_NEW_ATC_TABLES
  static void create_etc1s_to_atc_conversion_tables();
  #endif

  #if BASISD_SUPPORT_ATC
  static void transcoder_init_atc();
  #endif

  // PVRTC2
  #if BASISD_SUPPORT_PVRTC2
  static void transcoder_init_pvrtc2();
  #endif

  // UASTC (external linkage, unlike the initializers above)
  #if BASISD_SUPPORT_UASTC
  void uastc_init();
  #endif

  // Set to true at the end of basisu_transcoder_init(); used there to detect repeated calls.
  static bool g_transcoder_initialized = false;
  1582. // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz.
  1583. // If this is too slow, these computed tables can easilky be moved to be compiled in.
  1584. void basisu_transcoder_init()
  1585. {
  1586. if (g_transcoder_initialized)
  1587. {
  1588. BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
  1589. return;
  1590. }
  1591. BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
  1592. #if BASISD_SUPPORT_UASTC
  1593. uastc_init();
  1594. #endif
  1595. #if BASISD_SUPPORT_UASTC_HDR
  1596. // TODO: Examine this, optimize for startup time/mem utilization.
  1597. astc_helpers::init_tables(false);
  1598. astc_hdr_core_init();
  1599. #endif
  1600. #if BASISD_SUPPORT_ASTC
  1601. transcoder_init_astc();
  1602. #endif
  1603. #if BASISD_WRITE_NEW_ASTC_TABLES
  1604. create_etc1_to_astc_conversion_table_0_47();
  1605. create_etc1_to_astc_conversion_table_0_255();
  1606. exit(0);
  1607. #endif
  1608. #if BASISD_WRITE_NEW_BC7_MODE5_TABLES
  1609. create_etc1_to_bc7_m5_color_conversion_table();
  1610. create_etc1_to_bc7_m5_alpha_conversion_table();
  1611. exit(0);
  1612. #endif
  1613. #if BASISD_WRITE_NEW_DXT1_TABLES
  1614. create_etc1_to_dxt1_5_conversion_table();
  1615. create_etc1_to_dxt1_6_conversion_table();
  1616. exit(0);
  1617. #endif
  1618. #if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
  1619. create_etc2_eac_a8_conversion_table();
  1620. exit(0);
  1621. #endif
  1622. #if BASISD_WRITE_NEW_ATC_TABLES
  1623. create_etc1s_to_atc_conversion_tables();
  1624. exit(0);
  1625. #endif
  1626. #if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
  1627. create_etc2_eac_r11_conversion_table();
  1628. exit(0);
  1629. #endif
  1630. #if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC
  1631. uint8_t bc1_expand5[32];
  1632. for (int i = 0; i < 32; i++)
  1633. bc1_expand5[i] = static_cast<uint8_t>((i << 3) | (i >> 2));
  1634. prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, 32, 1);
  1635. prepare_bc1_single_color_table(g_bc1_match5_equals_0, bc1_expand5, 1, 32, 0);
  1636. uint8_t bc1_expand6[64];
  1637. for (int i = 0; i < 64; i++)
  1638. bc1_expand6[i] = static_cast<uint8_t>((i << 2) | (i >> 4));
  1639. prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, 64, 1);
  1640. prepare_bc1_single_color_table(g_bc1_match6_equals_0, bc1_expand6, 1, 64, 0);
  1641. #if 0
  1642. for (uint32_t i = 0; i < 256; i++)
  1643. {
  1644. printf("%u %u %u\n", i, (i * 63 + 127) / 255, g_bc1_match6_equals_0[i].m_hi);
  1645. }
  1646. exit(0);
  1647. #endif
  1648. #endif
  1649. #if BASISD_SUPPORT_DXT1
  1650. for (uint32_t i = 0; i < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; i++)
  1651. {
  1652. uint32_t l = g_etc1_to_dxt1_selector_ranges[i].m_low;
  1653. uint32_t h = g_etc1_to_dxt1_selector_ranges[i].m_high;
  1654. g_etc1_to_dxt1_selector_range_index[l][h] = i;
  1655. }
  1656. for (uint32_t sm = 0; sm < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; sm++)
  1657. {
  1658. uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1[4];
  1659. uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1_inv[4];
  1660. for (uint32_t j = 0; j < 4; j++)
  1661. {
  1662. static const uint8_t s_linear_dxt1_to_dxt1[4] = { 0, 2, 3, 1 };
  1663. static const uint8_t s_dxt1_inverted_xlat[4] = { 1, 0, 3, 2 };
  1664. etc1_to_dxt1_selector_mappings_raw_dxt1[j] = (uint8_t)s_linear_dxt1_to_dxt1[g_etc1_to_dxt1_selector_mappings[sm][j]];
  1665. etc1_to_dxt1_selector_mappings_raw_dxt1_inv[j] = (uint8_t)s_dxt1_inverted_xlat[etc1_to_dxt1_selector_mappings_raw_dxt1[j]];
  1666. }
  1667. for (uint32_t i = 0; i < 256; i++)
  1668. {
  1669. uint32_t k = 0, k_inv = 0;
  1670. for (uint32_t s = 0; s < 4; s++)
  1671. {
  1672. k |= (etc1_to_dxt1_selector_mappings_raw_dxt1[(i >> (s * 2)) & 3] << (s * 2));
  1673. k_inv |= (etc1_to_dxt1_selector_mappings_raw_dxt1_inv[(i >> (s * 2)) & 3] << (s * 2));
  1674. }
  1675. g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[sm][i] = (uint8_t)k;
  1676. g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[sm][i] = (uint8_t)k_inv;
  1677. }
  1678. }
  1679. #endif
  1680. #if BASISD_SUPPORT_BC7_MODE5
  1681. transcoder_init_bc7_mode5();
  1682. #endif
  1683. #if BASISD_SUPPORT_ATC
  1684. transcoder_init_atc();
  1685. #endif
  1686. #if BASISD_SUPPORT_PVRTC2
  1687. transcoder_init_pvrtc2();
  1688. #endif
  1689. #if BASISD_SUPPORT_UASTC_HDR
  1690. bc6h_enc_init();
  1691. #endif
  1692. g_transcoder_initialized = true;
  1693. }
  1694. #if BASISD_SUPPORT_DXT1
  1695. static void convert_etc1s_to_dxt1(dxt1_block* pDst_block, const endpoint *pEndpoints, const selector* pSelector, bool use_threecolor_blocks)
  1696. {
  1697. #if !BASISD_WRITE_NEW_DXT1_TABLES
  1698. const uint32_t low_selector = pSelector->m_lo_selector;
  1699. const uint32_t high_selector = pSelector->m_hi_selector;
  1700. const color32& base_color = pEndpoints->m_color5;
  1701. const uint32_t inten_table = pEndpoints->m_inten5;
  1702. if (low_selector == high_selector)
  1703. {
  1704. uint32_t r, g, b;
  1705. decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
  1706. uint32_t mask = 0xAA;
  1707. uint32_t max16 = (g_bc1_match5_equals_1[r].m_hi << 11) | (g_bc1_match6_equals_1[g].m_hi << 5) | g_bc1_match5_equals_1[b].m_hi;
  1708. uint32_t min16 = (g_bc1_match5_equals_1[r].m_lo << 11) | (g_bc1_match6_equals_1[g].m_lo << 5) | g_bc1_match5_equals_1[b].m_lo;
  1709. if ((!use_threecolor_blocks) && (min16 == max16))
  1710. {
  1711. // This is an annoying edge case that impacts BC3.
  1712. // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's.
  1713. mask = 0;
  1714. // Make l > h
  1715. if (min16 > 0)
  1716. min16--;
  1717. else
  1718. {
  1719. // l = h = 0
  1720. assert(min16 == max16 && max16 == 0);
  1721. max16 = 1;
  1722. min16 = 0;
  1723. mask = 0x55;
  1724. }
  1725. assert(max16 > min16);
  1726. }
  1727. if (max16 < min16)
  1728. {
  1729. std::swap(max16, min16);
  1730. mask ^= 0x55;
  1731. }
  1732. pDst_block->set_low_color(static_cast<uint16_t>(max16));
  1733. pDst_block->set_high_color(static_cast<uint16_t>(min16));
  1734. pDst_block->m_selectors[0] = static_cast<uint8_t>(mask);
  1735. pDst_block->m_selectors[1] = static_cast<uint8_t>(mask);
  1736. pDst_block->m_selectors[2] = static_cast<uint8_t>(mask);
  1737. pDst_block->m_selectors[3] = static_cast<uint8_t>(mask);
  1738. return;
  1739. }
  1740. else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
  1741. {
  1742. color32 block_colors[4];
  1743. decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
  1744. const uint32_t r0 = block_colors[0].r;
  1745. const uint32_t g0 = block_colors[0].g;
  1746. const uint32_t b0 = block_colors[0].b;
  1747. const uint32_t r1 = block_colors[3].r;
  1748. const uint32_t g1 = block_colors[3].g;
  1749. const uint32_t b1 = block_colors[3].b;
  1750. uint32_t max16 = (g_bc1_match5_equals_0[r0].m_hi << 11) | (g_bc1_match6_equals_0[g0].m_hi << 5) | g_bc1_match5_equals_0[b0].m_hi;
  1751. uint32_t min16 = (g_bc1_match5_equals_0[r1].m_hi << 11) | (g_bc1_match6_equals_0[g1].m_hi << 5) | g_bc1_match5_equals_0[b1].m_hi;
  1752. uint32_t l = 0, h = 1;
  1753. if (min16 == max16)
  1754. {
  1755. // Make l > h
  1756. if (min16 > 0)
  1757. {
  1758. min16--;
  1759. l = 0;
  1760. h = 0;
  1761. }
  1762. else
  1763. {
  1764. // l = h = 0
  1765. assert(min16 == max16 && max16 == 0);
  1766. max16 = 1;
  1767. min16 = 0;
  1768. l = 1;
  1769. h = 1;
  1770. }
  1771. assert(max16 > min16);
  1772. }
  1773. if (max16 < min16)
  1774. {
  1775. std::swap(max16, min16);
  1776. l = 1;
  1777. h = 0;
  1778. }
  1779. pDst_block->set_low_color((uint16_t)max16);
  1780. pDst_block->set_high_color((uint16_t)min16);
  1781. for (uint32_t y = 0; y < 4; y++)
  1782. {
  1783. for (uint32_t x = 0; x < 4; x++)
  1784. {
  1785. uint32_t s = pSelector->get_selector(x, y);
  1786. pDst_block->set_selector(x, y, (s == 3) ? h : l);
  1787. }
  1788. }
  1789. return;
  1790. }
  1791. const uint32_t selector_range_table = g_etc1_to_dxt1_selector_range_index[low_selector][high_selector];
  1792. //[32][8][RANGES][MAPPING]
  1793. const etc1_to_dxt1_56_solution* pTable_r = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
  1794. const etc1_to_dxt1_56_solution* pTable_g = &g_etc1_to_dxt_6[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
  1795. const etc1_to_dxt1_56_solution* pTable_b = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
  1796. uint32_t best_err = UINT_MAX;
  1797. uint32_t best_mapping = 0;
  1798. assert(NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS == 10);
  1799. #define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
  1800. DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
  1801. DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
  1802. #undef DO_ITER
  1803. uint32_t l = dxt1_block::pack_unscaled_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
  1804. uint32_t h = dxt1_block::pack_unscaled_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
  1805. const uint8_t* pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[best_mapping][0];
  1806. if (l < h)
  1807. {
  1808. std::swap(l, h);
  1809. pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0];
  1810. }
  1811. pDst_block->set_low_color(static_cast<uint16_t>(l));
  1812. pDst_block->set_high_color(static_cast<uint16_t>(h));
  1813. if (l == h)
  1814. {
  1815. uint8_t mask = 0;
  1816. if (!use_threecolor_blocks)
  1817. {
  1818. // This is an annoying edge case that impacts BC3.
  1819. // Make l > h
  1820. if (h > 0)
  1821. h--;
  1822. else
  1823. {
  1824. // l = h = 0
  1825. assert(l == h && h == 0);
  1826. h = 0;
  1827. l = 1;
  1828. mask = 0x55;
  1829. }
  1830. assert(l > h);
  1831. pDst_block->set_low_color(static_cast<uint16_t>(l));
  1832. pDst_block->set_high_color(static_cast<uint16_t>(h));
  1833. }
  1834. pDst_block->m_selectors[0] = mask;
  1835. pDst_block->m_selectors[1] = mask;
  1836. pDst_block->m_selectors[2] = mask;
  1837. pDst_block->m_selectors[3] = mask;
  1838. return;
  1839. }
  1840. pDst_block->m_selectors[0] = pSelectors_xlat_256[pSelector->m_selectors[0]];
  1841. pDst_block->m_selectors[1] = pSelectors_xlat_256[pSelector->m_selectors[1]];
  1842. pDst_block->m_selectors[2] = pSelectors_xlat_256[pSelector->m_selectors[2]];
  1843. pDst_block->m_selectors[3] = pSelectors_xlat_256[pSelector->m_selectors[3]];
  1844. #endif
  1845. }
  1846. #if BASISD_ENABLE_DEBUG_FLAGS
  1847. static void convert_etc1s_to_dxt1_vis(dxt1_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool use_threecolor_blocks)
  1848. {
  1849. convert_etc1s_to_dxt1(pDst_block, pEndpoints, pSelector, use_threecolor_blocks);
  1850. if (g_debug_flags & cDebugFlagVisBC1Sels)
  1851. {
  1852. uint32_t l = dxt1_block::pack_unscaled_color(31, 63, 31);
  1853. uint32_t h = dxt1_block::pack_unscaled_color(0, 0, 0);
  1854. pDst_block->set_low_color(static_cast<uint16_t>(l));
  1855. pDst_block->set_high_color(static_cast<uint16_t>(h));
  1856. }
  1857. else if (g_debug_flags & cDebugFlagVisBC1Endpoints)
  1858. {
  1859. for (uint32_t y = 0; y < 4; y++)
  1860. for (uint32_t x = 0; x < 4; x++)
  1861. pDst_block->set_selector(x, y, (y < 2) ? 0 : 1);
  1862. }
  1863. }
  1864. #endif
  1865. #endif
  1866. #if BASISD_SUPPORT_FXT1
  // Bit-level view of one FXT1 block: two 64-bit halves, each an anonymous union so the same
  // bits can be accessed as bit-fields or as a raw integer (and, for the first half, as bytes).
  // convert_etc1s_to_fxt1() fills this structure in what its comments call CC_MIXED mode.
  // NOTE(review): the bit-field views rely on the compiler's bit-field allocation order -
  // confirm for any new target.
  struct fxt1_block
  {
      // First 64 bits: thirty-two 2-bit selectors.
      union
      {
          struct
          {
              uint64_t m_t00 : 2;
              uint64_t m_t01 : 2;
              uint64_t m_t02 : 2;
              uint64_t m_t03 : 2;
              uint64_t m_t04 : 2;
              uint64_t m_t05 : 2;
              uint64_t m_t06 : 2;
              uint64_t m_t07 : 2;
              uint64_t m_t08 : 2;
              uint64_t m_t09 : 2;
              uint64_t m_t10 : 2;
              uint64_t m_t11 : 2;
              uint64_t m_t12 : 2;
              uint64_t m_t13 : 2;
              uint64_t m_t14 : 2;
              uint64_t m_t15 : 2;
              uint64_t m_t16 : 2;
              uint64_t m_t17 : 2;
              uint64_t m_t18 : 2;
              uint64_t m_t19 : 2;
              uint64_t m_t20 : 2;
              uint64_t m_t21 : 2;
              uint64_t m_t22 : 2;
              uint64_t m_t23 : 2;
              uint64_t m_t24 : 2;
              uint64_t m_t25 : 2;
              uint64_t m_t26 : 2;
              uint64_t m_t27 : 2;
              uint64_t m_t28 : 2;
              uint64_t m_t29 : 2;
              uint64_t m_t30 : 2;
              uint64_t m_t31 : 2;
          } m_lo;
          // All 32 selectors as one integer.
          uint64_t m_lo_bits;
          // Selectors as bytes (four selectors each); convert_etc1s_to_fxt1() writes
          // bytes 0-3 for subblock 0 and bytes 4-7 for subblock 1.
          uint8_t m_sels[8];
      };
      // Second 64 bits: four 5:5:5 colors (60 bits) followed by 4 control bits.
      union
      {
          struct
          {
              // The two color orderings differ only in which color of each pair comes first.
  #ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING
              uint64_t m_b1 : 5;
              uint64_t m_g1 : 5;
              uint64_t m_r1 : 5;
              uint64_t m_b0 : 5;
              uint64_t m_g0 : 5;
              uint64_t m_r0 : 5;
              uint64_t m_b3 : 5;
              uint64_t m_g3 : 5;
              uint64_t m_r3 : 5;
              uint64_t m_b2 : 5;
              uint64_t m_g2 : 5;
              uint64_t m_r2 : 5;
  #else
              uint64_t m_b0 : 5;
              uint64_t m_g0 : 5;
              uint64_t m_r0 : 5;
              uint64_t m_b1 : 5;
              uint64_t m_g1 : 5;
              uint64_t m_r1 : 5;
              uint64_t m_b2 : 5;
              uint64_t m_g2 : 5;
              uint64_t m_r2 : 5;
              uint64_t m_b3 : 5;
              uint64_t m_g3 : 5;
              uint64_t m_r3 : 5;
  #endif
              // convert_etc1s_to_fxt1() writes 0 here.
              uint64_t m_alpha : 1;
              // Green LSBs: bit 0 is written for subblock 0 (colors 0/1), bit 1 for subblock 1 (colors 2/3).
              uint64_t m_glsb : 2;
              // convert_etc1s_to_fxt1() writes 1 here.
              uint64_t m_mode : 1;
          } m_hi;
          // The colors and control bits as one integer.
          uint64_t m_hi_bits;
      };
  };
  // Translates one byte of four packed 2-bit DXT1 selectors into the FXT1 selector encoding.
  // Each 2-bit selector is remapped independently: 0->0, 1->3, 2->1, 3->2.
  //
  // sels - packed selectors; only the low 8 bits are used (higher bits are ignored rather
  //        than indexing past the lookup table).
  // Returns the four remapped selectors packed the same way.
  static uint8_t conv_dxt1_to_fxt1_sels(uint32_t sels)
  {
      // Indexed by a nibble (two selectors); yields the remapped nibble.
      static const uint8_t s_conv_table[16] = { 0, 3, 1, 2, 12, 15, 13, 14, 4, 7, 5, 6, 8, 11, 9, 10 };

      const uint32_t lo_nibble = sels & 15;
      const uint32_t hi_nibble = (sels >> 4) & 15;

      return static_cast<uint8_t>(s_conv_table[lo_nibble] | (s_conv_table[hi_nibble] << 4));
  }
  1952. static void convert_etc1s_to_fxt1(void *pDst, const endpoint *pEndpoints, const selector *pSelectors, uint32_t fxt1_subblock)
  1953. {
  1954. fxt1_block* pBlock = static_cast<fxt1_block*>(pDst);
  1955. // CC_MIXED is basically DXT1 with different encoding tricks.
  1956. // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless.
  1957. // (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.)
  1958. dxt1_block blk;
  1959. convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false);
  1960. const uint32_t l = blk.get_low_color();
  1961. const uint32_t h = blk.get_high_color();
  1962. color32 color0((l >> 11) & 31, (l >> 5) & 63, l & 31, 255);
  1963. color32 color1((h >> 11) & 31, (h >> 5) & 63, h & 31, 255);
  1964. uint32_t g0 = color0.g & 1;
  1965. uint32_t g1 = color1.g & 1;
  1966. color0.g >>= 1;
  1967. color1.g >>= 1;
  1968. blk.m_selectors[0] = conv_dxt1_to_fxt1_sels(blk.m_selectors[0]);
  1969. blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]);
  1970. blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]);
  1971. blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]);
  1972. if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1))
  1973. {
  1974. std::swap(color0, color1);
  1975. std::swap(g0, g1);
  1976. blk.m_selectors[0] ^= 0xFF;
  1977. blk.m_selectors[1] ^= 0xFF;
  1978. blk.m_selectors[2] ^= 0xFF;
  1979. blk.m_selectors[3] ^= 0xFF;
  1980. }
  1981. if (fxt1_subblock == 0)
  1982. {
  1983. pBlock->m_hi.m_mode = 1;
  1984. pBlock->m_hi.m_alpha = 0;
  1985. pBlock->m_hi.m_glsb = g1 | (g1 << 1);
  1986. pBlock->m_hi.m_r0 = color0.r;
  1987. pBlock->m_hi.m_g0 = color0.g;
  1988. pBlock->m_hi.m_b0 = color0.b;
  1989. pBlock->m_hi.m_r1 = color1.r;
  1990. pBlock->m_hi.m_g1 = color1.g;
  1991. pBlock->m_hi.m_b1 = color1.b;
  1992. pBlock->m_hi.m_r2 = color0.r;
  1993. pBlock->m_hi.m_g2 = color0.g;
  1994. pBlock->m_hi.m_b2 = color0.b;
  1995. pBlock->m_hi.m_r3 = color1.r;
  1996. pBlock->m_hi.m_g3 = color1.g;
  1997. pBlock->m_hi.m_b3 = color1.b;
  1998. pBlock->m_sels[0] = blk.m_selectors[0];
  1999. pBlock->m_sels[1] = blk.m_selectors[1];
  2000. pBlock->m_sels[2] = blk.m_selectors[2];
  2001. pBlock->m_sels[3] = blk.m_selectors[3];
  2002. static const uint8_t s_border_dup[4] = { 0, 85, 170, 255 };
  2003. pBlock->m_sels[4] = s_border_dup[blk.m_selectors[0] >> 6];
  2004. pBlock->m_sels[5] = s_border_dup[blk.m_selectors[1] >> 6];
  2005. pBlock->m_sels[6] = s_border_dup[blk.m_selectors[2] >> 6];
  2006. pBlock->m_sels[7] = s_border_dup[blk.m_selectors[3] >> 6];
  2007. }
  2008. else
  2009. {
  2010. pBlock->m_hi.m_glsb = (pBlock->m_hi.m_glsb & 1) | (g1 << 1);
  2011. pBlock->m_hi.m_r2 = color0.r;
  2012. pBlock->m_hi.m_g2 = color0.g;
  2013. pBlock->m_hi.m_b2 = color0.b;
  2014. pBlock->m_hi.m_r3 = color1.r;
  2015. pBlock->m_hi.m_g3 = color1.g;
  2016. pBlock->m_hi.m_b3 = color1.b;
  2017. pBlock->m_sels[4] = blk.m_selectors[0];
  2018. pBlock->m_sels[5] = blk.m_selectors[1];
  2019. pBlock->m_sels[6] = blk.m_selectors[2];
  2020. pBlock->m_sels[7] = blk.m_selectors[3];
  2021. }
  2022. }
  2023. #endif // BASISD_SUPPORT_FXT1
  2024. #if BASISD_SUPPORT_DXT5A
  2025. static dxt_selector_range s_dxt5a_selector_ranges[] =
  2026. {
  2027. { 0, 3 },
  2028. { 1, 3 },
  2029. { 0, 2 },
  2030. { 1, 2 },
  2031. };
  2032. const uint32_t NUM_DXT5A_SELECTOR_RANGES = sizeof(s_dxt5a_selector_ranges) / sizeof(s_dxt5a_selector_ranges[0]);
  // One precomputed ETC1 -> DXT5A conversion entry (element type of g_etc1_g_to_dxt5a).
  struct etc1_g_to_dxt5a_conversion
  {
      uint8_t m_lo;      // presumably the first DXT5A endpoint - confirm against the transcode code
      uint8_t m_hi;      // presumably the second DXT5A endpoint - confirm against the transcode code
      uint16_t m_trans;  // presumably a packed selector translation table - confirm against the transcode code
  };
  2038. static etc1_g_to_dxt5a_conversion g_etc1_g_to_dxt5a[32 * 8][NUM_DXT5A_SELECTOR_RANGES] =
  2039. {
  2040. { { 8, 0, 393 },{ 8, 0, 392 },{ 2, 0, 9 },{ 2, 0, 8 }, }, { { 6, 16, 710 },{ 16, 6, 328 },{ 0, 10, 96 },{ 10, 6, 8 }, },
  2041. { { 28, 5, 1327 },{ 24, 14, 328 },{ 8, 18, 96 },{ 18, 14, 8 }, }, { { 36, 13, 1327 },{ 32, 22, 328 },{ 16, 26, 96 },{ 26, 22, 8 }, },
  2042. { { 45, 22, 1327 },{ 41, 31, 328 },{ 25, 35, 96 },{ 35, 31, 8 }, }, { { 53, 30, 1327 },{ 49, 39, 328 },{ 33, 43, 96 },{ 43, 39, 8 }, },
  2043. { { 61, 38, 1327 },{ 57, 47, 328 },{ 41, 51, 96 },{ 51, 47, 8 }, }, { { 69, 46, 1327 },{ 65, 55, 328 },{ 49, 59, 96 },{ 59, 55, 8 }, },
  2044. { { 78, 55, 1327 },{ 74, 64, 328 },{ 58, 68, 96 },{ 68, 64, 8 }, }, { { 86, 63, 1327 },{ 82, 72, 328 },{ 66, 76, 96 },{ 76, 72, 8 }, },
  2045. { { 94, 71, 1327 },{ 90, 80, 328 },{ 74, 84, 96 },{ 84, 80, 8 }, }, { { 102, 79, 1327 },{ 98, 88, 328 },{ 82, 92, 96 },{ 92, 88, 8 }, },
  2046. { { 111, 88, 1327 },{ 107, 97, 328 },{ 91, 101, 96 },{ 101, 97, 8 }, }, { { 119, 96, 1327 },{ 115, 105, 328 },{ 99, 109, 96 },{ 109, 105, 8 }, },
  2047. { { 127, 104, 1327 },{ 123, 113, 328 },{ 107, 117, 96 },{ 117, 113, 8 }, }, { { 135, 112, 1327 },{ 131, 121, 328 },{ 115, 125, 96 },{ 125, 121, 8 }, },
  2048. { { 144, 121, 1327 },{ 140, 130, 328 },{ 124, 134, 96 },{ 134, 130, 8 }, }, { { 152, 129, 1327 },{ 148, 138, 328 },{ 132, 142, 96 },{ 142, 138, 8 }, },
  2049. { { 160, 137, 1327 },{ 156, 146, 328 },{ 140, 150, 96 },{ 150, 146, 8 }, }, { { 168, 145, 1327 },{ 164, 154, 328 },{ 148, 158, 96 },{ 158, 154, 8 }, },
  2050. { { 177, 154, 1327 },{ 173, 163, 328 },{ 157, 167, 96 },{ 167, 163, 8 }, }, { { 185, 162, 1327 },{ 181, 171, 328 },{ 165, 175, 96 },{ 175, 171, 8 }, },
  2051. { { 193, 170, 1327 },{ 189, 179, 328 },{ 173, 183, 96 },{ 183, 179, 8 }, }, { { 201, 178, 1327 },{ 197, 187, 328 },{ 181, 191, 96 },{ 191, 187, 8 }, },
  2052. { { 210, 187, 1327 },{ 206, 196, 328 },{ 190, 200, 96 },{ 200, 196, 8 }, }, { { 218, 195, 1327 },{ 214, 204, 328 },{ 198, 208, 96 },{ 208, 204, 8 }, },
  2053. { { 226, 203, 1327 },{ 222, 212, 328 },{ 206, 216, 96 },{ 216, 212, 8 }, }, { { 234, 211, 1327 },{ 230, 220, 328 },{ 214, 224, 96 },{ 224, 220, 8 }, },
  2054. { { 243, 220, 1327 },{ 239, 229, 328 },{ 223, 233, 96 },{ 233, 229, 8 }, }, { { 251, 228, 1327 },{ 247, 237, 328 },{ 231, 241, 96 },{ 241, 237, 8 }, },
  2055. { { 239, 249, 3680 },{ 245, 249, 3648 },{ 239, 249, 96 },{ 249, 245, 8 }, }, { { 247, 253, 4040 },{ 255, 253, 8 },{ 247, 253, 456 },{ 255, 253, 8 }, },
  2056. { { 5, 17, 566 },{ 5, 17, 560 },{ 5, 0, 9 },{ 5, 0, 8 }, }, { { 25, 0, 313 },{ 25, 3, 328 },{ 13, 0, 49 },{ 13, 3, 8 }, },
  2057. { { 39, 0, 1329 },{ 33, 11, 328 },{ 11, 21, 70 },{ 21, 11, 8 }, }, { { 47, 7, 1329 },{ 41, 19, 328 },{ 29, 7, 33 },{ 29, 19, 8 }, },
  2058. { { 50, 11, 239 },{ 50, 28, 328 },{ 38, 16, 33 },{ 38, 28, 8 }, }, { { 92, 13, 2423 },{ 58, 36, 328 },{ 46, 24, 33 },{ 46, 36, 8 }, },
  2059. { { 100, 21, 2423 },{ 66, 44, 328 },{ 54, 32, 33 },{ 54, 44, 8 }, }, { { 86, 7, 1253 },{ 74, 52, 328 },{ 62, 40, 33 },{ 62, 52, 8 }, },
  2060. { { 95, 16, 1253 },{ 83, 61, 328 },{ 71, 49, 33 },{ 71, 61, 8 }, }, { { 103, 24, 1253 },{ 91, 69, 328 },{ 79, 57, 33 },{ 79, 69, 8 }, },
  2061. { { 111, 32, 1253 },{ 99, 77, 328 },{ 87, 65, 33 },{ 87, 77, 8 }, }, { { 119, 40, 1253 },{ 107, 85, 328 },{ 95, 73, 33 },{ 95, 85, 8 }, },
  2062. { { 128, 49, 1253 },{ 116, 94, 328 },{ 104, 82, 33 },{ 104, 94, 8 }, }, { { 136, 57, 1253 },{ 124, 102, 328 },{ 112, 90, 33 },{ 112, 102, 8 }, },
  2063. { { 144, 65, 1253 },{ 132, 110, 328 },{ 120, 98, 33 },{ 120, 110, 8 }, }, { { 152, 73, 1253 },{ 140, 118, 328 },{ 128, 106, 33 },{ 128, 118, 8 }, },
  2064. { { 161, 82, 1253 },{ 149, 127, 328 },{ 137, 115, 33 },{ 137, 127, 8 }, }, { { 169, 90, 1253 },{ 157, 135, 328 },{ 145, 123, 33 },{ 145, 135, 8 }, },
  2065. { { 177, 98, 1253 },{ 165, 143, 328 },{ 153, 131, 33 },{ 153, 143, 8 }, }, { { 185, 106, 1253 },{ 173, 151, 328 },{ 161, 139, 33 },{ 161, 151, 8 }, },
  2066. { { 194, 115, 1253 },{ 182, 160, 328 },{ 170, 148, 33 },{ 170, 160, 8 }, }, { { 202, 123, 1253 },{ 190, 168, 328 },{ 178, 156, 33 },{ 178, 168, 8 }, },
  2067. { { 210, 131, 1253 },{ 198, 176, 328 },{ 186, 164, 33 },{ 186, 176, 8 }, }, { { 218, 139, 1253 },{ 206, 184, 328 },{ 194, 172, 33 },{ 194, 184, 8 }, },
  2068. { { 227, 148, 1253 },{ 215, 193, 328 },{ 203, 181, 33 },{ 203, 193, 8 }, }, { { 235, 156, 1253 },{ 223, 201, 328 },{ 211, 189, 33 },{ 211, 201, 8 }, },
  2069. { { 243, 164, 1253 },{ 231, 209, 328 },{ 219, 197, 33 },{ 219, 209, 8 }, }, { { 183, 239, 867 },{ 239, 217, 328 },{ 227, 205, 33 },{ 227, 217, 8 }, },
  2070. { { 254, 214, 1329 },{ 248, 226, 328 },{ 236, 214, 33 },{ 236, 226, 8 }, }, { { 222, 244, 3680 },{ 234, 244, 3648 },{ 244, 222, 33 },{ 244, 234, 8 }, },
  2071. { { 230, 252, 3680 },{ 242, 252, 3648 },{ 252, 230, 33 },{ 252, 242, 8 }, }, { { 238, 250, 4040 },{ 255, 250, 8 },{ 238, 250, 456 },{ 255, 250, 8 }, },
  2072. { { 9, 29, 566 },{ 9, 29, 560 },{ 9, 0, 9 },{ 9, 0, 8 }, }, { { 17, 37, 566 },{ 17, 37, 560 },{ 17, 0, 9 },{ 17, 0, 8 }, },
  2073. { { 45, 0, 313 },{ 45, 0, 312 },{ 25, 0, 49 },{ 25, 7, 8 }, }, { { 14, 63, 2758 },{ 5, 53, 784 },{ 15, 33, 70 },{ 33, 15, 8 }, },
  2074. { { 71, 6, 1329 },{ 72, 4, 1328 },{ 42, 4, 33 },{ 42, 24, 8 }, }, { { 70, 3, 239 },{ 70, 2, 232 },{ 50, 12, 33 },{ 50, 32, 8 }, },
  2075. { { 0, 98, 2842 },{ 78, 10, 232 },{ 58, 20, 33 },{ 58, 40, 8 }, }, { { 97, 27, 1329 },{ 86, 18, 232 },{ 66, 28, 33 },{ 66, 48, 8 }, },
  2076. { { 0, 94, 867 },{ 95, 27, 232 },{ 75, 37, 33 },{ 75, 57, 8 }, }, { { 8, 102, 867 },{ 103, 35, 232 },{ 83, 45, 33 },{ 83, 65, 8 }, },
  2077. { { 12, 112, 867 },{ 111, 43, 232 },{ 91, 53, 33 },{ 91, 73, 8 }, }, { { 139, 2, 1253 },{ 119, 51, 232 },{ 99, 61, 33 },{ 99, 81, 8 }, },
  2078. { { 148, 13, 1253 },{ 128, 60, 232 },{ 108, 70, 33 },{ 108, 90, 8 }, }, { { 156, 21, 1253 },{ 136, 68, 232 },{ 116, 78, 33 },{ 116, 98, 8 }, },
  2079. { { 164, 29, 1253 },{ 144, 76, 232 },{ 124, 86, 33 },{ 124, 106, 8 }, }, { { 172, 37, 1253 },{ 152, 84, 232 },{ 132, 94, 33 },{ 132, 114, 8 }, },
  2080. { { 181, 46, 1253 },{ 161, 93, 232 },{ 141, 103, 33 },{ 141, 123, 8 }, }, { { 189, 54, 1253 },{ 169, 101, 232 },{ 149, 111, 33 },{ 149, 131, 8 }, },
  2081. { { 197, 62, 1253 },{ 177, 109, 232 },{ 157, 119, 33 },{ 157, 139, 8 }, }, { { 205, 70, 1253 },{ 185, 117, 232 },{ 165, 127, 33 },{ 165, 147, 8 }, },
  2082. { { 214, 79, 1253 },{ 194, 126, 232 },{ 174, 136, 33 },{ 174, 156, 8 }, }, { { 222, 87, 1253 },{ 202, 134, 232 },{ 182, 144, 33 },{ 182, 164, 8 }, },
  2083. { { 230, 95, 1253 },{ 210, 142, 232 },{ 190, 152, 33 },{ 190, 172, 8 }, }, { { 238, 103, 1253 },{ 218, 150, 232 },{ 198, 160, 33 },{ 198, 180, 8 }, },
  2084. { { 247, 112, 1253 },{ 227, 159, 232 },{ 207, 169, 33 },{ 207, 189, 8 }, }, { { 255, 120, 1253 },{ 235, 167, 232 },{ 215, 177, 33 },{ 215, 197, 8 }, },
  2085. { { 146, 243, 867 },{ 243, 175, 232 },{ 223, 185, 33 },{ 223, 205, 8 }, }, { { 184, 231, 3682 },{ 203, 251, 784 },{ 231, 193, 33 },{ 231, 213, 8 }, },
  2086. { { 193, 240, 3682 },{ 222, 240, 3648 },{ 240, 202, 33 },{ 240, 222, 8 }, }, { { 255, 210, 169 },{ 230, 248, 3648 },{ 248, 210, 33 },{ 248, 230, 8 }, },
  2087. { { 218, 238, 4040 },{ 255, 238, 8 },{ 218, 238, 456 },{ 255, 238, 8 }, }, { { 226, 246, 4040 },{ 255, 246, 8 },{ 226, 246, 456 },{ 255, 246, 8 }, },
  2088. { { 13, 42, 566 },{ 13, 42, 560 },{ 13, 0, 9 },{ 13, 0, 8 }, }, { { 50, 0, 329 },{ 50, 0, 328 },{ 21, 0, 9 },{ 21, 0, 8 }, },
  2089. { { 29, 58, 566 },{ 67, 2, 1352 },{ 3, 29, 70 },{ 29, 3, 8 }, }, { { 10, 79, 2758 },{ 76, 11, 1352 },{ 11, 37, 70 },{ 37, 11, 8 }, },
  2090. { { 7, 75, 790 },{ 7, 75, 784 },{ 20, 46, 70 },{ 46, 20, 8 }, }, { { 15, 83, 790 },{ 97, 1, 1328 },{ 28, 54, 70 },{ 54, 28, 8 }, },
  2091. { { 101, 7, 1329 },{ 105, 9, 1328 },{ 62, 0, 39 },{ 62, 36, 8 }, }, { { 99, 1, 239 },{ 99, 3, 232 },{ 1, 71, 98 },{ 70, 44, 8 }, },
  2092. { { 107, 11, 239 },{ 108, 12, 232 },{ 10, 80, 98 },{ 79, 53, 8 }, }, { { 115, 19, 239 },{ 116, 20, 232 },{ 18, 88, 98 },{ 87, 61, 8 }, },
  2093. { { 123, 27, 239 },{ 124, 28, 232 },{ 26, 96, 98 },{ 95, 69, 8 }, }, { { 131, 35, 239 },{ 132, 36, 232 },{ 34, 104, 98 },{ 103, 77, 8 }, },
  2094. { { 140, 44, 239 },{ 141, 45, 232 },{ 43, 113, 98 },{ 112, 86, 8 }, }, { { 148, 52, 239 },{ 149, 53, 232 },{ 51, 121, 98 },{ 120, 94, 8 }, },
  2095. { { 156, 60, 239 },{ 157, 61, 232 },{ 59, 129, 98 },{ 128, 102, 8 }, }, { { 164, 68, 239 },{ 165, 69, 232 },{ 67, 137, 98 },{ 136, 110, 8 }, },
  2096. { { 173, 77, 239 },{ 174, 78, 232 },{ 76, 146, 98 },{ 145, 119, 8 }, }, { { 181, 85, 239 },{ 182, 86, 232 },{ 84, 154, 98 },{ 153, 127, 8 }, },
  2097. { { 189, 93, 239 },{ 190, 94, 232 },{ 92, 162, 98 },{ 161, 135, 8 }, }, { { 197, 101, 239 },{ 198, 102, 232 },{ 100, 170, 98 },{ 169, 143, 8 }, },
  2098. { { 206, 110, 239 },{ 207, 111, 232 },{ 109, 179, 98 },{ 178, 152, 8 }, }, { { 214, 118, 239 },{ 215, 119, 232 },{ 117, 187, 98 },{ 186, 160, 8 }, },
  2099. { { 222, 126, 239 },{ 223, 127, 232 },{ 125, 195, 98 },{ 194, 168, 8 }, }, { { 230, 134, 239 },{ 231, 135, 232 },{ 133, 203, 98 },{ 202, 176, 8 }, },
  2100. { { 239, 143, 239 },{ 240, 144, 232 },{ 142, 212, 98 },{ 211, 185, 8 }, }, { { 247, 151, 239 },{ 180, 248, 784 },{ 150, 220, 98 },{ 219, 193, 8 }, },
  2101. { { 159, 228, 3682 },{ 201, 227, 3648 },{ 158, 228, 98 },{ 227, 201, 8 }, }, { { 181, 249, 3928 },{ 209, 235, 3648 },{ 166, 236, 98 },{ 235, 209, 8 }, },
  2102. { { 255, 189, 169 },{ 218, 244, 3648 },{ 175, 245, 98 },{ 244, 218, 8 }, }, { { 197, 226, 4040 },{ 226, 252, 3648 },{ 183, 253, 98 },{ 252, 226, 8 }, },
  2103. { { 205, 234, 4040 },{ 255, 234, 8 },{ 205, 234, 456 },{ 255, 234, 8 }, }, { { 213, 242, 4040 },{ 255, 242, 8 },{ 213, 242, 456 },{ 255, 242, 8 }, },
  2104. { { 18, 60, 566 },{ 18, 60, 560 },{ 18, 0, 9 },{ 18, 0, 8 }, }, { { 26, 68, 566 },{ 26, 68, 560 },{ 26, 0, 9 },{ 26, 0, 8 }, },
  2105. { { 34, 76, 566 },{ 34, 76, 560 },{ 34, 0, 9 },{ 34, 0, 8 }, }, { { 5, 104, 2758 },{ 98, 5, 1352 },{ 42, 0, 57 },{ 42, 6, 8 }, },
  2106. { { 92, 0, 313 },{ 93, 1, 312 },{ 15, 51, 70 },{ 51, 15, 8 }, }, { { 3, 101, 790 },{ 3, 101, 784 },{ 0, 59, 88 },{ 59, 23, 8 }, },
  2107. { { 14, 107, 790 },{ 11, 109, 784 },{ 31, 67, 70 },{ 67, 31, 8 }, }, { { 19, 117, 790 },{ 19, 117, 784 },{ 39, 75, 70 },{ 75, 39, 8 }, },
  2108. { { 28, 126, 790 },{ 28, 126, 784 },{ 83, 5, 33 },{ 84, 48, 8 }, }, { { 132, 0, 239 },{ 36, 134, 784 },{ 91, 13, 33 },{ 92, 56, 8 }, },
  2109. { { 142, 4, 239 },{ 44, 142, 784 },{ 99, 21, 33 },{ 100, 64, 8 }, }, { { 150, 12, 239 },{ 52, 150, 784 },{ 107, 29, 33 },{ 108, 72, 8 }, },
  2110. { { 159, 21, 239 },{ 61, 159, 784 },{ 116, 38, 33 },{ 117, 81, 8 }, }, { { 167, 29, 239 },{ 69, 167, 784 },{ 124, 46, 33 },{ 125, 89, 8 }, },
  2111. { { 175, 37, 239 },{ 77, 175, 784 },{ 132, 54, 33 },{ 133, 97, 8 }, }, { { 183, 45, 239 },{ 85, 183, 784 },{ 140, 62, 33 },{ 141, 105, 8 }, },
  2112. { { 192, 54, 239 },{ 94, 192, 784 },{ 149, 71, 33 },{ 150, 114, 8 }, }, { { 200, 62, 239 },{ 102, 200, 784 },{ 157, 79, 33 },{ 158, 122, 8 }, },
  2113. { { 208, 70, 239 },{ 110, 208, 784 },{ 165, 87, 33 },{ 166, 130, 8 }, }, { { 216, 78, 239 },{ 118, 216, 784 },{ 173, 95, 33 },{ 174, 138, 8 }, },
  2114. { { 225, 87, 239 },{ 127, 225, 784 },{ 182, 104, 33 },{ 183, 147, 8 }, }, { { 233, 95, 239 },{ 135, 233, 784 },{ 190, 112, 33 },{ 191, 155, 8 }, },
  2115. { { 241, 103, 239 },{ 143, 241, 784 },{ 198, 120, 33 },{ 199, 163, 8 }, }, { { 111, 208, 3682 },{ 151, 249, 784 },{ 206, 128, 33 },{ 207, 171, 8 }, },
  2116. { { 120, 217, 3682 },{ 180, 216, 3648 },{ 215, 137, 33 },{ 216, 180, 8 }, }, { { 128, 225, 3682 },{ 188, 224, 3648 },{ 223, 145, 33 },{ 224, 188, 8 }, },
  2117. { { 155, 253, 3928 },{ 196, 232, 3648 },{ 231, 153, 33 },{ 232, 196, 8 }, }, { { 144, 241, 3682 },{ 204, 240, 3648 },{ 239, 161, 33 },{ 240, 204, 8 }, },
  2118. { { 153, 250, 3682 },{ 213, 249, 3648 },{ 248, 170, 33 },{ 249, 213, 8 }, }, { { 179, 221, 4040 },{ 255, 221, 8 },{ 179, 221, 456 },{ 255, 221, 8 }, },
  2119. { { 187, 229, 4040 },{ 255, 229, 8 },{ 187, 229, 456 },{ 255, 229, 8 }, }, { { 195, 237, 4040 },{ 255, 237, 8 },{ 195, 237, 456 },{ 255, 237, 8 }, },
  2120. { { 24, 80, 566 },{ 24, 80, 560 },{ 24, 0, 9 },{ 24, 0, 8 }, }, { { 32, 88, 566 },{ 32, 88, 560 },{ 32, 0, 9 },{ 32, 0, 8 }, },
  2121. { { 40, 96, 566 },{ 40, 96, 560 },{ 40, 0, 9 },{ 40, 0, 8 }, }, { { 48, 104, 566 },{ 48, 104, 560 },{ 48, 0, 9 },{ 48, 0, 8 }, },
  2122. { { 9, 138, 2758 },{ 130, 7, 1352 },{ 9, 57, 70 },{ 57, 9, 8 }, }, { { 119, 0, 313 },{ 120, 0, 312 },{ 17, 65, 70 },{ 65, 17, 8 }, },
  2123. { { 0, 128, 784 },{ 128, 6, 312 },{ 25, 73, 70 },{ 73, 25, 8 }, }, { { 6, 137, 790 },{ 5, 136, 784 },{ 33, 81, 70 },{ 81, 33, 8 }, },
  2124. { { 42, 171, 2758 },{ 14, 145, 784 },{ 42, 90, 70 },{ 90, 42, 8 }, }, { { 50, 179, 2758 },{ 22, 153, 784 },{ 50, 98, 70 },{ 98, 50, 8 }, },
  2125. { { 58, 187, 2758 },{ 30, 161, 784 },{ 58, 106, 70 },{ 106, 58, 8 }, }, { { 191, 18, 1329 },{ 38, 169, 784 },{ 112, 9, 33 },{ 114, 66, 8 }, },
  2126. { { 176, 0, 239 },{ 47, 178, 784 },{ 121, 18, 33 },{ 123, 75, 8 }, }, { { 187, 1, 239 },{ 55, 186, 784 },{ 129, 26, 33 },{ 131, 83, 8 }, },
  2127. { { 195, 10, 239 },{ 63, 194, 784 },{ 137, 34, 33 },{ 139, 91, 8 }, }, { { 203, 18, 239 },{ 71, 202, 784 },{ 145, 42, 33 },{ 147, 99, 8 }, },
  2128. { { 212, 27, 239 },{ 80, 211, 784 },{ 154, 51, 33 },{ 156, 108, 8 }, }, { { 220, 35, 239 },{ 88, 219, 784 },{ 162, 59, 33 },{ 164, 116, 8 }, },
  2129. { { 228, 43, 239 },{ 96, 227, 784 },{ 170, 67, 33 },{ 172, 124, 8 }, }, { { 236, 51, 239 },{ 104, 235, 784 },{ 178, 75, 33 },{ 180, 132, 8 }, },
  2130. { { 245, 60, 239 },{ 113, 244, 784 },{ 187, 84, 33 },{ 189, 141, 8 }, }, { { 91, 194, 3680 },{ 149, 197, 3648 },{ 195, 92, 33 },{ 197, 149, 8 }, },
  2131. { { 99, 202, 3680 },{ 157, 205, 3648 },{ 203, 100, 33 },{ 205, 157, 8 }, }, { { 107, 210, 3680 },{ 165, 213, 3648 },{ 211, 108, 33 },{ 213, 165, 8 }, },
  2132. { { 119, 249, 3928 },{ 174, 222, 3648 },{ 220, 117, 33 },{ 222, 174, 8 }, }, { { 127, 255, 856 },{ 182, 230, 3648 },{ 228, 125, 33 },{ 230, 182, 8 }, },
  2133. { { 255, 135, 169 },{ 190, 238, 3648 },{ 236, 133, 33 },{ 238, 190, 8 }, }, { { 140, 243, 3680 },{ 198, 246, 3648 },{ 244, 141, 33 },{ 246, 198, 8 }, },
  2134. { { 151, 207, 4040 },{ 255, 207, 8 },{ 151, 207, 456 },{ 255, 207, 8 }, }, { { 159, 215, 4040 },{ 255, 215, 8 },{ 159, 215, 456 },{ 255, 215, 8 }, },
  2135. { { 167, 223, 4040 },{ 255, 223, 8 },{ 167, 223, 456 },{ 255, 223, 8 }, }, { { 175, 231, 4040 },{ 255, 231, 8 },{ 175, 231, 456 },{ 255, 231, 8 }, },
  2136. { { 33, 106, 566 },{ 33, 106, 560 },{ 33, 0, 9 },{ 33, 0, 8 }, }, { { 41, 114, 566 },{ 41, 114, 560 },{ 41, 0, 9 },{ 41, 0, 8 }, },
  2137. { { 49, 122, 566 },{ 49, 122, 560 },{ 49, 0, 9 },{ 49, 0, 8 }, }, { { 57, 130, 566 },{ 57, 130, 560 },{ 57, 0, 9 },{ 57, 0, 8 }, },
  2138. { { 66, 139, 566 },{ 66, 139, 560 },{ 66, 0, 9 },{ 66, 0, 8 }, }, { { 74, 147, 566 },{ 170, 7, 1352 },{ 8, 74, 70 },{ 74, 8, 8 }, },
  2139. { { 152, 0, 313 },{ 178, 15, 1352 },{ 0, 82, 80 },{ 82, 16, 8 }, }, { { 162, 0, 313 },{ 186, 23, 1352 },{ 24, 90, 70 },{ 90, 24, 8 }, },
  2140. { { 0, 171, 784 },{ 195, 32, 1352 },{ 33, 99, 70 },{ 99, 33, 8 }, }, { { 6, 179, 790 },{ 203, 40, 1352 },{ 41, 107, 70 },{ 107, 41, 8 }, },
  2141. { { 15, 187, 790 },{ 211, 48, 1352 },{ 115, 0, 41 },{ 115, 49, 8 }, }, { { 61, 199, 710 },{ 219, 56, 1352 },{ 57, 123, 70 },{ 123, 57, 8 }, },
  2142. { { 70, 208, 710 },{ 228, 65, 1352 },{ 66, 132, 70 },{ 132, 66, 8 }, }, { { 78, 216, 710 },{ 236, 73, 1352 },{ 74, 140, 70 },{ 140, 74, 8 }, },
  2143. { { 86, 224, 710 },{ 244, 81, 1352 },{ 145, 7, 33 },{ 148, 82, 8 }, }, { { 222, 8, 233 },{ 252, 89, 1352 },{ 153, 15, 33 },{ 156, 90, 8 }, },
  2144. { { 235, 0, 239 },{ 241, 101, 328 },{ 166, 6, 39 },{ 165, 99, 8 }, }, { { 32, 170, 3680 },{ 249, 109, 328 },{ 0, 175, 98 },{ 173, 107, 8 }, },
  2145. { { 40, 178, 3680 },{ 115, 181, 3648 },{ 8, 183, 98 },{ 181, 115, 8 }, }, { { 48, 186, 3680 },{ 123, 189, 3648 },{ 16, 191, 98 },{ 189, 123, 8 }, },
  2146. { { 57, 195, 3680 },{ 132, 198, 3648 },{ 25, 200, 98 },{ 198, 132, 8 }, }, { { 67, 243, 3928 },{ 140, 206, 3648 },{ 33, 208, 98 },{ 206, 140, 8 }, },
  2147. { { 76, 251, 3928 },{ 148, 214, 3648 },{ 41, 216, 98 },{ 214, 148, 8 }, }, { { 86, 255, 856 },{ 156, 222, 3648 },{ 49, 224, 98 },{ 222, 156, 8 }, },
  2148. { { 255, 93, 169 },{ 165, 231, 3648 },{ 58, 233, 98 },{ 231, 165, 8 }, }, { { 98, 236, 3680 },{ 173, 239, 3648 },{ 66, 241, 98 },{ 239, 173, 8 }, },
  2149. { { 108, 181, 4040 },{ 181, 247, 3648 },{ 74, 249, 98 },{ 247, 181, 8 }, }, { { 116, 189, 4040 },{ 255, 189, 8 },{ 116, 189, 456 },{ 255, 189, 8 }, },
  2150. { { 125, 198, 4040 },{ 255, 198, 8 },{ 125, 198, 456 },{ 255, 198, 8 }, }, { { 133, 206, 4040 },{ 255, 206, 8 },{ 133, 206, 456 },{ 255, 206, 8 }, },
  2151. { { 141, 214, 4040 },{ 255, 214, 8 },{ 141, 214, 456 },{ 255, 214, 8 }, }, { { 149, 222, 4040 },{ 255, 222, 8 },{ 149, 222, 456 },{ 255, 222, 8 }, },
  2152. { { 47, 183, 566 },{ 47, 183, 560 },{ 47, 0, 9 },{ 47, 0, 8 }, }, { { 55, 191, 566 },{ 55, 191, 560 },{ 55, 0, 9 },{ 55, 0, 8 }, },
  2153. { { 63, 199, 566 },{ 63, 199, 560 },{ 63, 0, 9 },{ 63, 0, 8 }, }, { { 71, 207, 566 },{ 71, 207, 560 },{ 71, 0, 9 },{ 71, 0, 8 }, },
  2154. { { 80, 216, 566 },{ 80, 216, 560 },{ 80, 0, 9 },{ 80, 0, 8 }, }, { { 88, 224, 566 },{ 88, 224, 560 },{ 88, 0, 9 },{ 88, 0, 8 }, },
  2155. { { 3, 233, 710 },{ 3, 233, 704 },{ 2, 96, 70 },{ 96, 2, 8 }, }, { { 11, 241, 710 },{ 11, 241, 704 },{ 10, 104, 70 },{ 104, 10, 8 }, },
  2156. { { 20, 250, 710 },{ 20, 250, 704 },{ 19, 113, 70 },{ 113, 19, 8 }, }, { { 27, 121, 3654 },{ 27, 121, 3648 },{ 27, 121, 70 },{ 121, 27, 8 }, },
  2157. { { 35, 129, 3654 },{ 35, 129, 3648 },{ 35, 129, 70 },{ 129, 35, 8 }, }, { { 43, 137, 3654 },{ 43, 137, 3648 },{ 43, 137, 70 },{ 137, 43, 8 }, },
  2158. { { 52, 146, 3654 },{ 52, 146, 3648 },{ 52, 146, 70 },{ 146, 52, 8 }, }, { { 60, 154, 3654 },{ 60, 154, 3648 },{ 60, 154, 70 },{ 154, 60, 8 }, },
  2159. { { 68, 162, 3654 },{ 68, 162, 3648 },{ 68, 162, 70 },{ 162, 68, 8 }, }, { { 76, 170, 3654 },{ 76, 170, 3648 },{ 76, 170, 70 },{ 170, 76, 8 }, },
  2160. { { 85, 179, 3654 },{ 85, 179, 3648 },{ 85, 179, 70 },{ 179, 85, 8 }, }, { { 93, 187, 3654 },{ 93, 187, 3648 },{ 93, 187, 70 },{ 187, 93, 8 }, },
  2161. { { 101, 195, 3654 },{ 101, 195, 3648 },{ 101, 195, 70 },{ 195, 101, 8 }, }, { { 109, 203, 3654 },{ 109, 203, 3648 },{ 109, 203, 70 },{ 203, 109, 8 }, },
  2162. { { 118, 212, 3654 },{ 118, 212, 3648 },{ 118, 212, 70 },{ 212, 118, 8 }, }, { { 126, 220, 3654 },{ 126, 220, 3648 },{ 126, 220, 70 },{ 220, 126, 8 }, },
  2163. { { 134, 228, 3654 },{ 134, 228, 3648 },{ 134, 228, 70 },{ 228, 134, 8 }, }, { { 5, 236, 3680 },{ 142, 236, 3648 },{ 5, 236, 96 },{ 236, 142, 8 }, },
  2164. { { 14, 245, 3680 },{ 151, 245, 3648 },{ 14, 245, 96 },{ 245, 151, 8 }, }, { { 23, 159, 4040 },{ 159, 253, 3648 },{ 23, 159, 456 },{ 253, 159, 8 }, },
  2165. { { 31, 167, 4040 },{ 255, 167, 8 },{ 31, 167, 456 },{ 255, 167, 8 }, }, { { 39, 175, 4040 },{ 255, 175, 8 },{ 39, 175, 456 },{ 255, 175, 8 }, },
  2166. { { 48, 184, 4040 },{ 255, 184, 8 },{ 48, 184, 456 },{ 255, 184, 8 }, }, { { 56, 192, 4040 },{ 255, 192, 8 },{ 56, 192, 456 },{ 255, 192, 8 }, },
  2167. { { 64, 200, 4040 },{ 255, 200, 8 },{ 64, 200, 456 },{ 255, 200, 8 }, },{ { 72, 208, 4040 },{ 255, 208, 8 },{ 72, 208, 456 },{ 255, 208, 8 }, },
  2168. };
  2169. struct dxt5a_block
  2170. {
  2171. uint8_t m_endpoints[2];
  2172. enum { cTotalSelectorBytes = 6 };
  2173. uint8_t m_selectors[cTotalSelectorBytes];
  2174. inline void clear()
  2175. {
  2176. basisu::clear_obj(*this);
  2177. }
  2178. inline uint32_t get_low_alpha() const
  2179. {
  2180. return m_endpoints[0];
  2181. }
  2182. inline uint32_t get_high_alpha() const
  2183. {
  2184. return m_endpoints[1];
  2185. }
  2186. inline void set_low_alpha(uint32_t i)
  2187. {
  2188. assert(i <= UINT8_MAX);
  2189. m_endpoints[0] = static_cast<uint8_t>(i);
  2190. }
  2191. inline void set_high_alpha(uint32_t i)
  2192. {
  2193. assert(i <= UINT8_MAX);
  2194. m_endpoints[1] = static_cast<uint8_t>(i);
  2195. }
  2196. inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }
  2197. uint32_t get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); }
  2198. uint32_t get_selectors_as_word(uint32_t index) { assert(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); }
  2199. inline uint32_t get_selector(uint32_t x, uint32_t y) const
  2200. {
  2201. assert((x < 4U) && (y < 4U));
  2202. uint32_t selector_index = (y * 4) + x;
  2203. uint32_t bit_index = selector_index * cDXT5SelectorBits;
  2204. uint32_t byte_index = bit_index >> 3;
  2205. uint32_t bit_ofs = bit_index & 7;
  2206. uint32_t v = m_selectors[byte_index];
  2207. if (byte_index < (cTotalSelectorBytes - 1))
  2208. v |= (m_selectors[byte_index + 1] << 8);
  2209. return (v >> bit_ofs) & 7;
  2210. }
  2211. inline void set_selector(uint32_t x, uint32_t y, uint32_t val)
  2212. {
  2213. assert((x < 4U) && (y < 4U) && (val < 8U));
  2214. uint32_t selector_index = (y * 4) + x;
  2215. uint32_t bit_index = selector_index * cDXT5SelectorBits;
  2216. uint32_t byte_index = bit_index >> 3;
  2217. uint32_t bit_ofs = bit_index & 7;
  2218. uint32_t v = m_selectors[byte_index];
  2219. if (byte_index < (cTotalSelectorBytes - 1))
  2220. v |= (m_selectors[byte_index + 1] << 8);
  2221. v &= (~(7 << bit_ofs));
  2222. v |= (val << bit_ofs);
  2223. m_selectors[byte_index] = static_cast<uint8_t>(v);
  2224. if (byte_index < (cTotalSelectorBytes - 1))
  2225. m_selectors[byte_index + 1] = static_cast<uint8_t>(v >> 8);
  2226. }
  2227. enum { cMaxSelectorValues = 8 };
  2228. static uint32_t get_block_values6(color32* pDst, uint32_t l, uint32_t h)
  2229. {
  2230. pDst[0].a = static_cast<uint8_t>(l);
  2231. pDst[1].a = static_cast<uint8_t>(h);
  2232. pDst[2].a = static_cast<uint8_t>((l * 4 + h) / 5);
  2233. pDst[3].a = static_cast<uint8_t>((l * 3 + h * 2) / 5);
  2234. pDst[4].a = static_cast<uint8_t>((l * 2 + h * 3) / 5);
  2235. pDst[5].a = static_cast<uint8_t>((l + h * 4) / 5);
  2236. pDst[6].a = 0;
  2237. pDst[7].a = 255;
  2238. return 6;
  2239. }
  2240. static uint32_t get_block_values8(color32* pDst, uint32_t l, uint32_t h)
  2241. {
  2242. pDst[0].a = static_cast<uint8_t>(l);
  2243. pDst[1].a = static_cast<uint8_t>(h);
  2244. pDst[2].a = static_cast<uint8_t>((l * 6 + h) / 7);
  2245. pDst[3].a = static_cast<uint8_t>((l * 5 + h * 2) / 7);
  2246. pDst[4].a = static_cast<uint8_t>((l * 4 + h * 3) / 7);
  2247. pDst[5].a = static_cast<uint8_t>((l * 3 + h * 4) / 7);
  2248. pDst[6].a = static_cast<uint8_t>((l * 2 + h * 5) / 7);
  2249. pDst[7].a = static_cast<uint8_t>((l + h * 6) / 7);
  2250. return 8;
  2251. }
  2252. static uint32_t get_block_values(color32* pDst, uint32_t l, uint32_t h)
  2253. {
  2254. if (l > h)
  2255. return get_block_values8(pDst, l, h);
  2256. else
  2257. return get_block_values6(pDst, l, h);
  2258. }
  2259. };
  2260. static void convert_etc1s_to_dxt5a(dxt5a_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
  2261. {
  2262. const uint32_t low_selector = pSelector->m_lo_selector;
  2263. const uint32_t high_selector = pSelector->m_hi_selector;
  2264. const color32& base_color = pEndpoints->m_color5;
  2265. const uint32_t inten_table = pEndpoints->m_inten5;
  2266. if (low_selector == high_selector)
  2267. {
  2268. uint32_t r;
  2269. decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r);
  2270. pDst_block->set_low_alpha(r);
  2271. pDst_block->set_high_alpha(r);
  2272. pDst_block->m_selectors[0] = 0;
  2273. pDst_block->m_selectors[1] = 0;
  2274. pDst_block->m_selectors[2] = 0;
  2275. pDst_block->m_selectors[3] = 0;
  2276. pDst_block->m_selectors[4] = 0;
  2277. pDst_block->m_selectors[5] = 0;
  2278. return;
  2279. }
  2280. else if (pSelector->m_num_unique_selectors == 2)
  2281. {
  2282. color32 block_colors[4];
  2283. decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
  2284. const uint32_t r0 = block_colors[low_selector].r;
  2285. const uint32_t r1 = block_colors[high_selector].r;
  2286. pDst_block->set_low_alpha(r0);
  2287. pDst_block->set_high_alpha(r1);
  2288. // TODO: Optimize this
  2289. for (uint32_t y = 0; y < 4; y++)
  2290. {
  2291. for (uint32_t x = 0; x < 4; x++)
  2292. {
  2293. uint32_t s = pSelector->get_selector(x, y);
  2294. pDst_block->set_selector(x, y, (s == high_selector) ? 1 : 0);
  2295. }
  2296. }
  2297. return;
  2298. }
  2299. uint32_t selector_range_table = 0;
  2300. for (selector_range_table = 0; selector_range_table < NUM_DXT5A_SELECTOR_RANGES; selector_range_table++)
  2301. if ((low_selector == s_dxt5a_selector_ranges[selector_range_table].m_low) && (high_selector == s_dxt5a_selector_ranges[selector_range_table].m_high))
  2302. break;
  2303. if (selector_range_table >= NUM_DXT5A_SELECTOR_RANGES)
  2304. selector_range_table = 0;
  2305. const etc1_g_to_dxt5a_conversion* pTable_entry = &g_etc1_g_to_dxt5a[base_color.r + inten_table * 32][selector_range_table];
  2306. pDst_block->set_low_alpha(pTable_entry->m_lo);
  2307. pDst_block->set_high_alpha(pTable_entry->m_hi);
  2308. // TODO: Optimize this (like ETC1->BC1)
  2309. for (uint32_t y = 0; y < 4; y++)
  2310. {
  2311. for (uint32_t x = 0; x < 4; x++)
  2312. {
  2313. uint32_t s = pSelector->get_selector(x, y);
  2314. uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7;
  2315. pDst_block->set_selector(x, y, ds);
  2316. }
  2317. }
  2318. }
  2319. #endif //BASISD_SUPPORT_DXT5A
  2320. // PVRTC
  2321. #if BASISD_SUPPORT_PVRTC1 || BASISD_SUPPORT_UASTC
  // Bit-spreading table: entry i has bit k of i moved to bit 2*k, leaving the odd bits zero
  // (e.g. 0xFF -> 0x5555). Sixteen entries per row; row r covers indices r*16 .. r*16+15.
  // NOTE(review): presumably used to interleave x/y block coordinates - confirm at call sites.
  static const uint16_t g_pvrtc_swizzle_table[256] =
  {
      0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055,
      0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
      0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455,
      0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
      0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055,
      0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
      0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, 0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455,
      0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515, 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
      0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015, 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055,
      0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
      0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455,
      0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
      0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055,
      0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
      0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415, 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455,
      0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515, 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
  };
  // PVRTC1 endpoint expansion tables: the index is the encoded component value and the entry
  // is the 8-bit value it expands to.
  // Note we can't use simple calculations to convert PVRTC1 encoded endpoint components
  // to/from 8-bits, due to hardware approximations, so the values are tabulated here.

  // 5-bit component -> 8 bits.
  static const uint8_t g_pvrtc_5[32] =
  {
      0, 8, 16, 24, 33, 41, 49, 57,
      66, 74, 82, 90, 99, 107, 115, 123,
      132, 140, 148, 156, 165, 173, 181, 189,
      198, 206, 214, 222, 231, 239, 247, 255
  };

  // 4-bit component -> 8 bits.
  static const uint8_t g_pvrtc_4[16] =
  {
      0, 16, 33, 49, 66, 82, 99, 115,
      140, 156, 173, 189, 206, 222, 239, 255
  };

  // 3-bit component -> 8 bits.
  static const uint8_t g_pvrtc_3[8] =
  {
      0, 33, 74, 107, 148, 181, 222, 255
  };

  // Alpha level -> 8 bits. Nine entries: levels 0..7 plus a final entry of 255.
  static const uint8_t g_pvrtc_alpha[9] =
  {
      0, 34, 68, 102, 136, 170, 204, 238, 255
  };
  // Round-down quantizer for 5-bit PVRTC components: for an 8-bit value v, entry v is the
  // largest index i such that g_pvrtc_5[i] <= v.
  // Sixteen entries per row; the trailing comment gives the range of v covered by the row.
  static const uint8_t g_pvrtc_5_floor[256] =
  {
      0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1,                 // 0..15
      2,2,2,2, 2,2,2,2, 3,3,3,3, 3,3,3,3,                 // 16..31
      3,4,4,4, 4,4,4,4, 4,5,5,5, 5,5,5,5,                 // 32..47
      5,6,6,6, 6,6,6,6, 6,7,7,7, 7,7,7,7,                 // 48..63
      7,7,8,8, 8,8,8,8, 8,8,9,9, 9,9,9,9,                 // 64..79
      9,9,10,10, 10,10,10,10, 10,10,11,11, 11,11,11,11,   // 80..95
      11,11,11,12, 12,12,12,12, 12,12,12,13, 13,13,13,13, // 96..111
      13,13,13,14, 14,14,14,14, 14,14,14,15, 15,15,15,15, // 112..127
      15,15,15,15, 16,16,16,16, 16,16,16,16, 17,17,17,17, // 128..143
      17,17,17,17, 18,18,18,18, 18,18,18,18, 19,19,19,19, // 144..159
      19,19,19,19, 19,20,20,20, 20,20,20,20, 20,21,21,21, // 160..175
      21,21,21,21, 21,22,22,22, 22,22,22,22, 22,23,23,23, // 176..191
      23,23,23,23, 23,23,24,24, 24,24,24,24, 24,24,25,25, // 192..207
      25,25,25,25, 25,25,26,26, 26,26,26,26, 26,26,27,27, // 208..223
      27,27,27,27, 27,27,27,28, 28,28,28,28, 28,28,28,29, // 224..239
      29,29,29,29, 29,29,29,30, 30,30,30,30, 30,30,30,31  // 240..255
  };
  // Round-up quantizer for 5-bit PVRTC components: for an 8-bit value v, entry v is the
  // smallest index i such that g_pvrtc_5[i] >= v.
  // Sixteen entries per row; the trailing comment gives the range of v covered by the row.
  static const uint8_t g_pvrtc_5_ceil[256] =
  {
      0,1,1,1, 1,1,1,1, 1,2,2,2, 2,2,2,2,                 // 0..15
      2,3,3,3, 3,3,3,3, 3,4,4,4, 4,4,4,4,                 // 16..31
      4,4,5,5, 5,5,5,5, 5,5,6,6, 6,6,6,6,                 // 32..47
      6,6,7,7, 7,7,7,7, 7,7,8,8, 8,8,8,8,                 // 48..63
      8,8,8,9, 9,9,9,9, 9,9,9,10, 10,10,10,10,            // 64..79
      10,10,10,11, 11,11,11,11, 11,11,11,12, 12,12,12,12, // 80..95
      12,12,12,12, 13,13,13,13, 13,13,13,13, 14,14,14,14, // 96..111
      14,14,14,14, 15,15,15,15, 15,15,15,15, 16,16,16,16, // 112..127
      16,16,16,16, 16,17,17,17, 17,17,17,17, 17,18,18,18, // 128..143
      18,18,18,18, 18,19,19,19, 19,19,19,19, 19,20,20,20, // 144..159
      20,20,20,20, 20,20,21,21, 21,21,21,21, 21,21,22,22, // 160..175
      22,22,22,22, 22,22,23,23, 23,23,23,23, 23,23,24,24, // 176..191
      24,24,24,24, 24,24,24,25, 25,25,25,25, 25,25,25,26, // 192..207
      26,26,26,26, 26,26,26,27, 27,27,27,27, 27,27,27,28, // 208..223
      28,28,28,28, 28,28,28,28, 29,29,29,29, 29,29,29,29, // 224..239
      30,30,30,30, 30,30,30,30, 31,31,31,31, 31,31,31,31  // 240..255
  };
  // Round-down quantizer for 4-bit PVRTC components: for an 8-bit value v, entry v is the
  // largest index i such that g_pvrtc_4[i] <= v.
  // Sixteen entries per row; the trailing comment gives the range of v covered by the row.
  static const uint8_t g_pvrtc_4_floor[256] =
  {
      0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,                 // 0..15
      1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,                 // 16..31
      1,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,                 // 32..47
      2,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3,                 // 48..63
      3,3,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,                 // 64..79
      4,4,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5,                 // 80..95
      5,5,5,6, 6,6,6,6, 6,6,6,6, 6,6,6,6,                 // 96..111
      6,6,6,7, 7,7,7,7, 7,7,7,7, 7,7,7,7,                 // 112..127
      7,7,7,7, 7,7,7,7, 7,7,7,7, 8,8,8,8,                 // 128..143
      8,8,8,8, 8,8,8,8, 8,8,8,8, 9,9,9,9,                 // 144..159
      9,9,9,9, 9,9,9,9, 9,9,9,9, 9,10,10,10,              // 160..175
      10,10,10,10, 10,10,10,10, 10,10,10,10, 10,11,11,11, // 176..191
      11,11,11,11, 11,11,11,11, 11,11,11,11, 11,11,12,12, // 192..207
      12,12,12,12, 12,12,12,12, 12,12,12,12, 12,12,13,13, // 208..223
      13,13,13,13, 13,13,13,13, 13,13,13,13, 13,13,13,14, // 224..239
      14,14,14,14, 14,14,14,14, 14,14,14,14, 14,14,14,15  // 240..255
  };
  // Round-up quantizer for 4-bit PVRTC components: for an 8-bit value v, entry v is the
  // smallest index i such that g_pvrtc_4[i] >= v.
  // Sixteen entries per row; the trailing comment gives the range of v covered by the row.
  static const uint8_t g_pvrtc_4_ceil[256] =
  {
      0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,                 // 0..15
      1,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,                 // 16..31
      2,2,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3,                 // 32..47
      3,3,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4,                 // 48..63
      4,4,4,5, 5,5,5,5, 5,5,5,5, 5,5,5,5,                 // 64..79
      5,5,5,6, 6,6,6,6, 6,6,6,6, 6,6,6,6,                 // 80..95
      6,6,6,6, 7,7,7,7, 7,7,7,7, 7,7,7,7,                 // 96..111
      7,7,7,7, 8,8,8,8, 8,8,8,8, 8,8,8,8,                 // 112..127
      8,8,8,8, 8,8,8,8, 8,8,8,8, 8,9,9,9,                 // 128..143
      9,9,9,9, 9,9,9,9, 9,9,9,9, 9,10,10,10,              // 144..159
      10,10,10,10, 10,10,10,10, 10,10,10,10, 10,10,11,11, // 160..175
      11,11,11,11, 11,11,11,11, 11,11,11,11, 11,11,12,12, // 176..191
      12,12,12,12, 12,12,12,12, 12,12,12,12, 12,12,12,13, // 192..207
      13,13,13,13, 13,13,13,13, 13,13,13,13, 13,13,13,14, // 208..223
      14,14,14,14, 14,14,14,14, 14,14,14,14, 14,14,14,14, // 224..239
      15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15  // 240..255
  };
  // Round-down quantizer for 3-bit PVRTC components: for an 8-bit value v, entry v is the
  // largest index i such that g_pvrtc_3[i] <= v.
  // Sixteen entries per row; the trailing comment gives the range of v covered by the row.
  static const uint8_t g_pvrtc_3_floor[256] =
  {
      0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, // 0..15
      0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, // 16..31
      0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // 32..47
      1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // 48..63
      1,1,1,1, 1,1,1,1, 1,1,2,2, 2,2,2,2, // 64..79
      2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 80..95
      2,2,2,2, 2,2,2,2, 2,2,2,3, 3,3,3,3, // 96..111
      3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3, // 112..127
      3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3, // 128..143
      3,3,3,3, 4,4,4,4, 4,4,4,4, 4,4,4,4, // 144..159
      4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4, // 160..175
      4,4,4,4, 4,5,5,5, 5,5,5,5, 5,5,5,5, // 176..191
      5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, // 192..207
      5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,6,6, // 208..223
      6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,6, // 224..239
      6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,7  // 240..255
  };
  // Round-up quantizer for 3-bit PVRTC components: for an 8-bit value v, entry v is the
  // smallest index i such that g_pvrtc_3[i] >= v.
  // Sixteen entries per row; the trailing comment gives the range of v covered by the row.
  static const uint8_t g_pvrtc_3_ceil[256] =
  {
      0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // 0..15
      1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // 16..31
      1,1,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 32..47
      2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 48..63
      2,2,2,2, 2,2,2,2, 2,2,2,3, 3,3,3,3, // 64..79
      3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3, // 80..95
      3,3,3,3, 3,3,3,3, 3,3,3,3, 4,4,4,4, // 96..111
      4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4, // 112..127
      4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4, // 128..143
      4,4,4,4, 4,5,5,5, 5,5,5,5, 5,5,5,5, // 144..159
      5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, // 160..175
      5,5,5,5, 5,5,6,6, 6,6,6,6, 6,6,6,6, // 176..191
      6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,6, // 192..207
      6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,7, // 208..223
      7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,7, // 224..239
      7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,7  // 240..255
  };
  // Round-down quantizer for PVRTC alpha: for an 8-bit value v, entry v is the largest
  // index i (0..8) such that g_pvrtc_alpha[i] <= v.
  // Sixteen entries per row; the trailing comment gives the range of v covered by the row.
  static const uint8_t g_pvrtc_alpha_floor[256] =
  {
      0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, // 0..15
      0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, // 16..31
      0,0,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // 32..47
      1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // 48..63
      1,1,1,1, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 64..79
      2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 80..95
      2,2,2,2, 2,2,3,3, 3,3,3,3, 3,3,3,3, // 96..111
      3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3, // 112..127
      3,3,3,3, 3,3,3,3, 4,4,4,4, 4,4,4,4, // 128..143
      4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4, // 144..159
      4,4,4,4, 4,4,4,4, 4,4,5,5, 5,5,5,5, // 160..175
      5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, // 176..191
      5,5,5,5, 5,5,5,5, 5,5,5,5, 6,6,6,6, // 192..207
      6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,6, // 208..223
      6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,7,7, // 224..239
      7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,8  // 240..255
  };
  // Round-up quantizer for PVRTC alpha: for an 8-bit value v, entry v is the smallest
  // index i (0..8) such that g_pvrtc_alpha[i] >= v.
  // Sixteen entries per row; the trailing comment gives the range of v covered by the row.
  static const uint8_t g_pvrtc_alpha_ceil[256] =
  {
      0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // 0..15
      1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // 16..31
      1,1,1,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 32..47
      2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 48..63
      2,2,2,2, 2,3,3,3, 3,3,3,3, 3,3,3,3, // 64..79
      3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3, // 80..95
      3,3,3,3, 3,3,3,4, 4,4,4,4, 4,4,4,4, // 96..111
      4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4, // 112..127
      4,4,4,4, 4,4,4,4, 4,5,5,5, 5,5,5,5, // 128..143
      5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, // 144..159
      5,5,5,5, 5,5,5,5, 5,5,5,6, 6,6,6,6, // 160..175
      6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,6, // 176..191
      6,6,6,6, 6,6,6,6, 6,6,6,6, 6,7,7,7, // 192..207
      7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,7, // 208..223
      7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,8, // 224..239
      8,8,8,8, 8,8,8,8, 8,8,8,8, 8,8,8,8  // 240..255
  };
  2426. struct pvrtc4_block
  2427. {
  2428. uint32_t m_modulation;
  2429. uint32_t m_endpoints;
  2430. pvrtc4_block() : m_modulation(0), m_endpoints(0) { }
  2431. inline bool operator== (const pvrtc4_block& rhs) const
  2432. {
  2433. return (m_modulation == rhs.m_modulation) && (m_endpoints == rhs.m_endpoints);
  2434. }
  2435. inline void clear()
  2436. {
  2437. m_modulation = 0;
  2438. m_endpoints = 0;
  2439. }
  2440. inline bool get_block_uses_transparent_modulation() const
  2441. {
  2442. return (m_endpoints & 1) != 0;
  2443. }
  2444. inline void set_block_uses_transparent_modulation(bool m)
  2445. {
  2446. m_endpoints = (m_endpoints & ~1U) | static_cast<uint32_t>(m);
  2447. }
  2448. inline bool is_endpoint_opaque(uint32_t endpoint_index) const
  2449. {
  2450. static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U };
  2451. return (m_endpoints & s_bitmasks[basisu::open_range_check(endpoint_index, 2U)]) != 0;
  2452. }
  2453. inline void set_endpoint_opaque(uint32_t endpoint_index, bool opaque)
  2454. {
  2455. assert(endpoint_index < 2);
  2456. static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U };
  2457. if (opaque)
  2458. m_endpoints |= s_bitmasks[endpoint_index];
  2459. else
  2460. m_endpoints &= ~s_bitmasks[endpoint_index];
  2461. }
  2462. inline color32 get_endpoint_5554(uint32_t endpoint_index) const
  2463. {
  2464. assert(endpoint_index < 2);
  2465. static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
  2466. uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
  2467. uint32_t r, g, b, a;
  2468. if (packed & 0x8000)
  2469. {
  2470. // opaque 554 or 555
  2471. r = (packed >> 10) & 31;
  2472. g = (packed >> 5) & 31;
  2473. b = packed & 31;
  2474. if (!endpoint_index)
  2475. b |= (b >> 4);
  2476. a = 0xF;
  2477. }
  2478. else
  2479. {
  2480. // translucent 4433 or 4443
  2481. r = (packed >> 7) & 0x1E;
  2482. g = (packed >> 3) & 0x1E;
  2483. b = (packed & 0xF) << 1;
  2484. r |= (r >> 4);
  2485. g |= (g >> 4);
  2486. if (!endpoint_index)
  2487. b |= (b >> 3);
  2488. else
  2489. b |= (b >> 4);
  2490. a = (packed >> 11) & 0xE;
  2491. }
  2492. assert((r < 32) && (g < 32) && (b < 32) && (a < 16));
  2493. return color32(r, g, b, a);
  2494. }
  2495. inline color32 get_endpoint_8888(uint32_t endpoint_index) const
  2496. {
  2497. assert(endpoint_index < 2);
  2498. static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
  2499. uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
  2500. uint32_t r, g, b, a;
  2501. if (packed & 0x8000)
  2502. {
  2503. // opaque 554 or 555
  2504. // 1RRRRRGGGGGBBBBM
  2505. // 1RRRRRGGGGGBBBBB
  2506. r = (packed >> 10) & 31;
  2507. g = (packed >> 5) & 31;
  2508. b = packed & 31;
  2509. r = g_pvrtc_5[r];
  2510. g = g_pvrtc_5[g];
  2511. if (!endpoint_index)
  2512. b = g_pvrtc_4[b >> 1];
  2513. else
  2514. b = g_pvrtc_5[b];
  2515. a = 255;
  2516. }
  2517. else
  2518. {
  2519. // translucent 4433 or 4443
  2520. // 0AAA RRRR GGGG BBBM
  2521. // 0AAA RRRR GGGG BBBB
  2522. r = (packed >> 8) & 0xF;
  2523. g = (packed >> 4) & 0xF;
  2524. b = packed & 0xF;
  2525. a = (packed >> 12) & 7;
  2526. r = g_pvrtc_4[r];
  2527. g = g_pvrtc_4[g];
  2528. if (!endpoint_index)
  2529. b = g_pvrtc_3[b >> 1];
  2530. else
  2531. b = g_pvrtc_4[b];
  2532. a = g_pvrtc_alpha[a];
  2533. }
  2534. return color32(r, g, b, a);
  2535. }
  2536. inline uint32_t get_endpoint_l8(uint32_t endpoint_index) const
  2537. {
  2538. color32 c(get_endpoint_8888(endpoint_index));
  2539. return c.r + c.g + c.b + c.a;
  2540. }
  2541. inline uint32_t get_opaque_endpoint_l0() const
  2542. {
  2543. uint32_t packed = m_endpoints & 0xFFFE;
  2544. uint32_t r, g, b;
  2545. assert(packed & 0x8000);
  2546. // opaque 554 or 555
  2547. r = (packed >> 10) & 31;
  2548. g = (packed >> 5) & 31;
  2549. b = packed & 31;
  2550. b |= (b >> 4);
  2551. return r + g + b;
  2552. }
  2553. inline uint32_t get_opaque_endpoint_l1() const
  2554. {
  2555. uint32_t packed = m_endpoints >> 16;
  2556. uint32_t r, g, b;
  2557. assert(packed & 0x8000);
  2558. // opaque 554 or 555
  2559. r = (packed >> 10) & 31;
  2560. g = (packed >> 5) & 31;
  2561. b = packed & 31;
  2562. return r + g + b;
  2563. }
  2564. static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint)
  2565. {
  2566. static const uint32_t s_comp_prec[4][4] =
  2567. {
  2568. // R0 G0 B0 A0 R1 G1 B1 A1
  2569. { 4, 4, 3, 3 },{ 4, 4, 4, 3 }, // transparent endpoint
  2570. { 5, 5, 4, 0 },{ 5, 5, 5, 0 } // opaque endpoint
  2571. };
  2572. return s_comp_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][basisu::open_range_check(c, 4U)];
  2573. }
  2574. static color32 get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint)
  2575. {
  2576. static const color32 s_color_prec[4] =
  2577. {
  2578. color32(4, 4, 3, 3), color32(4, 4, 4, 3), // transparent endpoint
  2579. color32(5, 5, 4, 0), color32(5, 5, 5, 0) // opaque endpoint
  2580. };
  2581. return s_color_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)];
  2582. }
  2583. inline void set_opaque_endpoint_floor(uint32_t endpoint_index, const color32& c)
  2584. {
  2585. assert(endpoint_index < 2);
  2586. const uint32_t m = m_endpoints & 1;
  2587. uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2];
  2588. if (!endpoint_index)
  2589. b = g_pvrtc_4_floor[b] << 1;
  2590. else
  2591. b = g_pvrtc_5_floor[b];
  2592. // rgba=555 here
  2593. assert((r < 32) && (g < 32) && (b < 32));
  2594. // 1RRRRRGGGGGBBBBM
  2595. // 1RRRRRGGGGGBBBBB
  2596. // opaque 554 or 555
  2597. uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b;
  2598. if (!endpoint_index)
  2599. packed = (packed & ~1) | m;
  2600. assert(packed <= 0xFFFF);
  2601. if (endpoint_index)
  2602. m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
  2603. else
  2604. m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
  2605. }
  2606. inline void set_opaque_endpoint_ceil(uint32_t endpoint_index, const color32& c)
  2607. {
  2608. assert(endpoint_index < 2);
  2609. const uint32_t m = m_endpoints & 1;
  2610. uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2];
  2611. if (!endpoint_index)
  2612. b = g_pvrtc_4_ceil[b] << 1;
  2613. else
  2614. b = g_pvrtc_5_ceil[b];
  2615. // rgba=555 here
  2616. assert((r < 32) && (g < 32) && (b < 32));
  2617. // 1RRRRRGGGGGBBBBM
  2618. // 1RRRRRGGGGGBBBBB
  2619. // opaque 554 or 555
  2620. uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b;
  2621. if (!endpoint_index)
  2622. packed |= m;
  2623. assert(packed <= 0xFFFF);
  2624. if (endpoint_index)
  2625. m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
  2626. else
  2627. m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
  2628. }
  2629. // opaque endpoints: 554 or 555
  2630. // transparent endpoints: 3443 or 3444
  2631. inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint)
  2632. {
  2633. assert(endpoint_index < 2);
  2634. const uint32_t m = m_endpoints & 1;
  2635. uint32_t r = c[0], g = c[1], b = c[2], a = c[3];
  2636. uint32_t packed;
  2637. if (opaque_endpoint)
  2638. {
  2639. if (!endpoint_index)
  2640. {
  2641. // 554
  2642. // 1RRRRRGGGGGBBBBM
  2643. assert((r < 32) && (g < 32) && (b < 16));
  2644. packed = 0x8000 | (r << 10) | (g << 5) | (b << 1) | m;
  2645. }
  2646. else
  2647. {
  2648. // 555
  2649. // 1RRRRRGGGGGBBBBB
  2650. assert((r < 32) && (g < 32) && (b < 32));
  2651. packed = 0x8000 | (r << 10) | (g << 5) | b;
  2652. }
  2653. }
  2654. else
  2655. {
  2656. if (!endpoint_index)
  2657. {
  2658. // 3443
  2659. // 0AAA RRRR GGGG BBBM
  2660. assert((r < 16) && (g < 16) && (b < 8) && (a < 8));
  2661. packed = (a << 12) | (r << 8) | (g << 4) | (b << 1) | m;
  2662. }
  2663. else
  2664. {
  2665. // 3444
  2666. // 0AAA RRRR GGGG BBBB
  2667. assert((r < 16) && (g < 16) && (b < 16) && (a < 8));
  2668. packed = (a << 12) | (r << 8) | (g << 4) | b;
  2669. }
  2670. }
  2671. assert(packed <= 0xFFFF);
  2672. if (endpoint_index)
  2673. m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
  2674. else
  2675. m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
  2676. }
  2677. inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c)
  2678. {
  2679. assert(endpoint_index < 2);
  2680. int a = g_pvrtc_alpha_floor[c.a];
  2681. if (a == 8)
  2682. {
  2683. // 554 or 555
  2684. uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2];
  2685. if (!endpoint_index)
  2686. b = g_pvrtc_4_floor[b];
  2687. else
  2688. b = g_pvrtc_5_floor[b];
  2689. set_endpoint_raw(endpoint_index, color32(r, g, b, a), true);
  2690. }
  2691. else
  2692. {
  2693. // 4433 or 4443
  2694. uint32_t r = g_pvrtc_4_floor[c[0]], g = g_pvrtc_4_floor[c[1]], b = c[2];
  2695. if (!endpoint_index)
  2696. b = g_pvrtc_3_floor[b];
  2697. else
  2698. b = g_pvrtc_4_floor[b];
  2699. set_endpoint_raw(endpoint_index, color32(r, g, b, a), false);
  2700. }
  2701. }
  2702. inline void set_endpoint_ceil(uint32_t endpoint_index, const color32& c)
  2703. {
  2704. assert(endpoint_index < 2);
  2705. int a = g_pvrtc_alpha_ceil[c.a];
  2706. if (a == 8)
  2707. {
  2708. // 554 or 555
  2709. uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2];
  2710. if (!endpoint_index)
  2711. b = g_pvrtc_4_ceil[b];
  2712. else
  2713. b = g_pvrtc_5_ceil[b];
  2714. set_endpoint_raw(endpoint_index, color32(r, g, b, a), true);
  2715. }
  2716. else
  2717. {
  2718. // 4433 or 4443
  2719. uint32_t r = g_pvrtc_4_ceil[c[0]], g = g_pvrtc_4_ceil[c[1]], b = c[2];
  2720. if (!endpoint_index)
  2721. b = g_pvrtc_3_ceil[b];
  2722. else
  2723. b = g_pvrtc_4_ceil[b];
  2724. set_endpoint_raw(endpoint_index, color32(r, g, b, a), false);
  2725. }
  2726. }
  2727. inline uint32_t get_modulation(uint32_t x, uint32_t y) const
  2728. {
  2729. assert((x < 4) && (y < 4));
  2730. return (m_modulation >> ((y * 4 + x) * 2)) & 3;
  2731. }
  2732. // Scaled by 8
  2733. inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const
  2734. {
  2735. static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 },{ 0, 4, 4, 8 } };
  2736. return s_block_scales[block_uses_transparent_modulation];
  2737. }
  2738. // Scaled by 8
  2739. inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const
  2740. {
  2741. return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)];
  2742. }
  2743. inline void set_modulation(uint32_t x, uint32_t y, uint32_t s)
  2744. {
  2745. assert((x < 4) && (y < 4) && (s < 4));
  2746. uint32_t n = (y * 4 + x) * 2;
  2747. m_modulation = (m_modulation & (~(3 << n))) | (s << n);
  2748. assert(get_modulation(x, y) == s);
  2749. }
  2750. // Assumes modulation was initialized to 0
  2751. inline void set_modulation_fast(uint32_t x, uint32_t y, uint32_t s)
  2752. {
  2753. assert((x < 4) && (y < 4) && (s < 4));
  2754. uint32_t n = (y * 4 + x) * 2;
  2755. m_modulation |= (s << n);
  2756. assert(get_modulation(x, y) == s);
  2757. }
  2758. };
  // Compiled out (#if 0); kept as reference data only.
  // One row per texel of a 4x4 block, indexed by (y * 4 + x). Every row sums to 16, and the rows
  // are the same weight quadruples that the DO_PIX(lx, ly, w0, w1, w2, w3) invocations in
  // fixup_pvrtc1_4_modulation_rgb()/fixup_pvrtc1_4_modulation_rgba() pass in literally.
  2759. #if 0
  2760. static const uint8_t g_pvrtc_bilinear_weights[16][4] =
  2761. {
  2762. { 4, 4, 4, 4 }, { 2, 6, 2, 6 }, { 8, 0, 8, 0 }, { 6, 2, 6, 2 },
  2763. { 2, 2, 6, 6 }, { 1, 3, 3, 9 }, { 4, 0, 12, 0 }, { 3, 1, 9, 3 },
  2764. { 8, 8, 0, 0 }, { 4, 12, 0, 0 }, { 16, 0, 0, 0 }, { 12, 4, 0, 0 },
  2765. { 6, 6, 2, 2 }, { 3, 9, 1, 3 }, { 12, 0, 4, 0 }, { 9, 3, 3, 1 },
  2766. };
  2767. #endif
  // Pairs one ETC1 block with a 32-bit packed PVRTC endpoint word.
  // NOTE(review): presumably per-block scratch data for the PVRTC1 transcode path; its users are
  // outside this part of the file - confirm before relying on that.
  2768. struct pvrtc1_temp_block
  2769. {
  2770. decoder_etc_block m_etc1_block;
  2771. uint32_t m_pvrtc_endpoints;
  2772. };
  2773. static inline uint32_t get_opaque_endpoint_l0(uint32_t endpoints)
  2774. {
  2775. uint32_t packed = endpoints;
  2776. uint32_t r, g, b;
  2777. assert(packed & 0x8000);
  2778. r = (packed >> 10) & 31;
  2779. g = (packed >> 5) & 31;
  2780. b = packed & 30;
  2781. b |= (b >> 4);
  2782. return r + g + b;
  2783. }
  2784. static inline uint32_t get_opaque_endpoint_l1(uint32_t endpoints)
  2785. {
  2786. uint32_t packed = endpoints >> 16;
  2787. uint32_t r, g, b;
  2788. assert(packed & 0x8000);
  2789. r = (packed >> 10) & 31;
  2790. g = (packed >> 5) & 31;
  2791. b = packed & 31;
  2792. return r + g + b;
  2793. }
  2794. static color32 get_endpoint_8888(uint32_t endpoints, uint32_t endpoint_index)
  2795. {
  2796. assert(endpoint_index < 2);
  2797. static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
  2798. uint32_t packed = (endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
  2799. uint32_t r, g, b, a;
  2800. if (packed & 0x8000)
  2801. {
  2802. // opaque 554 or 555
  2803. // 1RRRRRGGGGGBBBBM
  2804. // 1RRRRRGGGGGBBBBB
  2805. r = (packed >> 10) & 31;
  2806. g = (packed >> 5) & 31;
  2807. b = packed & 31;
  2808. r = g_pvrtc_5[r];
  2809. g = g_pvrtc_5[g];
  2810. if (!endpoint_index)
  2811. b = g_pvrtc_4[b >> 1];
  2812. else
  2813. b = g_pvrtc_5[b];
  2814. a = 255;
  2815. }
  2816. else
  2817. {
  2818. // translucent 4433 or 4443
  2819. // 0AAA RRRR GGGG BBBM
  2820. // 0AAA RRRR GGGG BBBB
  2821. r = (packed >> 8) & 0xF;
  2822. g = (packed >> 4) & 0xF;
  2823. b = packed & 0xF;
  2824. a = (packed >> 12) & 7;
  2825. r = g_pvrtc_4[r];
  2826. g = g_pvrtc_4[g];
  2827. if (!endpoint_index)
  2828. b = g_pvrtc_3[b >> 1];
  2829. else
  2830. b = g_pvrtc_4[b];
  2831. a = g_pvrtc_alpha[a];
  2832. }
  2833. return color32(r, g, b, a);
  2834. }
  2835. static uint32_t get_endpoint_l8(uint32_t endpoints, uint32_t endpoint_index)
  2836. {
  2837. color32 c(get_endpoint_8888(endpoints, endpoint_index));
  2838. return c.r + c.g + c.b + c.a;
  2839. }
  2840. #endif
  2841. #if BASISD_SUPPORT_PVRTC1
  2842. // TODO: Support decoding a non-pow2 ETC1S texture into the next larger pow2 PVRTC texture.
  2843. static void fixup_pvrtc1_4_modulation_rgb(const decoder_etc_block* pETC_Blocks, const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
  2844. {
  2845. const uint32_t x_mask = num_blocks_x - 1;
  2846. const uint32_t y_mask = num_blocks_y - 1;
  2847. const uint32_t x_bits = basisu::total_bits(x_mask);
  2848. const uint32_t y_bits = basisu::total_bits(y_mask);
  2849. const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
  2850. //const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
  2851. const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
  2852. uint32_t block_index = 0;
  2853. // really 3x3
  2854. int e0[4][4], e1[4][4];
  2855. for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
  2856. {
  2857. const uint32_t* pE_rows[3];
  2858. for (int ey = 0; ey < 3; ey++)
  2859. {
  2860. int by = y + ey - 1;
  2861. const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
  2862. pE_rows[ey] = pE;
  2863. for (int ex = 0; ex < 3; ex++)
  2864. {
  2865. int bx = 0 + ex - 1;
  2866. const uint32_t e = pE[bx & x_mask];
  2867. e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31;
  2868. e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31;
  2869. }
  2870. }
  2871. const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
  2872. for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
  2873. {
  2874. const decoder_etc_block& src_block = pETC_Blocks[block_index];
  2875. const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
  2876. uint32_t swizzled = x_swizzle | y_swizzle;
  2877. if (num_blocks_x != num_blocks_y)
  2878. {
  2879. swizzled &= swizzle_mask;
  2880. if (num_blocks_x > num_blocks_y)
  2881. swizzled |= ((x >> min_bits) << (min_bits * 2));
  2882. else
  2883. swizzled |= ((y >> min_bits) << (min_bits * 2));
  2884. }
  2885. pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
  2886. pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
  2887. uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
  2888. uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
  2889. uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];
  2890. const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
  2891. int by = (base_r + base_g + base_b) * 16;
  2892. int block_colors_y_x16[4];
  2893. block_colors_y_x16[0] = by + pInten_table48[2];
  2894. block_colors_y_x16[1] = by + pInten_table48[3];
  2895. block_colors_y_x16[2] = by + pInten_table48[1];
  2896. block_colors_y_x16[3] = by + pInten_table48[0];
  2897. {
  2898. const uint32_t ex = 2;
  2899. int bx = x + ex - 1;
  2900. bx &= x_mask;
  2901. #define DO_ROW(ey) \
  2902. { \
  2903. const uint32_t e = pE_rows[ey][bx]; \
  2904. e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \
  2905. e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \
  2906. }
  2907. DO_ROW(0);
  2908. DO_ROW(1);
  2909. DO_ROW(2);
  2910. #undef DO_ROW
  2911. }
  2912. uint32_t mod = 0;
  2913. uint32_t lookup_x[4];
  2914. #define DO_LOOKUP(lx) { \
  2915. const uint32_t byte_ofs = 7 - (((lx) * 4) >> 3); \
  2916. const uint32_t lsb_bits = src_block.m_bytes[byte_ofs] >> (((lx) & 1) * 4); \
  2917. const uint32_t msb_bits = src_block.m_bytes[byte_ofs - 2] >> (((lx) & 1) * 4); \
  2918. lookup_x[lx] = (lsb_bits & 0xF) | ((msb_bits & 0xF) << 4); }
  2919. DO_LOOKUP(0);
  2920. DO_LOOKUP(1);
  2921. DO_LOOKUP(2);
  2922. DO_LOOKUP(3);
  2923. #undef DO_LOOKUP
  2924. #define DO_PIX(lx, ly, w0, w1, w2, w3) \
  2925. { \
  2926. int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
  2927. int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
  2928. int cl = block_colors_y_x16[g_etc1_x_selector_unpack[ly][lookup_x[lx]]]; \
  2929. int dl = cb_l - ca_l; \
  2930. int vl = cl - ca_l; \
  2931. int p = vl * 16; \
  2932. if (ca_l > cb_l) { p = -p; dl = -dl; } \
  2933. uint32_t m = 0; \
  2934. if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
  2935. if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
  2936. if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
  2937. mod |= m; \
  2938. }
  2939. {
  2940. const uint32_t ex = 0, ey = 0;
  2941. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  2942. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  2943. DO_PIX(0, 0, 4, 4, 4, 4);
  2944. DO_PIX(1, 0, 2, 6, 2, 6);
  2945. DO_PIX(0, 1, 2, 2, 6, 6);
  2946. DO_PIX(1, 1, 1, 3, 3, 9);
  2947. }
  2948. {
  2949. const uint32_t ex = 1, ey = 0;
  2950. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  2951. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  2952. DO_PIX(2, 0, 8, 0, 8, 0);
  2953. DO_PIX(3, 0, 6, 2, 6, 2);
  2954. DO_PIX(2, 1, 4, 0, 12, 0);
  2955. DO_PIX(3, 1, 3, 1, 9, 3);
  2956. }
  2957. {
  2958. const uint32_t ex = 0, ey = 1;
  2959. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  2960. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  2961. DO_PIX(0, 2, 8, 8, 0, 0);
  2962. DO_PIX(1, 2, 4, 12, 0, 0);
  2963. DO_PIX(0, 3, 6, 6, 2, 2);
  2964. DO_PIX(1, 3, 3, 9, 1, 3);
  2965. }
  2966. {
  2967. const uint32_t ex = 1, ey = 1;
  2968. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  2969. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  2970. DO_PIX(2, 2, 16, 0, 0, 0);
  2971. DO_PIX(3, 2, 12, 4, 0, 0);
  2972. DO_PIX(2, 3, 12, 0, 4, 0);
  2973. DO_PIX(3, 3, 9, 3, 3, 1);
  2974. }
  2975. #undef DO_PIX
  2976. pDst_block->m_modulation = mod;
  2977. e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
  2978. e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
  2979. e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
  2980. e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
  2981. e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
  2982. e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
  2983. } // x
  2984. } // y
  2985. }
  2986. static void fixup_pvrtc1_4_modulation_rgba(
  2987. const decoder_etc_block* pETC_Blocks,
  2988. const uint32_t* pPVRTC_endpoints,
  2989. void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, void *pAlpha_blocks,
  2990. const endpoint* pEndpoints, const selector* pSelectors)
  2991. {
  2992. const uint32_t x_mask = num_blocks_x - 1;
  2993. const uint32_t y_mask = num_blocks_y - 1;
  2994. const uint32_t x_bits = basisu::total_bits(x_mask);
  2995. const uint32_t y_bits = basisu::total_bits(y_mask);
  2996. const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
  2997. //const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
  2998. const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
  2999. uint32_t block_index = 0;
  3000. // really 3x3
  3001. int e0[4][4], e1[4][4];
  3002. for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
  3003. {
  3004. const uint32_t* pE_rows[3];
  3005. for (int ey = 0; ey < 3; ey++)
  3006. {
  3007. int by = y + ey - 1;
  3008. const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
  3009. pE_rows[ey] = pE;
  3010. for (int ex = 0; ex < 3; ex++)
  3011. {
  3012. int bx = 0 + ex - 1;
  3013. const uint32_t e = pE[bx & x_mask];
  3014. e0[ex][ey] = get_endpoint_l8(e, 0);
  3015. e1[ex][ey] = get_endpoint_l8(e, 1);
  3016. }
  3017. }
  3018. const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
  3019. for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
  3020. {
  3021. const decoder_etc_block& src_block = pETC_Blocks[block_index];
  3022. const uint16_t* pSrc_alpha_block = reinterpret_cast<const uint16_t*>(static_cast<const uint32_t*>(pAlpha_blocks) + x + (y * num_blocks_x));
  3023. const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]];
  3024. const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]];
  3025. const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
  3026. uint32_t swizzled = x_swizzle | y_swizzle;
  3027. if (num_blocks_x != num_blocks_y)
  3028. {
  3029. swizzled &= swizzle_mask;
  3030. if (num_blocks_x > num_blocks_y)
  3031. swizzled |= ((x >> min_bits) << (min_bits * 2));
  3032. else
  3033. swizzled |= ((y >> min_bits) << (min_bits * 2));
  3034. }
  3035. pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
  3036. pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
  3037. uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
  3038. uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
  3039. uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];
  3040. const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
  3041. int by = (base_r + base_g + base_b) * 16;
  3042. int block_colors_y_x16[4];
  3043. block_colors_y_x16[0] = basisu::clamp<int>(by + pInten_table48[0], 0, 48 * 255);
  3044. block_colors_y_x16[1] = basisu::clamp<int>(by + pInten_table48[1], 0, 48 * 255);
  3045. block_colors_y_x16[2] = basisu::clamp<int>(by + pInten_table48[2], 0, 48 * 255);
  3046. block_colors_y_x16[3] = basisu::clamp<int>(by + pInten_table48[3], 0, 48 * 255);
  3047. uint32_t alpha_base_g = g_etc_5_to_8[pAlpha_endpoints->m_color5.g] * 16;
  3048. const int* pInten_table16 = g_etc1_inten_tables16[pAlpha_endpoints->m_inten5];
  3049. int alpha_block_colors_x16[4];
  3050. alpha_block_colors_x16[0] = basisu::clamp<int>(alpha_base_g + pInten_table16[0], 0, 16 * 255);
  3051. alpha_block_colors_x16[1] = basisu::clamp<int>(alpha_base_g + pInten_table16[1], 0, 16 * 255);
  3052. alpha_block_colors_x16[2] = basisu::clamp<int>(alpha_base_g + pInten_table16[2], 0, 16 * 255);
  3053. alpha_block_colors_x16[3] = basisu::clamp<int>(alpha_base_g + pInten_table16[3], 0, 16 * 255);
  3054. // clamp((base_r + base_g + base_b) * 16 + color_inten[s] * 48) + clamp(alpha_base_g * 16 + alpha_inten[as] * 16)
  3055. {
  3056. const uint32_t ex = 2;
  3057. int bx = x + ex - 1;
  3058. bx &= x_mask;
  3059. #define DO_ROW(ey) \
  3060. { \
  3061. const uint32_t e = pE_rows[ey][bx]; \
  3062. e0[ex][ey] = get_endpoint_l8(e, 0); \
  3063. e1[ex][ey] = get_endpoint_l8(e, 1); \
  3064. }
  3065. DO_ROW(0);
  3066. DO_ROW(1);
  3067. DO_ROW(2);
  3068. #undef DO_ROW
  3069. }
  3070. uint32_t mod = 0;
  3071. #define DO_PIX(lx, ly, w0, w1, w2, w3) \
  3072. { \
  3073. int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
  3074. int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
  3075. int cl = block_colors_y_x16[(src_block.m_bytes[4 + ly] >> (lx * 2)) & 3] + alpha_block_colors_x16[(pAlpha_selectors->m_selectors[ly] >> (lx * 2)) & 3]; \
  3076. int dl = cb_l - ca_l; \
  3077. int vl = cl - ca_l; \
  3078. int p = vl * 16; \
  3079. if (ca_l > cb_l) { p = -p; dl = -dl; } \
  3080. uint32_t m = 0; \
  3081. if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
  3082. if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
  3083. if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
  3084. mod |= m; \
  3085. }
  3086. {
  3087. const uint32_t ex = 0, ey = 0;
  3088. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  3089. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  3090. DO_PIX(0, 0, 4, 4, 4, 4);
  3091. DO_PIX(1, 0, 2, 6, 2, 6);
  3092. DO_PIX(0, 1, 2, 2, 6, 6);
  3093. DO_PIX(1, 1, 1, 3, 3, 9);
  3094. }
  3095. {
  3096. const uint32_t ex = 1, ey = 0;
  3097. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  3098. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  3099. DO_PIX(2, 0, 8, 0, 8, 0);
  3100. DO_PIX(3, 0, 6, 2, 6, 2);
  3101. DO_PIX(2, 1, 4, 0, 12, 0);
  3102. DO_PIX(3, 1, 3, 1, 9, 3);
  3103. }
  3104. {
  3105. const uint32_t ex = 0, ey = 1;
  3106. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  3107. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  3108. DO_PIX(0, 2, 8, 8, 0, 0);
  3109. DO_PIX(1, 2, 4, 12, 0, 0);
  3110. DO_PIX(0, 3, 6, 6, 2, 2);
  3111. DO_PIX(1, 3, 3, 9, 1, 3);
  3112. }
  3113. {
  3114. const uint32_t ex = 1, ey = 1;
  3115. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  3116. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  3117. DO_PIX(2, 2, 16, 0, 0, 0);
  3118. DO_PIX(3, 2, 12, 4, 0, 0);
  3119. DO_PIX(2, 3, 12, 0, 4, 0);
  3120. DO_PIX(3, 3, 9, 3, 3, 1);
  3121. }
  3122. #undef DO_PIX
  3123. pDst_block->m_modulation = mod;
  3124. e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
  3125. e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
  3126. e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
  3127. e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
  3128. e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
  3129. e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
  3130. } // x
  3131. } // y
  3132. }
  3133. #endif // BASISD_SUPPORT_PVRTC1
  3134. #if BASISD_SUPPORT_BC7_MODE5
  3135. static dxt_selector_range g_etc1_to_bc7_m5_selector_ranges[] =
  3136. {
  3137. { 0, 3 },
  3138. { 1, 3 },
  3139. { 0, 2 },
  3140. { 1, 2 },
  3141. { 2, 3 },
  3142. { 0, 1 },
  3143. };
  3144. const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]);
  3145. static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4];
  3146. const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10;
  3147. static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] =
  3148. {
  3149. { 0, 0, 1, 1 },
  3150. { 0, 0, 1, 2 },
  3151. { 0, 0, 1, 3 },
  3152. { 0, 0, 2, 3 },
  3153. { 0, 1, 1, 1 },
  3154. { 0, 1, 2, 2 },
  3155. { 0, 1, 2, 3 },
  3156. { 0, 2, 3, 3 },
  3157. { 1, 2, 2, 2 },
  3158. { 1, 2, 3, 3 },
  3159. };
  // One precomputed ETC1 -> BC7 mode 5 color conversion result.
  // The member order matters: the generated table rows are written as "{lo,hi,err}" and
  // brace-initialize this struct positionally.
  3160. struct etc1_to_bc7_m5_solution
  3161. {
  // Best low endpoint; the table generator searches values 0..127.
  3162. uint8_t m_lo;
  // Best high endpoint; the table generator searches values 0..127.
  3163. uint8_t m_hi;
  // Total squared error of that endpoint pair; the table generator clamps it to 0xFFFF.
  3164. uint16_t m_err;
  3165. };
  // Precomputed conversion table, one entry per (intensity table 0..7, 5-bit base value 0..31,
  // selector range, selector mapping). create_etc1_to_bc7_m5_color_conversion_table() emits the
  // entries with those four loops nested in that order, the selector mapping varying fastest.
  3166. static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = {
  3167. #include "basisu_transcoder_tables_bc7_m5_color.inc"
  3168. };
  3169. static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] =
  3170. {
  3171. { 0, 3 },
  3172. { 1, 3 },
  3173. { 0, 2 },
  3174. { 1, 2 },
  3175. { 2, 3 },
  3176. { 0, 1 }
  3177. };
  3178. const uint32_t NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5a_selector_ranges) / sizeof(g_etc1_to_bc7_m5a_selector_ranges[0]);
  3179. static uint32_t g_etc1_to_bc7_m5a_selector_range_index[4][4];
  // One entry of the ETC1 -> BC7 mode 5 alpha conversion table (g_etc1_g_to_bc7_m5a).
  // The member order matters because the table rows in the generated .inc initialize it positionally.
  3180. struct etc1_g_to_bc7_m5a_conversion
  3181. {
  // Low and high 8-bit endpoints.
  3182. uint8_t m_lo, m_hi;
  // NOTE(review): presumably the packed output selectors chosen by the table generator
  // ("best_output_selectors") - the code that writes this field is not visible here; confirm.
  3183. uint8_t m_trans;
  3184. };
  // Precomputed alpha conversion table with 8 * 32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES entries.
  // The alpha table generator loops over intensity table (8), 5-bit base value (32) and selector
  // range in that nesting order.
  3185. static etc1_g_to_bc7_m5a_conversion g_etc1_g_to_bc7_m5a[8 * 32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES] =
  3186. {
  3187. #include "basisu_transcoder_tables_bc7_m5_alpha.inc"
  3188. };
  3189. static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t cur_ofs)
  3190. {
  3191. assert(num_bits < 32);
  3192. assert(val < (1ULL << num_bits));
  3193. uint32_t mask = static_cast<uint32_t>((1ULL << num_bits) - 1);
  3194. while (num_bits)
  3195. {
  3196. const uint32_t n = basisu::minimum<uint32_t>(8 - (cur_ofs & 7), num_bits);
  3197. pBytes[cur_ofs >> 3] &= ~static_cast<uint8_t>(mask << (cur_ofs & 7));
  3198. pBytes[cur_ofs >> 3] |= static_cast<uint8_t>(val << (cur_ofs & 7));
  3199. val >>= n;
  3200. mask >>= n;
  3201. num_bits -= n;
  3202. cur_ofs += n;
  3203. }
  3204. return cur_ofs;
  3205. }
  // Bitfield view of a 128-bit BC7 mode 5 block as two 64-bit words; each word can also be
  // accessed whole through m_lo_bits / m_hi_bits.
  // The field widths of each word add up to exactly 64 bits. How bitfields are allocated inside
  // a word is implementation-defined in C++, so the struct only matches the block format on
  // compilers that allocate bitfields starting from the least significant bit.
  3206. struct bc7_mode_5
  3207. {
  // Low word: mode bits, rotation, 7-bit RGB endpoint pairs, alpha endpoint 0 and the first
  // 6 bits of alpha endpoint 1.
  3208. union
  3209. {
  3210. struct
  3211. {
  3212. uint64_t m_mode : 6;
  3213. uint64_t m_rot : 2;
  3214. uint64_t m_r0 : 7;
  3215. uint64_t m_r1 : 7;
  3216. uint64_t m_g0 : 7;
  3217. uint64_t m_g1 : 7;
  3218. uint64_t m_b0 : 7;
  3219. uint64_t m_b1 : 7;
  3220. uint64_t m_a0 : 8;
  3221. uint64_t m_a1_0 : 6;
  3222. } m_lo;
  3223. uint64_t m_lo_bits;
  3224. };
  // High word: the remaining 2 bits of alpha endpoint 1, then sixteen color selectors (m_cXY)
  // and sixteen alpha selectors (m_aXY). The first selector of each group is 1 bit wide, the
  // other fifteen are 2 bits wide.
  3225. union
  3226. {
  3227. struct
  3228. {
  3229. uint64_t m_a1_1 : 2;
  3230. // bit 2
  3231. uint64_t m_c00 : 1;
  3232. uint64_t m_c10 : 2;
  3233. uint64_t m_c20 : 2;
  3234. uint64_t m_c30 : 2;
  3235. uint64_t m_c01 : 2;
  3236. uint64_t m_c11 : 2;
  3237. uint64_t m_c21 : 2;
  3238. uint64_t m_c31 : 2;
  3239. uint64_t m_c02 : 2;
  3240. uint64_t m_c12 : 2;
  3241. uint64_t m_c22 : 2;
  3242. uint64_t m_c32 : 2;
  3243. uint64_t m_c03 : 2;
  3244. uint64_t m_c13 : 2;
  3245. uint64_t m_c23 : 2;
  3246. uint64_t m_c33 : 2;
  3247. // bit 33
  3248. uint64_t m_a00 : 1;
  3249. uint64_t m_a10 : 2;
  3250. uint64_t m_a20 : 2;
  3251. uint64_t m_a30 : 2;
  3252. uint64_t m_a01 : 2;
  3253. uint64_t m_a11 : 2;
  3254. uint64_t m_a21 : 2;
  3255. uint64_t m_a31 : 2;
  3256. uint64_t m_a02 : 2;
  3257. uint64_t m_a12 : 2;
  3258. uint64_t m_a22 : 2;
  3259. uint64_t m_a32 : 2;
  3260. uint64_t m_a03 : 2;
  3261. uint64_t m_a13 : 2;
  3262. uint64_t m_a23 : 2;
  3263. uint64_t m_a33 : 2;
  3264. } m_hi;
  3265. uint64_t m_hi_bits;
  3266. };
  3267. };
  3268. #if BASISD_WRITE_NEW_BC7_MODE5_TABLES
  3269. static void create_etc1_to_bc7_m5_color_conversion_table()
  3270. {
  3271. FILE* pFile = nullptr;
  3272. fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_color.inc", "w");
  3273. uint32_t n = 0;
  3274. for (int inten = 0; inten < 8; inten++)
  3275. {
  3276. for (uint32_t g = 0; g < 32; g++)
  3277. {
  3278. color32 block_colors[4];
  3279. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  3280. for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; sr++)
  3281. {
  3282. const uint32_t low_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_low;
  3283. const uint32_t high_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_high;
  3284. for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS; m++)
  3285. {
  3286. uint32_t best_lo = 0;
  3287. uint32_t best_hi = 0;
  3288. uint64_t best_err = UINT64_MAX;
  3289. for (uint32_t hi = 0; hi <= 127; hi++)
  3290. {
  3291. for (uint32_t lo = 0; lo <= 127; lo++)
  3292. {
  3293. uint32_t colors[4];
  3294. colors[0] = (lo << 1) | (lo >> 6);
  3295. colors[3] = (hi << 1) | (hi >> 6);
  3296. colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
  3297. colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;
  3298. uint64_t total_err = 0;
  3299. for (uint32_t s = low_selector; s <= high_selector; s++)
  3300. {
  3301. int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]];
  3302. int err_scale = 1;
  3303. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  3304. // the low/high selectors which are clamping to either 0 or 255.
  3305. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  3306. err_scale = 5;
  3307. total_err += (err * err) * err_scale;
  3308. }
  3309. if (total_err < best_err)
  3310. {
  3311. best_err = total_err;
  3312. best_lo = lo;
  3313. best_hi = hi;
  3314. }
  3315. }
  3316. }
  3317. best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
  3318. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
  3319. n++;
  3320. if ((n & 31) == 31)
  3321. fprintf(pFile, "\n");
  3322. } // m
  3323. } // sr
  3324. } // g
  3325. } // inten
  3326. fclose(pFile);
  3327. }
  3328. static void create_etc1_to_bc7_m5_alpha_conversion_table()
  3329. {
  3330. FILE* pFile = nullptr;
  3331. fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_alpha.inc", "w");
  3332. uint32_t n = 0;
  3333. for (int inten = 0; inten < 8; inten++)
  3334. {
  3335. for (uint32_t g = 0; g < 32; g++)
  3336. {
  3337. color32 block_colors[4];
  3338. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  3339. for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; sr++)
  3340. {
  3341. const uint32_t low_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_low;
  3342. const uint32_t high_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_high;
  3343. uint32_t best_lo = 0;
  3344. uint32_t best_hi = 0;
  3345. uint64_t best_err = UINT64_MAX;
  3346. uint32_t best_output_selectors = 0;
  3347. for (uint32_t hi = 0; hi <= 255; hi++)
  3348. {
  3349. for (uint32_t lo = 0; lo <= 255; lo++)
  3350. {
  3351. uint32_t colors[4];
  3352. colors[0] = lo;
  3353. colors[3] = hi;
  3354. colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
  3355. colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;
  3356. uint64_t total_err = 0;
  3357. uint32_t output_selectors = 0;
  3358. for (uint32_t s = low_selector; s <= high_selector; s++)
  3359. {
  3360. int best_mapping_err = INT_MAX;
  3361. int best_k = 0;
  3362. for (int k = 0; k < 4; k++)
  3363. {
  3364. int mapping_err = block_colors[s].g - colors[k];
  3365. mapping_err *= mapping_err;
  3366. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  3367. // the low/high selectors which are clamping to either 0 or 255.
  3368. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  3369. mapping_err *= 5;
  3370. if (mapping_err < best_mapping_err)
  3371. {
  3372. best_mapping_err = mapping_err;
  3373. best_k = k;
  3374. }
  3375. } // k
  3376. total_err += best_mapping_err;
  3377. output_selectors |= (best_k << (s * 2));
  3378. } // s
  3379. if (total_err < best_err)
  3380. {
  3381. best_err = total_err;
  3382. best_lo = lo;
  3383. best_hi = hi;
  3384. best_output_selectors = output_selectors;
  3385. }
  3386. } // lo
  3387. } // hi
  3388. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors);
  3389. n++;
  3390. if ((n & 31) == 31)
  3391. fprintf(pFile, "\n");
  3392. } // sr
  3393. } // g
  3394. } // inten
  3395. fclose(pFile);
  3396. }
  3397. #endif // BASISD_WRITE_NEW_BC7_MODE5_TABLES
  3398. struct bc7_m5_match_entry
  3399. {
  3400. uint8_t m_hi;
  3401. uint8_t m_lo;
  3402. };
  3403. static bc7_m5_match_entry g_bc7_m5_equals_1[256] =
  3404. {
  3405. {0,0},{1,0},{3,0},{4,0},{6,0},{7,0},{9,0},{10,0},{12,0},{13,0},{15,0},{16,0},{18,0},{20,0},{21,0},{23,0},
  3406. {24,0},{26,0},{27,0},{29,0},{30,0},{32,0},{33,0},{35,0},{36,0},{38,0},{39,0},{41,0},{42,0},{44,0},{45,0},{47,0},
  3407. {48,0},{50,0},{52,0},{53,0},{55,0},{56,0},{58,0},{59,0},{61,0},{62,0},{64,0},{65,0},{66,0},{68,0},{69,0},{71,0},
  3408. {72,0},{74,0},{75,0},{77,0},{78,0},{80,0},{82,0},{83,0},{85,0},{86,0},{88,0},{89,0},{91,0},{92,0},{94,0},{95,0},
  3409. {97,0},{98,0},{100,0},{101,0},{103,0},{104,0},{106,0},{107,0},{109,0},{110,0},{112,0},{114,0},{115,0},{117,0},{118,0},{120,0},
  3410. {121,0},{123,0},{124,0},{126,0},{127,0},{127,1},{126,2},{126,3},{127,3},{127,4},{126,5},{126,6},{127,6},{127,7},{126,8},{126,9},
  3411. {127,9},{127,10},{126,11},{126,12},{127,12},{127,13},{126,14},{125,15},{127,15},{126,16},{126,17},{127,17},{127,18},{126,19},{126,20},{127,20},
  3412. {127,21},{126,22},{126,23},{127,23},{127,24},{126,25},{126,26},{127,26},{127,27},{126,28},{126,29},{127,29},{127,30},{126,31},{126,32},{127,32},
  3413. {127,33},{126,34},{126,35},{127,35},{127,36},{126,37},{126,38},{127,38},{127,39},{126,40},{126,41},{127,41},{127,42},{126,43},{126,44},{127,44},
  3414. {127,45},{126,46},{125,47},{127,47},{126,48},{126,49},{127,49},{127,50},{126,51},{126,52},{127,52},{127,53},{126,54},{126,55},{127,55},{127,56},
  3415. {126,57},{126,58},{127,58},{127,59},{126,60},{126,61},{127,61},{127,62},{126,63},{125,64},{126,64},{126,65},{127,65},{127,66},{126,67},{126,68},
  3416. {127,68},{127,69},{126,70},{126,71},{127,71},{127,72},{126,73},{126,74},{127,74},{127,75},{126,76},{125,77},{127,77},{126,78},{126,79},{127,79},
  3417. {127,80},{126,81},{126,82},{127,82},{127,83},{126,84},{126,85},{127,85},{127,86},{126,87},{126,88},{127,88},{127,89},{126,90},{126,91},{127,91},
  3418. {127,92},{126,93},{126,94},{127,94},{127,95},{126,96},{126,97},{127,97},{127,98},{126,99},{126,100},{127,100},{127,101},{126,102},{126,103},{127,103},
  3419. {127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115},
  3420. {126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127}
  3421. };
  3422. static void transcoder_init_bc7_mode5()
  3423. {
  3424. #if 0
  3425. // This is a little too much work to do at init time, so precompute it.
  3426. for (int i = 0; i < 256; i++)
  3427. {
  3428. int lowest_e = 256;
  3429. for (int lo = 0; lo < 128; lo++)
  3430. {
  3431. for (int hi = 0; hi < 128; hi++)
  3432. {
  3433. const int lo_e = (lo << 1) | (lo >> 6);
  3434. const int hi_e = (hi << 1) | (hi >> 6);
  3435. // Selector 1
  3436. int v = (lo_e * (64 - 21) + hi_e * 21 + 32) >> 6;
  3437. int e = abs(v - i);
  3438. if (e < lowest_e)
  3439. {
  3440. g_bc7_m5_equals_1[i].m_hi = static_cast<uint8_t>(hi);
  3441. g_bc7_m5_equals_1[i].m_lo = static_cast<uint8_t>(lo);
  3442. lowest_e = e;
  3443. }
  3444. } // hi
  3445. } // lo
  3446. printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo);
  3447. if ((i & 15) == 15) printf("\n");
  3448. }
  3449. #endif
  3450. for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; i++)
  3451. {
  3452. uint32_t l = g_etc1_to_bc7_m5_selector_ranges[i].m_low;
  3453. uint32_t h = g_etc1_to_bc7_m5_selector_ranges[i].m_high;
  3454. g_etc1_to_bc7_m5_selector_range_index[l][h] = i;
  3455. }
  3456. for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; i++)
  3457. {
  3458. uint32_t l = g_etc1_to_bc7_m5a_selector_ranges[i].m_low;
  3459. uint32_t h = g_etc1_to_bc7_m5a_selector_ranges[i].m_high;
  3460. g_etc1_to_bc7_m5a_selector_range_index[l][h] = i;
  3461. }
  3462. }
  3463. static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
  3464. {
  3465. bc7_mode_5* pDst_block = static_cast<bc7_mode_5*>(pDst);
  3466. // First ensure the block is cleared to all 0's
  3467. static_cast<uint64_t*>(pDst)[0] = 0;
  3468. static_cast<uint64_t*>(pDst)[1] = 0;
  3469. // Set alpha to 255
  3470. pDst_block->m_lo.m_mode = 1 << 5;
  3471. pDst_block->m_lo.m_a0 = 255;
  3472. pDst_block->m_lo.m_a1_0 = 63;
  3473. pDst_block->m_hi.m_a1_1 = 3;
  3474. const uint32_t low_selector = pSelector->m_lo_selector;
  3475. const uint32_t high_selector = pSelector->m_hi_selector;
  3476. const uint32_t base_color_r = pEndpoints->m_color5.r;
  3477. const uint32_t base_color_g = pEndpoints->m_color5.g;
  3478. const uint32_t base_color_b = pEndpoints->m_color5.b;
  3479. const uint32_t inten_table = pEndpoints->m_inten5;
  3480. if (pSelector->m_num_unique_selectors == 1)
  3481. {
  3482. // Solid color block - use precomputed tables and set selectors to 1.
  3483. uint32_t r, g, b;
  3484. decoder_etc_block::get_block_color5(pEndpoints->m_color5, inten_table, low_selector, r, g, b);
  3485. pDst_block->m_lo.m_r0 = g_bc7_m5_equals_1[r].m_lo;
  3486. pDst_block->m_lo.m_g0 = g_bc7_m5_equals_1[g].m_lo;
  3487. pDst_block->m_lo.m_b0 = g_bc7_m5_equals_1[b].m_lo;
  3488. pDst_block->m_lo.m_r1 = g_bc7_m5_equals_1[r].m_hi;
  3489. pDst_block->m_lo.m_g1 = g_bc7_m5_equals_1[g].m_hi;
  3490. pDst_block->m_lo.m_b1 = g_bc7_m5_equals_1[b].m_hi;
  3491. set_block_bits((uint8_t*)pDst, 0x2aaaaaab, 31, 66);
  3492. return;
  3493. }
  3494. else if (pSelector->m_num_unique_selectors == 2)
  3495. {
  3496. // Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks.
  3497. color32 block_colors[4];
  3498. decoder_etc_block::get_block_colors5(block_colors, color32(base_color_r, base_color_g, base_color_b, 255), inten_table);
  3499. const uint32_t r0 = block_colors[low_selector].r;
  3500. const uint32_t g0 = block_colors[low_selector].g;
  3501. const uint32_t b0 = block_colors[low_selector].b;
  3502. const uint32_t r1 = block_colors[high_selector].r;
  3503. const uint32_t g1 = block_colors[high_selector].g;
  3504. const uint32_t b1 = block_colors[high_selector].b;
  3505. pDst_block->m_lo.m_r0 = r0 >> 1;
  3506. pDst_block->m_lo.m_g0 = g0 >> 1;
  3507. pDst_block->m_lo.m_b0 = b0 >> 1;
  3508. pDst_block->m_lo.m_r1 = r1 >> 1;
  3509. pDst_block->m_lo.m_g1 = g1 >> 1;
  3510. pDst_block->m_lo.m_b1 = b1 >> 1;
  3511. uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0;
  3512. for (uint32_t y = 0; y < 4; y++)
  3513. {
  3514. for (uint32_t x = 0; x < 4; x++)
  3515. {
  3516. uint32_t s = pSelector->get_selector(x, y);
  3517. uint32_t os = (s == low_selector) ? output_low_selector : (3 ^ output_low_selector);
  3518. uint32_t num_bits = 2;
  3519. if ((x | y) == 0)
  3520. {
  3521. if (os & 2)
  3522. {
  3523. pDst_block->m_lo.m_r0 = r1 >> 1;
  3524. pDst_block->m_lo.m_g0 = g1 >> 1;
  3525. pDst_block->m_lo.m_b0 = b1 >> 1;
  3526. pDst_block->m_lo.m_r1 = r0 >> 1;
  3527. pDst_block->m_lo.m_g1 = g0 >> 1;
  3528. pDst_block->m_lo.m_b1 = b0 >> 1;
  3529. output_low_selector = 3;
  3530. os = 0;
  3531. }
  3532. num_bits = 1;
  3533. }
  3534. output_bits |= (os << output_bit_offset);
  3535. output_bit_offset += num_bits;
  3536. }
  3537. }
  3538. set_block_bits((uint8_t*)pDst, output_bits, 31, 66);
  3539. return;
  3540. }
  3541. const uint32_t selector_range_table = g_etc1_to_bc7_m5_selector_range_index[low_selector][high_selector];
  3542. //[32][8][RANGES][MAPPING]
  3543. const etc1_to_bc7_m5_solution* pTable_r = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_r) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
  3544. const etc1_to_bc7_m5_solution* pTable_g = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_g) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
  3545. const etc1_to_bc7_m5_solution* pTable_b = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_b) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
  3546. uint32_t best_err = UINT_MAX;
  3547. uint32_t best_mapping = 0;
  3548. assert(NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS == 10);
  3549. #define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
  3550. DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
  3551. DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
  3552. #undef DO_ITER
  3553. const uint8_t* pSelectors_xlat = &g_etc1_to_bc7_m5_selector_mappings[best_mapping][0];
  3554. uint32_t s_inv = 0;
  3555. if (pSelectors_xlat[pSelector->get_selector(0, 0)] & 2)
  3556. {
  3557. pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_hi;
  3558. pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_hi;
  3559. pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_hi;
  3560. pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo;
  3561. pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo;
  3562. pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo;
  3563. s_inv = 3;
  3564. }
  3565. else
  3566. {
  3567. pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_lo;
  3568. pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_lo;
  3569. pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_lo;
  3570. pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_hi;
  3571. pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_hi;
  3572. pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_hi;
  3573. }
  3574. uint32_t output_bits = 0, output_bit_ofs = 0;
  3575. for (uint32_t y = 0; y < 4; y++)
  3576. {
  3577. for (uint32_t x = 0; x < 4; x++)
  3578. {
  3579. const uint32_t s = pSelector->get_selector(x, y);
  3580. const uint32_t os = pSelectors_xlat[s] ^ s_inv;
  3581. output_bits |= (os << output_bit_ofs);
  3582. output_bit_ofs += (((x | y) == 0) ? 1 : 2);
  3583. }
  3584. }
  3585. set_block_bits((uint8_t*)pDst, output_bits, 31, 66);
  3586. }
  3587. static void convert_etc1s_to_bc7_m5_alpha(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
  3588. {
  3589. bc7_mode_5* pDst_block = static_cast<bc7_mode_5*>(pDst);
  3590. const uint32_t low_selector = pSelector->m_lo_selector;
  3591. const uint32_t high_selector = pSelector->m_hi_selector;
  3592. const uint32_t base_color_r = pEndpoints->m_color5.r;
  3593. const uint32_t inten_table = pEndpoints->m_inten5;
  3594. if (pSelector->m_num_unique_selectors == 1)
  3595. {
  3596. uint32_t r;
  3597. decoder_etc_block::get_block_color5_r(pEndpoints->m_color5, inten_table, low_selector, r);
  3598. pDst_block->m_lo.m_a0 = r;
  3599. pDst_block->m_lo.m_a1_0 = r & 63;
  3600. pDst_block->m_hi.m_a1_1 = r >> 6;
  3601. return;
  3602. }
  3603. else if (pSelector->m_num_unique_selectors == 2)
  3604. {
  3605. // Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks.
  3606. int block_colors[4];
  3607. decoder_etc_block::get_block_colors5_g(block_colors, pEndpoints->m_color5, inten_table);
  3608. pDst_block->m_lo.m_a0 = block_colors[low_selector];
  3609. pDst_block->m_lo.m_a1_0 = block_colors[high_selector] & 63;
  3610. pDst_block->m_hi.m_a1_1 = block_colors[high_selector] >> 6;
  3611. uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0;
  3612. for (uint32_t y = 0; y < 4; y++)
  3613. {
  3614. for (uint32_t x = 0; x < 4; x++)
  3615. {
  3616. const uint32_t s = pSelector->get_selector(x, y);
  3617. uint32_t os = (s == low_selector) ? output_low_selector : (3 ^ output_low_selector);
  3618. uint32_t num_bits = 2;
  3619. if ((x | y) == 0)
  3620. {
  3621. if (os & 2)
  3622. {
  3623. pDst_block->m_lo.m_a0 = block_colors[high_selector];
  3624. pDst_block->m_lo.m_a1_0 = block_colors[low_selector] & 63;
  3625. pDst_block->m_hi.m_a1_1 = block_colors[low_selector] >> 6;
  3626. output_low_selector = 3;
  3627. os = 0;
  3628. }
  3629. num_bits = 1;
  3630. }
  3631. output_bits |= (os << output_bit_offset);
  3632. output_bit_offset += num_bits;
  3633. }
  3634. }
  3635. set_block_bits((uint8_t*)pDst, output_bits, 31, 97);
  3636. return;
  3637. }
  3638. const uint32_t selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector];
  3639. const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table];
  3640. pDst_block->m_lo.m_a0 = pTable->m_lo;
  3641. pDst_block->m_lo.m_a1_0 = pTable->m_hi & 63;
  3642. pDst_block->m_hi.m_a1_1 = pTable->m_hi >> 6;
  3643. uint32_t output_bit_offset = 0, output_bits = 0, selector_trans = pTable->m_trans;
  3644. for (uint32_t y = 0; y < 4; y++)
  3645. {
  3646. for (uint32_t x = 0; x < 4; x++)
  3647. {
  3648. const uint32_t s = pSelector->get_selector(x, y);
  3649. uint32_t os = (selector_trans >> (s * 2)) & 3;
  3650. uint32_t num_bits = 2;
  3651. if ((x | y) == 0)
  3652. {
  3653. if (os & 2)
  3654. {
  3655. pDst_block->m_lo.m_a0 = pTable->m_hi;
  3656. pDst_block->m_lo.m_a1_0 = pTable->m_lo & 63;
  3657. pDst_block->m_hi.m_a1_1 = pTable->m_lo >> 6;
  3658. selector_trans ^= 0xFF;
  3659. os ^= 3;
  3660. }
  3661. num_bits = 1;
  3662. }
  3663. output_bits |= (os << output_bit_offset);
  3664. output_bit_offset += num_bits;
  3665. }
  3666. }
  3667. set_block_bits((uint8_t*)pDst, output_bits, 31, 97);
  3668. }
  3669. #endif // BASISD_SUPPORT_BC7_MODE5
  3670. #if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_UASTC
  3671. static const uint8_t g_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 };
  3672. #endif
  3673. #if BASISD_SUPPORT_ETC2_EAC_A8
  3674. static void convert_etc1s_to_etc2_eac_a8(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
  3675. {
  3676. const uint32_t low_selector = pSelector->m_lo_selector;
  3677. const uint32_t high_selector = pSelector->m_hi_selector;
  3678. const color32& base_color = pEndpoints->m_color5;
  3679. const uint32_t inten_table = pEndpoints->m_inten5;
  3680. if (low_selector == high_selector)
  3681. {
  3682. uint32_t r;
  3683. decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r);
  3684. // Constant alpha block
  3685. // Select table 13, use selector 4 (0), set multiplier to 1 and base color g
  3686. pDst_block->m_base = r;
  3687. pDst_block->m_table = 13;
  3688. pDst_block->m_multiplier = 1;
  3689. // selectors are all 4's
  3690. memcpy(pDst_block->m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
  3691. return;
  3692. }
  3693. uint32_t selector_range_table = 0;
  3694. for (selector_range_table = 0; selector_range_table < NUM_ETC2_EAC_SELECTOR_RANGES; selector_range_table++)
  3695. if ((low_selector == s_etc2_eac_selector_ranges[selector_range_table].m_low) && (high_selector == s_etc2_eac_selector_ranges[selector_range_table].m_high))
  3696. break;
  3697. if (selector_range_table >= NUM_ETC2_EAC_SELECTOR_RANGES)
  3698. selector_range_table = 0;
  3699. const etc1_g_to_eac_conversion* pTable_entry = &s_etc1_g_to_etc2_a8[base_color.r + inten_table * 32][selector_range_table];
  3700. pDst_block->m_base = pTable_entry->m_base;
  3701. pDst_block->m_table = pTable_entry->m_table_mul >> 4;
  3702. pDst_block->m_multiplier = pTable_entry->m_table_mul & 15;
  3703. uint64_t selector_bits = 0;
  3704. for (uint32_t y = 0; y < 4; y++)
  3705. {
  3706. for (uint32_t x = 0; x < 4; x++)
  3707. {
  3708. uint32_t s = pSelector->get_selector(x, y);
  3709. uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7;
  3710. const uint32_t dst_ofs = 45 - (y + x * 4) * 3;
  3711. selector_bits |= (static_cast<uint64_t>(ds) << dst_ofs);
  3712. }
  3713. }
  3714. pDst_block->set_selector_bits(selector_bits);
  3715. }
  3716. #endif // BASISD_SUPPORT_ETC2_EAC_A8
  3717. #if BASISD_SUPPORT_ETC2_EAC_RG11
  3718. static const etc1_g_to_eac_conversion s_etc1_g_to_etc2_r11[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =
  3719. {
  3720. {{0,1,3328},{0,1,3328},{0,16,457},{0,16,456}},
  3721. {{0,226,3936},{0,226,3936},{0,17,424},{8,0,472}},
  3722. {{6,178,4012},{6,178,4008},{0,146,501},{16,0,472}},
  3723. {{14,178,4012},{14,178,4008},{8,146,501},{24,0,472}},
  3724. {{23,178,4012},{23,178,4008},{17,146,501},{33,0,472}},
  3725. {{31,178,4012},{31,178,4008},{25,146,501},{41,0,472}},
  3726. {{39,178,4012},{39,178,4008},{33,146,501},{49,0,472}},
  3727. {{47,178,4012},{47,178,4008},{41,146,501},{27,228,496}},
  3728. {{56,178,4012},{56,178,4008},{50,146,501},{36,228,496}},
  3729. {{64,178,4012},{64,178,4008},{58,146,501},{44,228,496}},
  3730. {{72,178,4012},{72,178,4008},{66,146,501},{52,228,496}},
  3731. {{80,178,4012},{80,178,4008},{74,146,501},{60,228,496}},
  3732. {{89,178,4012},{89,178,4008},{83,146,501},{69,228,496}},
  3733. {{97,178,4012},{97,178,4008},{91,146,501},{77,228,496}},
  3734. {{105,178,4012},{105,178,4008},{99,146,501},{85,228,496}},
  3735. {{113,178,4012},{113,178,4008},{107,146,501},{93,228,496}},
  3736. {{122,178,4012},{122,178,4008},{116,146,501},{102,228,496}},
  3737. {{130,178,4012},{130,178,4008},{124,146,501},{110,228,496}},
  3738. {{138,178,4012},{138,178,4008},{132,146,501},{118,228,496}},
  3739. {{146,178,4012},{146,178,4008},{140,146,501},{126,228,496}},
  3740. {{155,178,4012},{155,178,4008},{149,146,501},{135,228,496}},
  3741. {{163,178,4012},{163,178,4008},{157,146,501},{143,228,496}},
  3742. {{171,178,4012},{171,178,4008},{165,146,501},{151,228,496}},
  3743. {{179,178,4012},{179,178,4008},{173,146,501},{159,228,496}},
  3744. {{188,178,4012},{188,178,4008},{182,146,501},{168,228,496}},
  3745. {{196,178,4012},{196,178,4008},{190,146,501},{176,228,496}},
  3746. {{204,178,4012},{204,178,4008},{198,146,501},{184,228,496}},
  3747. {{212,178,4012},{212,178,4008},{206,146,501},{192,228,496}},
  3748. {{221,178,4012},{221,178,4008},{215,146,501},{201,228,496}},
  3749. {{229,178,4012},{229,178,4008},{223,146,501},{209,228,496}},
  3750. {{235,66,4012},{221,100,4008},{231,146,501},{217,228,496}},
  3751. {{211,102,4085},{254,32,4040},{211,102,501},{254,32,456}},
  3752. {{0,2,3328},{0,2,3328},{0,1,320},{0,1,320}},
  3753. {{7,162,3905},{7,162,3904},{0,17,480},{0,17,480}},
  3754. {{15,162,3906},{15,162,3904},{1,117,352},{1,117,352}},
  3755. {{23,162,3906},{23,162,3904},{5,34,500},{4,53,424}},
  3756. {{32,162,3906},{32,162,3904},{14,34,500},{3,69,424}},
  3757. {{40,162,3906},{40,162,3904},{22,34,500},{1,133,496}},
  3758. {{48,162,3906},{48,162,3904},{30,34,500},{4,85,496}},
  3759. {{56,162,3906},{56,162,3904},{38,34,500},{12,85,496}},
  3760. {{65,162,3906},{65,162,3904},{47,34,500},{1,106,424}},
  3761. {{73,162,3906},{73,162,3904},{55,34,500},{9,106,424}},
  3762. {{81,162,3906},{81,162,3904},{63,34,500},{7,234,496}},
  3763. {{89,162,3906},{89,162,3904},{71,34,500},{15,234,496}},
  3764. {{98,162,3906},{98,162,3904},{80,34,500},{24,234,496}},
  3765. {{106,162,3906},{106,162,3904},{88,34,500},{32,234,496}},
  3766. {{114,162,3906},{114,162,3904},{96,34,500},{40,234,496}},
  3767. {{122,162,3906},{122,162,3904},{104,34,500},{48,234,496}},
  3768. {{131,162,3906},{131,162,3904},{113,34,500},{57,234,496}},
  3769. {{139,162,3906},{139,162,3904},{121,34,500},{65,234,496}},
  3770. {{147,162,3906},{147,162,3904},{129,34,500},{73,234,496}},
  3771. {{155,162,3906},{155,162,3904},{137,34,500},{81,234,496}},
  3772. {{164,162,3906},{164,162,3904},{146,34,500},{90,234,496}},
  3773. {{172,162,3906},{172,162,3904},{154,34,500},{98,234,496}},
  3774. {{180,162,3906},{180,162,3904},{162,34,500},{106,234,496}},
  3775. {{188,162,3906},{188,162,3904},{170,34,500},{114,234,496}},
  3776. {{197,162,3906},{197,162,3904},{179,34,500},{123,234,496}},
  3777. {{205,162,3906},{205,162,3904},{187,34,500},{131,234,496}},
  3778. {{213,162,3906},{213,162,3904},{195,34,500},{139,234,496}},
  3779. {{221,162,3906},{221,162,3904},{203,34,500},{147,234,496}},
  3780. {{230,162,3906},{230,162,3904},{212,34,500},{156,234,496}},
  3781. {{238,162,3906},{174,106,4008},{220,34,500},{164,234,496}},
  3782. {{240,178,4001},{182,106,4008},{228,34,500},{172,234,496}},
  3783. {{166,108,4085},{115,31,4080},{166,108,501},{115,31,496}},
  3784. {{1,68,3328},{1,68,3328},{0,1,384},{0,1,384}},
  3785. {{1,51,3968},{1,51,3968},{0,2,384},{0,2,384}},
  3786. {{21,18,3851},{21,18,3848},{1,50,488},{1,50,488}},
  3787. {{26,195,3851},{29,18,3848},{0,67,488},{0,67,488}},
  3788. {{35,195,3851},{38,18,3848},{12,115,488},{0,3,496}},
  3789. {{43,195,3851},{46,18,3848},{20,115,488},{2,6,424}},
  3790. {{51,195,3851},{54,18,3848},{36,66,482},{4,22,424}},
  3791. {{59,195,3851},{62,18,3848},{44,66,482},{3,73,424}},
  3792. {{68,195,3851},{71,18,3848},{53,66,482},{3,22,496}},
  3793. {{76,195,3851},{79,18,3848},{61,66,482},{2,137,496}},
  3794. {{84,195,3851},{87,18,3848},{69,66,482},{1,89,496}},
  3795. {{92,195,3851},{95,18,3848},{77,66,482},{9,89,496}},
  3796. {{101,195,3851},{104,18,3848},{86,66,482},{18,89,496}},
  3797. {{109,195,3851},{112,18,3848},{94,66,482},{26,89,496}},
  3798. {{117,195,3851},{120,18,3848},{102,66,482},{34,89,496}},
  3799. {{125,195,3851},{128,18,3848},{110,66,482},{42,89,496}},
  3800. {{134,195,3851},{137,18,3848},{119,66,482},{51,89,496}},
  3801. {{141,195,3907},{145,18,3848},{127,66,482},{59,89,496}},
  3802. {{149,195,3907},{153,18,3848},{135,66,482},{67,89,496}},
  3803. {{157,195,3907},{161,18,3848},{143,66,482},{75,89,496}},
  3804. {{166,195,3907},{170,18,3848},{152,66,482},{84,89,496}},
  3805. {{174,195,3907},{178,18,3848},{160,66,482},{92,89,496}},
  3806. {{182,195,3907},{186,18,3848},{168,66,482},{100,89,496}},
  3807. {{190,195,3907},{194,18,3848},{176,66,482},{108,89,496}},
  3808. {{199,195,3907},{203,18,3848},{185,66,482},{117,89,496}},
  3809. {{207,195,3907},{211,18,3848},{193,66,482},{125,89,496}},
  3810. {{215,195,3907},{219,18,3848},{201,66,482},{133,89,496}},
  3811. {{223,195,3907},{227,18,3848},{209,66,482},{141,89,496}},
  3812. {{232,195,3907},{168,89,4008},{218,66,482},{150,89,496}},
  3813. {{236,18,3907},{176,89,4008},{226,66,482},{158,89,496}},
  3814. {{158,90,4085},{103,31,4080},{158,90,501},{103,31,496}},
  3815. {{166,90,4085},{111,31,4080},{166,90,501},{111,31,496}},
  3816. {{0,70,3328},{0,70,3328},{0,17,448},{0,17,448}},
  3817. {{0,117,3904},{0,117,3904},{0,35,384},{0,35,384}},
  3818. {{13,165,3905},{13,165,3904},{2,211,480},{2,211,480}},
  3819. {{21,165,3906},{21,165,3904},{1,51,488},{1,51,488}},
  3820. {{30,165,3906},{30,165,3904},{7,61,352},{7,61,352}},
  3821. {{38,165,3906},{38,165,3904},{2,125,352},{2,125,352}},
  3822. {{46,165,3906},{46,165,3904},{1,37,500},{10,125,352}},
  3823. {{54,165,3906},{54,165,3904},{9,37,500},{5,61,424}},
  3824. {{63,165,3906},{63,165,3904},{18,37,500},{1,189,424}},
  3825. {{71,165,3906},{71,165,3904},{26,37,500},{9,189,424}},
  3826. {{79,165,3906},{79,165,3904},{34,37,500},{4,77,424}},
  3827. {{87,165,3906},{87,165,3904},{42,37,500},{12,77,424}},
  3828. {{96,165,3906},{96,165,3904},{51,37,500},{8,93,424}},
  3829. {{104,165,3906},{104,165,3904},{59,37,500},{3,141,496}},
  3830. {{112,165,3906},{112,165,3904},{68,37,500},{11,141,496}},
  3831. {{120,165,3906},{120,165,3904},{76,37,500},{6,93,496}},
  3832. {{129,165,3906},{129,165,3904},{85,37,500},{15,93,496}},
  3833. {{70,254,4012},{137,165,3904},{93,37,500},{23,93,496}},
  3834. {{145,165,3906},{145,165,3904},{101,37,500},{31,93,496}},
  3835. {{86,254,4012},{153,165,3904},{109,37,500},{39,93,496}},
  3836. {{163,165,3906},{162,165,3904},{118,37,500},{48,93,496}},
  3837. {{171,165,3906},{170,165,3904},{126,37,500},{56,93,496}},
  3838. {{179,165,3906},{178,165,3904},{134,37,500},{64,93,496}},
  3839. {{187,165,3906},{187,165,3904},{142,37,500},{72,93,496}},
  3840. {{196,165,3906},{196,165,3904},{151,37,500},{81,93,496}},
  3841. {{204,165,3906},{204,165,3904},{159,37,500},{89,93,496}},
  3842. {{212,165,3906},{136,77,4008},{167,37,500},{97,93,496}},
  3843. {{220,165,3906},{131,93,4008},{175,37,500},{105,93,496}},
  3844. {{214,181,4001},{140,93,4008},{184,37,500},{114,93,496}},
  3845. {{222,181,4001},{148,93,4008},{192,37,500},{122,93,496}},
  3846. {{115,95,4085},{99,31,4080},{115,95,501},{99,31,496}},
  3847. {{123,95,4085},{107,31,4080},{123,95,501},{107,31,496}},
  3848. {{0,102,3840},{0,102,3840},{0,18,384},{0,18,384}},
  3849. {{5,167,3904},{5,167,3904},{0,13,256},{0,13,256}},
  3850. {{4,54,3968},{4,54,3968},{1,67,448},{1,67,448}},
  3851. {{30,198,3850},{30,198,3848},{0,3,480},{0,3,480}},
  3852. {{39,198,3850},{39,198,3848},{3,52,488},{3,52,488}},
  3853. {{47,198,3851},{47,198,3848},{3,4,488},{3,4,488}},
  3854. {{55,198,3851},{55,198,3848},{1,70,488},{1,70,488}},
  3855. {{53,167,3906},{63,198,3848},{3,22,488},{3,22,488}},
  3856. {{62,167,3906},{72,198,3848},{24,118,488},{0,6,496}},
  3857. {{70,167,3906},{80,198,3848},{32,118,488},{2,89,488}},
  3858. {{78,167,3906},{88,198,3848},{40,118,488},{1,73,496}},
  3859. {{86,167,3906},{96,198,3848},{48,118,488},{0,28,424}},
  3860. {{95,167,3906},{105,198,3848},{57,118,488},{9,28,424}},
  3861. {{103,167,3906},{113,198,3848},{65,118,488},{5,108,496}},
  3862. {{111,167,3906},{121,198,3848},{73,118,488},{13,108,496}},
  3863. {{119,167,3906},{129,198,3848},{81,118,488},{21,108,496}},
  3864. {{128,167,3906},{138,198,3848},{90,118,488},{6,28,496}},
  3865. {{136,167,3906},{146,198,3848},{98,118,488},{14,28,496}},
  3866. {{145,167,3906},{154,198,3848},{106,118,488},{22,28,496}},
  3867. {{153,167,3906},{162,198,3848},{114,118,488},{30,28,496}},
  3868. {{162,167,3906},{171,198,3848},{123,118,488},{39,28,496}},
  3869. {{170,167,3906},{179,198,3848},{131,118,488},{47,28,496}},
  3870. {{178,167,3906},{187,198,3848},{139,118,488},{55,28,496}},
  3871. {{186,167,3906},{195,198,3848},{147,118,488},{63,28,496}},
  3872. {{194,167,3906},{120,12,4008},{156,118,488},{72,28,496}},
  3873. {{206,198,3907},{116,28,4008},{164,118,488},{80,28,496}},
  3874. {{214,198,3907},{124,28,4008},{172,118,488},{88,28,496}},
  3875. {{222,198,3395},{132,28,4008},{180,118,488},{96,28,496}},
  3876. {{207,134,4001},{141,28,4008},{189,118,488},{105,28,496}},
  3877. {{95,30,4085},{86,31,4080},{95,30,501},{86,31,496}},
  3878. {{103,30,4085},{94,31,4080},{103,30,501},{94,31,496}},
  3879. {{111,30,4085},{102,31,4080},{111,30,501},{102,31,496}},
  3880. {{0,104,3840},{0,104,3840},{0,18,448},{0,18,448}},
  3881. {{4,39,3904},{4,39,3904},{0,4,384},{0,4,384}},
  3882. {{0,56,3968},{0,56,3968},{0,84,448},{0,84,448}},
  3883. {{6,110,3328},{6,110,3328},{0,20,448},{0,20,448}},
  3884. {{41,200,3850},{41,200,3848},{1,4,480},{1,4,480}},
  3885. {{49,200,3850},{49,200,3848},{1,8,416},{1,8,416}},
  3886. {{57,200,3851},{57,200,3848},{1,38,488},{1,38,488}},
  3887. {{65,200,3851},{65,200,3848},{1,120,488},{1,120,488}},
  3888. {{74,200,3851},{74,200,3848},{2,72,488},{2,72,488}},
  3889. {{68,6,3907},{82,200,3848},{2,24,488},{2,24,488}},
  3890. {{77,6,3907},{90,200,3848},{26,120,488},{10,24,488}},
  3891. {{97,63,3330},{98,200,3848},{34,120,488},{2,8,496}},
  3892. {{106,63,3330},{107,200,3848},{43,120,488},{3,92,488}},
  3893. {{114,63,3330},{115,200,3848},{51,120,488},{11,92,488}},
  3894. {{122,63,3330},{123,200,3848},{59,120,488},{7,76,496}},
  3895. {{130,63,3330},{131,200,3848},{67,120,488},{15,76,496}},
  3896. {{139,63,3330},{140,200,3848},{76,120,488},{24,76,496}},
  3897. {{147,63,3330},{148,200,3848},{84,120,488},{32,76,496}},
  3898. {{155,63,3330},{156,200,3848},{92,120,488},{40,76,496}},
  3899. {{164,63,3330},{164,200,3848},{100,120,488},{48,76,496}},
  3900. {{173,63,3330},{173,200,3848},{109,120,488},{57,76,496}},
  3901. {{184,6,3851},{181,200,3848},{117,120,488},{65,76,496}},
  3902. {{192,6,3851},{133,28,3936},{125,120,488},{73,76,496}},
  3903. {{189,200,3907},{141,28,3936},{133,120,488},{81,76,496}},
  3904. {{198,200,3907},{138,108,4000},{142,120,488},{90,76,496}},
  3905. {{206,200,3907},{146,108,4000},{150,120,488},{98,76,496}},
  3906. {{214,200,3395},{154,108,4000},{158,120,488},{106,76,496}},
  3907. {{190,136,4001},{162,108,4000},{166,120,488},{114,76,496}},
  3908. {{123,30,4076},{87,15,4080},{123,30,492},{87,15,496}},
  3909. {{117,110,4084},{80,31,4080},{117,110,500},{80,31,496}},
  3910. {{125,110,4084},{88,31,4080},{125,110,500},{88,31,496}},
  3911. {{133,110,4084},{96,31,4080},{133,110,500},{96,31,496}},
  3912. {{9,56,3904},{9,56,3904},{0,67,448},{0,67,448}},
  3913. {{1,8,3904},{1,8,3904},{1,84,448},{1,84,448}},
  3914. {{1,124,3904},{1,124,3904},{0,39,384},{0,39,384}},
  3915. {{9,124,3904},{9,124,3904},{1,4,448},{1,4,448}},
  3916. {{6,76,3904},{6,76,3904},{0,70,448},{0,70,448}},
  3917. {{62,6,3859},{62,6,3856},{2,38,480},{2,38,480}},
  3918. {{70,6,3859},{70,6,3856},{5,43,416},{5,43,416}},
  3919. {{78,6,3859},{78,6,3856},{2,11,416},{2,11,416}},
  3920. {{87,6,3859},{87,6,3856},{0,171,488},{0,171,488}},
  3921. {{67,8,3906},{95,6,3856},{8,171,488},{8,171,488}},
  3922. {{75,8,3907},{103,6,3856},{5,123,488},{5,123,488}},
  3923. {{83,8,3907},{111,6,3856},{2,75,488},{2,75,488}},
  3924. {{92,8,3907},{120,6,3856},{0,27,488},{0,27,488}},
  3925. {{100,8,3907},{128,6,3856},{8,27,488},{8,27,488}},
  3926. {{120,106,3843},{136,6,3856},{99,6,387},{16,27,488}},
  3927. {{128,106,3843},{144,6,3856},{107,6,387},{2,11,496}},
  3928. {{137,106,3843},{153,6,3856},{117,6,387},{11,11,496}},
  3929. {{145,106,3843},{161,6,3856},{125,6,387},{19,11,496}},
  3930. {{163,8,3851},{137,43,3904},{133,6,387},{27,11,496}},
  3931. {{171,8,3851},{145,43,3904},{141,6,387},{35,11,496}},
  3932. {{180,8,3851},{110,11,4000},{150,6,387},{44,11,496}},
  3933. {{188,8,3851},{118,11,4000},{158,6,387},{52,11,496}},
  3934. {{172,72,3907},{126,11,4000},{166,6,387},{60,11,496}},
  3935. {{174,6,3971},{134,11,4000},{174,6,387},{68,11,496}},
  3936. {{183,6,3971},{143,11,4000},{183,6,387},{77,11,496}},
  3937. {{191,6,3971},{151,11,4000},{191,6,387},{85,11,496}},
  3938. {{199,6,3971},{159,11,4000},{199,6,387},{93,11,496}},
  3939. {{92,12,4084},{69,15,4080},{92,12,500},{69,15,496}},
  3940. {{101,12,4084},{78,15,4080},{101,12,500},{78,15,496}},
  3941. {{110,12,4084},{86,15,4080},{110,12,500},{86,15,496}},
  3942. {{118,12,4084},{79,31,4080},{118,12,500},{79,31,496}},
  3943. {{126,12,4084},{87,31,4080},{126,12,500},{87,31,496}},
  3944. {{71,8,3602},{71,8,3600},{2,21,384},{2,21,384}},
  3945. {{79,8,3611},{79,8,3608},{0,69,448},{0,69,448}},
  3946. {{87,8,3611},{87,8,3608},{0,23,384},{0,23,384}},
  3947. {{95,8,3611},{95,8,3608},{1,5,448},{1,5,448}},
  3948. {{104,8,3611},{104,8,3608},{0,88,448},{0,88,448}},
  3949. {{112,8,3611},{112,8,3608},{0,72,448},{0,72,448}},
  3950. {{120,8,3611},{121,8,3608},{36,21,458},{36,21,456}},
  3951. {{133,47,3091},{129,8,3608},{44,21,458},{44,21,456}},
  3952. {{142,47,3091},{138,8,3608},{53,21,459},{53,21,456}},
  3953. {{98,12,3850},{98,12,3848},{61,21,459},{61,21,456}},
  3954. {{106,12,3850},{106,12,3848},{10,92,480},{69,21,456}},
  3955. {{114,12,3851},{114,12,3848},{18,92,480},{77,21,456}},
  3956. {{123,12,3851},{123,12,3848},{3,44,488},{86,21,456}},
  3957. {{95,12,3906},{95,12,3904},{11,44,488},{94,21,456}},
  3958. {{103,12,3906},{103,12,3904},{19,44,488},{102,21,456}},
  3959. {{111,12,3907},{111,12,3904},{27,44,489},{110,21,456}},
  3960. {{120,12,3907},{120,12,3904},{36,44,489},{119,21,456}},
  3961. {{128,12,3907},{128,12,3904},{44,44,489},{127,21,456}},
  3962. {{136,12,3907},{136,12,3904},{52,44,489},{135,21,456}},
  3963. {{144,12,3907},{144,12,3904},{60,44,490},{144,21,456}},
  3964. {{153,12,3907},{153,12,3904},{69,44,490},{153,21,456}},
  3965. {{161,12,3395},{149,188,3968},{77,44,490},{161,21,456}},
  3966. {{169,12,3395},{199,21,3928},{85,44,490},{169,21,456}},
  3967. {{113,95,4001},{202,69,3992},{125,8,483},{177,21,456}},
  3968. {{122,95,4001},{201,21,3984},{134,8,483},{186,21,456}},
  3969. {{143,8,4067},{209,21,3984},{142,8,483},{194,21,456}},
  3970. {{151,8,4067},{47,15,4080},{151,8,483},{47,15,496}},
  3971. {{159,8,4067},{55,15,4080},{159,8,483},{55,15,496}},
  3972. {{168,8,4067},{64,15,4080},{168,8,483},{64,15,496}},
  3973. {{160,40,4075},{72,15,4080},{160,40,491},{72,15,496}},
  3974. {{168,40,4075},{80,15,4080},{168,40,491},{80,15,496}},
  3975. {{144,8,4082},{88,15,4080},{144,8,498},{88,15,496}},
  3976. };
  3977. static void convert_etc1s_to_etc2_eac_r11(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
  3978. {
  3979. const uint32_t low_selector = pSelector->m_lo_selector;
  3980. const uint32_t high_selector = pSelector->m_hi_selector;
  3981. const color32& base_color = pEndpoints->m_color5;
  3982. const uint32_t inten_table = pEndpoints->m_inten5;
  3983. if (low_selector == high_selector)
  3984. {
  3985. uint32_t r;
  3986. decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r);
  3987. // Constant alpha block
  3988. // Select table 13, use selector 4 (0), set multiplier to 1 and base color r
  3989. pDst_block->m_base = r;
  3990. pDst_block->m_table = 13;
  3991. pDst_block->m_multiplier = 1;
  3992. // selectors are all 4's
  3993. static const uint8_t s_etc2_eac_r11_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 };
  3994. memcpy(pDst_block->m_selectors, s_etc2_eac_r11_sel4, sizeof(s_etc2_eac_r11_sel4));
  3995. return;
  3996. }
  3997. uint32_t selector_range_table = 0;
  3998. for (selector_range_table = 0; selector_range_table < NUM_ETC2_EAC_SELECTOR_RANGES; selector_range_table++)
  3999. if ((low_selector == s_etc2_eac_selector_ranges[selector_range_table].m_low) && (high_selector == s_etc2_eac_selector_ranges[selector_range_table].m_high))
  4000. break;
  4001. if (selector_range_table >= NUM_ETC2_EAC_SELECTOR_RANGES)
  4002. selector_range_table = 0;
  4003. const etc1_g_to_eac_conversion* pTable_entry = &s_etc1_g_to_etc2_r11[base_color.r + inten_table * 32][selector_range_table];
  4004. pDst_block->m_base = pTable_entry->m_base;
  4005. pDst_block->m_table = pTable_entry->m_table_mul >> 4;
  4006. pDst_block->m_multiplier = pTable_entry->m_table_mul & 15;
  4007. uint64_t selector_bits = 0;
  4008. for (uint32_t y = 0; y < 4; y++)
  4009. {
  4010. for (uint32_t x = 0; x < 4; x++)
  4011. {
  4012. uint32_t s = pSelector->get_selector(x, y);
  4013. uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7;
  4014. const uint32_t dst_ofs = 45 - (y + x * 4) * 3;
  4015. selector_bits |= (static_cast<uint64_t>(ds) << dst_ofs);
  4016. }
  4017. }
  4018. pDst_block->set_selector_bits(selector_bits);
  4019. }
  4020. #endif // BASISD_SUPPORT_ETC2_EAC_RG11
  4021. // ASTC
  // One precomputed ETC1S->ASTC conversion result: the chosen low/high ASTC endpoint values
  // and the error of that choice. The table generators in this file clamp the error to 0xFFFF.
  struct etc1_to_astc_solution
  {
      uint8_t m_lo;    // low endpoint value
      uint8_t m_hi;    // high endpoint value
      uint16_t m_err;  // error of this lo/hi pair
  };
  4028. #if BASISD_SUPPORT_ASTC
  4029. static dxt_selector_range g_etc1_to_astc_selector_ranges[] =
  4030. {
  4031. { 0, 3 },
  4032. { 1, 3 },
  4033. { 0, 2 },
  4034. { 1, 2 },
  4035. { 2, 3 },
  4036. { 0, 1 },
  4037. };
  4038. const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_RANGES = sizeof(g_etc1_to_astc_selector_ranges) / sizeof(g_etc1_to_astc_selector_ranges[0]);
  4039. static uint32_t g_etc1_to_astc_selector_range_index[4][4];
  4040. const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS = 10;
  4041. static const uint8_t g_etc1_to_astc_selector_mappings[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS][4] =
  4042. {
  4043. { 0, 0, 1, 1 },
  4044. { 0, 0, 1, 2 },
  4045. { 0, 0, 1, 3 },
  4046. { 0, 0, 2, 3 },
  4047. { 0, 1, 1, 1 },
  4048. { 0, 1, 2, 2 },
  4049. { 0, 1, 2, 3 },
  4050. { 0, 2, 3, 3 },
  4051. { 1, 2, 2, 2 },
  4052. { 1, 2, 3, 3 },
  4053. };
  4054. static const etc1_to_astc_solution g_etc1_to_astc[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = {
  4055. #include "basisu_transcoder_tables_astc.inc"
  4056. };
  4057. // The best selector mapping to use given a base base+inten table and used selector range for converting grayscale data.
  4058. static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];
  4059. #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
  4060. static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = {
  4061. #include "basisu_transcoder_tables_astc_0_255.inc"
  4062. };
  4063. static uint8_t g_etc1_to_astc_best_grayscale_mapping_0_255[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];
  4064. #endif
  4065. static uint32_t g_ise_to_unquant[48];
  4066. #if BASISD_WRITE_NEW_ASTC_TABLES
  4067. static void create_etc1_to_astc_conversion_table_0_47()
  4068. {
  4069. FILE* pFile = nullptr;
  4070. fopen_s(&pFile, "basisu_transcoder_tables_astc.inc", "w");
  4071. uint32_t n = 0;
  4072. for (int inten = 0; inten < 8; inten++)
  4073. {
  4074. for (uint32_t g = 0; g < 32; g++)
  4075. {
  4076. color32 block_colors[4];
  4077. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  4078. for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
  4079. {
  4080. const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
  4081. const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;
  4082. uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4083. uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4084. uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4085. uint64_t highest_best_err = 0;
  4086. for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
  4087. {
  4088. uint32_t best_lo = 0;
  4089. uint32_t best_hi = 0;
  4090. uint64_t best_err = UINT64_MAX;
  4091. for (uint32_t hi = 0; hi <= 47; hi++)
  4092. {
  4093. for (uint32_t lo = 0; lo <= 47; lo++)
  4094. {
  4095. uint32_t colors[4];
  4096. for (uint32_t s = 0; s < 4; s++)
  4097. {
  4098. uint32_t s_scaled = s | (s << 2) | (s << 4);
  4099. if (s_scaled > 32)
  4100. s_scaled++;
  4101. uint32_t c0 = g_ise_to_unquant[lo] | (g_ise_to_unquant[lo] << 8);
  4102. uint32_t c1 = g_ise_to_unquant[hi] | (g_ise_to_unquant[hi] << 8);
  4103. colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;
  4104. }
  4105. uint64_t total_err = 0;
  4106. for (uint32_t s = low_selector; s <= high_selector; s++)
  4107. {
  4108. int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];
  4109. int err_scale = 1;
  4110. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  4111. // the low/high selectors which are clamping to either 0 or 255.
  4112. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  4113. err_scale = 8;
  4114. total_err += (err * err) * err_scale;
  4115. }
  4116. if (total_err < best_err)
  4117. {
  4118. best_err = total_err;
  4119. best_lo = lo;
  4120. best_hi = hi;
  4121. }
  4122. }
  4123. }
  4124. mapping_best_low[m] = best_lo;
  4125. mapping_best_high[m] = best_hi;
  4126. mapping_best_err[m] = best_err;
  4127. highest_best_err = basisu::maximum(highest_best_err, best_err);
  4128. } // m
  4129. for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
  4130. {
  4131. uint64_t err = mapping_best_err[m];
  4132. err = basisu::minimum<uint64_t>(err, 0xFFFF);
  4133. fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);
  4134. n++;
  4135. if ((n & 31) == 31)
  4136. fprintf(pFile, "\n");
  4137. } // m
  4138. } // sr
  4139. } // g
  4140. } // inten
  4141. fclose(pFile);
  4142. }
  4143. static void create_etc1_to_astc_conversion_table_0_255()
  4144. {
  4145. FILE* pFile = nullptr;
  4146. fopen_s(&pFile, "basisu_transcoder_tables_astc_0_255.inc", "w");
  4147. uint32_t n = 0;
  4148. for (int inten = 0; inten < 8; inten++)
  4149. {
  4150. for (uint32_t g = 0; g < 32; g++)
  4151. {
  4152. color32 block_colors[4];
  4153. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  4154. for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
  4155. {
  4156. const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
  4157. const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;
  4158. uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4159. uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4160. uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4161. uint64_t highest_best_err = 0;
  4162. for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
  4163. {
  4164. uint32_t best_lo = 0;
  4165. uint32_t best_hi = 0;
  4166. uint64_t best_err = UINT64_MAX;
  4167. for (uint32_t hi = 0; hi <= 255; hi++)
  4168. {
  4169. for (uint32_t lo = 0; lo <= 255; lo++)
  4170. {
  4171. uint32_t colors[4];
  4172. for (uint32_t s = 0; s < 4; s++)
  4173. {
  4174. uint32_t s_scaled = s | (s << 2) | (s << 4);
  4175. if (s_scaled > 32)
  4176. s_scaled++;
  4177. uint32_t c0 = lo | (lo << 8);
  4178. uint32_t c1 = hi | (hi << 8);
  4179. colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;
  4180. }
  4181. uint64_t total_err = 0;
  4182. for (uint32_t s = low_selector; s <= high_selector; s++)
  4183. {
  4184. int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];
  4185. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  4186. // the low/high selectors which are clamping to either 0 or 255.
  4187. int err_scale = 1;
  4188. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  4189. err_scale = 8;
  4190. total_err += (err * err) * err_scale;
  4191. }
  4192. if (total_err < best_err)
  4193. {
  4194. best_err = total_err;
  4195. best_lo = lo;
  4196. best_hi = hi;
  4197. }
  4198. }
  4199. }
  4200. mapping_best_low[m] = best_lo;
  4201. mapping_best_high[m] = best_hi;
  4202. mapping_best_err[m] = best_err;
  4203. highest_best_err = basisu::maximum(highest_best_err, best_err);
  4204. } // m
  4205. for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
  4206. {
  4207. uint64_t err = mapping_best_err[m];
  4208. err = basisu::minimum<uint64_t>(err, 0xFFFF);
  4209. fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);
  4210. n++;
  4211. if ((n & 31) == 31)
  4212. fprintf(pFile, "\n");
  4213. } // m
  4214. } // sr
  4215. } // g
  4216. } // inten
  4217. fclose(pFile);
  4218. }
  4219. #endif
  4220. #endif
  4221. #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC
  // Maps a group of 5 trits to its packed 8-bit encoding (3^5 = 243 entries).
  // The index is t0 + 3*t1 + 9*t2 + 27*t3 + 81*t4, so each row of 27 below covers one (t3, t4) combination.
  // This is the inverse of the trit bit manipulation described in
  // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
  static const uint8_t g_astc_trit_encode[243] =
  {
      0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14,
      32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39, 43, 51, 55, 59, 44, 45, 46,
      64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78,
      128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154, 131, 135, 139, 147, 151, 155, 140, 141, 142,
      160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174,
      192, 193, 194, 196, 197, 198, 200, 201, 202, 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206,
      96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110,
      224, 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238,
      28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159, 191, 223, 124, 125, 126
  };
  // Returns bits [low, high] (inclusive, bit 0 = least significant) of 'bits', shifted down to bit 0.
  // The mask is built with unsigned arithmetic and the full 32-bit field is special-cased, because
  // "1 << width" on a signed int is undefined for widths of 31 and 32.
  static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
  {
      const uint32_t width = static_cast<uint32_t>(high - low + 1);
      const uint32_t mask = (width >= 32u) ? 0xFFFFFFFFu : ((1u << width) - 1u);
      return (bits >> low) & mask;
  }
  // ORs the low 'total_bits' bits of 'value' into the byte buffer at pOutput, starting at bit offset
  // 'bit_pos' (bit 0 = LSB of byte 0), and advances bit_pos by total_bits. Works a byte at a time, so the
  // result does not depend on host endianness. The destination bits are OR-ed, so they must start out zero.
  //
  // 'value' is masked to total_bits first: otherwise any stray higher bits would be OR-ed into the
  // neighbouring fields whenever a write ends short of a byte boundary.
  static inline void astc_set_bits(uint32_t* pOutput, int& bit_pos, uint32_t value, uint32_t total_bits)
  {
      uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);

      if (total_bits < 32)
          value &= ((1u << total_bits) - 1u);

      while (total_bits)
      {
          // Write at most up to the end of the current byte.
          const uint32_t bit_ofs = static_cast<uint32_t>(bit_pos & 7);
          const uint32_t room_in_byte = 8 - bit_ofs;
          const uint32_t bits_to_write = (total_bits < room_in_byte) ? total_bits : room_in_byte;

          pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << bit_ofs);

          bit_pos += bits_to_write;
          total_bits -= bits_to_write;
          value >>= bits_to_write;
      }
  }
  4248. // Encodes 5 values to output, usable for any range that uses trits and bits
  4249. static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
  4250. {
  4251. // First extract the trits and the bits from the 5 input values
  4252. int trits = 0, bits[5];
  4253. const uint32_t bit_mask = (1 << n) - 1;
  4254. for (int i = 0; i < 5; i++)
  4255. {
  4256. static const int s_muls[5] = { 1, 3, 9, 27, 81 };
  4257. const int t = pValues[i] >> n;
  4258. trits += t * s_muls[i];
  4259. bits[i] = pValues[i] & bit_mask;
  4260. }
  4261. // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
  4262. // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
  4263. assert(trits < 243);
  4264. const int T = g_astc_trit_encode[trits];
  4265. // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
  4266. astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
  4267. astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
  4268. (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
  4269. }
  4270. #endif // #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC
  4271. #if BASISD_SUPPORT_ASTC
  // Unpacked endpoints and weights handed to the astc_pack_block_*() functions.
  struct astc_block_params
  {
      // Sized as two groups of 5 for the trit packer; at most 8 entries carry data (RRGGBBAA00).
      uint8_t m_endpoints[10];

      // Up to 32 weights; how many are read depends on the packing function.
      uint8_t m_weights[32];
  };
  4278. // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2).
  4279. // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity.
  4280. // Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color.
  4281. // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47.
  4282. // Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec:
  4283. // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization
  4284. // 32 total weights, stored as 16 CA CA, each ranging from 0-3.
  4285. static void astc_pack_block_cem_12_weight_range2(uint32_t *pOutput, const astc_block_params* pBlock)
  4286. {
  4287. uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
  4288. // Write constant block mode, color component selector, number of partitions, color endpoint mode
  4289. // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
  4290. pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00;
  4291. pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0;
  4292. pOutput[2] = 0;
  4293. pOutput[3] = 0;
  4294. // Pack 8 endpoints (each ranging between [0,47]) using BISE starting at bit 17
  4295. int bit_pos = 17;
  4296. astc_encode_trits(pOutput, pBlock->m_endpoints, bit_pos, 4);
  4297. astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4);
  4298. // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order.
  4299. for (uint32_t i = 0; i < 32; i++)
  4300. {
  4301. static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
  4302. const uint32_t ofs = 126 - (i * 2);
  4303. pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
  4304. }
  4305. }
  4306. // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights
  4307. // This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient.
  4308. static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock)
  4309. {
  4310. uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
  4311. // Write constant block mode, color component selector, number of partitions, color endpoint mode
  4312. // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
  4313. pBytes[0] = 0x41; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00;
  4314. pOutput[1] = 0;
  4315. pBytes[8] = 0x00; pBytes[9] = 0x00; pBytes[10] = 0x00; pBytes[11] = 0xc0;
  4316. pOutput[3] = 0;
  4317. // Pack 8 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
  4318. int bit_pos = 17;
  4319. for (uint32_t i = 0; i < 8; i++)
  4320. astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
  4321. // Pack 32 1-bit weights, which are stored from the top down into the block in opposite bit order.
  4322. for (uint32_t i = 0; i < 32; i++)
  4323. {
  4324. const uint32_t ofs = 127 - i;
  4325. pBytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7));
  4326. }
  4327. }
  4328. #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
  4329. // Optional 8-bit endpoint packing functions.
  4330. // CEM mode 4 (LDR Luminance+Alpha Direct), 8-bit endpoints, 2 bit weights
  4331. static void astc_pack_block_cem_4_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)
  4332. {
  4333. uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
  4334. // Write constant block mode, color component selector, number of partitions, color endpoint mode
  4335. // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
  4336. pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00;
  4337. pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0;
  4338. pOutput[2] = 0;
  4339. pOutput[3] = 0;
  4340. // Pack 4 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
  4341. int bit_pos = 17;
  4342. for (uint32_t i = 0; i < 4; i++)
  4343. astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
  4344. // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order.
  4345. for (uint32_t i = 0; i < 32; i++)
  4346. {
  4347. static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
  4348. const uint32_t ofs = 126 - (i * 2);
  4349. pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
  4350. }
  4351. }
  4352. // CEM mode 8 (LDR RGB Direct), 8-bit endpoints, 2 bit weights
  4353. static void astc_pack_block_cem_8_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)
  4354. {
  4355. uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
  4356. // Write constant block mode, color component selector, number of partitions, color endpoint mode
  4357. // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
  4358. pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00;
  4359. pOutput[1] = 0;
  4360. pOutput[2] = 0;
  4361. pOutput[3] = 0;
  4362. // Pack 6 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
  4363. int bit_pos = 17;
  4364. for (uint32_t i = 0; i < 6; i++)
  4365. astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
  4366. // Pack 16 2-bit weights, which are stored from the top down into the block in opposite bit order.
  4367. for (uint32_t i = 0; i < 16; i++)
  4368. {
  4369. static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
  4370. const uint32_t ofs = 126 - (i * 2);
  4371. pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
  4372. }
  4373. }
  4374. #endif
  // For each 8-bit input value: the [0,47] quantized endpoint whose unquantized value is closest to it.
  // Filled in by transcoder_init_astc().
  static uint8_t g_astc_single_color_encoding_0[256];

  // For each 8-bit input value: the [0,47] low/high endpoint pair that best reproduces it when every
  // texel uses selector 1 (interpolation weight 21/64). Filled in by transcoder_init_astc().
  static struct
  {
      uint8_t m_lo, m_hi;
  } g_astc_single_color_encoding_1[256];
  4382. static void transcoder_init_astc()
  4383. {
  4384. for (uint32_t base_color = 0; base_color < 32; base_color++)
  4385. {
  4386. for (uint32_t inten_table = 0; inten_table < 8; inten_table++)
  4387. {
  4388. for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++)
  4389. {
  4390. const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4391. uint32_t best_mapping = 0;
  4392. uint32_t best_err = UINT32_MAX;
  4393. for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++)
  4394. {
  4395. if (pTable_g[mapping_index].m_err < best_err)
  4396. {
  4397. best_err = pTable_g[mapping_index].m_err;
  4398. best_mapping = mapping_index;
  4399. }
  4400. }
  4401. g_etc1_to_astc_best_grayscale_mapping[base_color][inten_table][range_index] = static_cast<uint8_t>(best_mapping);
  4402. }
  4403. }
  4404. }
  4405. #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
  4406. for (uint32_t base_color = 0; base_color < 32; base_color++)
  4407. {
  4408. for (uint32_t inten_table = 0; inten_table < 8; inten_table++)
  4409. {
  4410. for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++)
  4411. {
  4412. const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4413. uint32_t best_mapping = 0;
  4414. uint32_t best_err = UINT32_MAX;
  4415. for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++)
  4416. {
  4417. if (pTable_g[mapping_index].m_err < best_err)
  4418. {
  4419. best_err = pTable_g[mapping_index].m_err;
  4420. best_mapping = mapping_index;
  4421. }
  4422. }
  4423. g_etc1_to_astc_best_grayscale_mapping_0_255[base_color][inten_table][range_index] = static_cast<uint8_t>(best_mapping);
  4424. }
  4425. }
  4426. }
  4427. #endif
  4428. for (uint32_t i = 0; i < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; i++)
  4429. {
  4430. uint32_t l = g_etc1_to_astc_selector_ranges[i].m_low;
  4431. uint32_t h = g_etc1_to_astc_selector_ranges[i].m_high;
  4432. g_etc1_to_astc_selector_range_index[l][h] = i;
  4433. }
  4434. // Endpoint dequantization, see:
  4435. // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization
  4436. for (uint32_t trit = 0; trit < 3; trit++)
  4437. {
  4438. for (uint32_t bit = 0; bit < 16; bit++)
  4439. {
  4440. const uint32_t A = (bit & 1) ? 511 : 0;
  4441. const uint32_t B = (bit >> 1) | ((bit >> 1) << 6);
  4442. const uint32_t C = 22;
  4443. const uint32_t D = trit;
  4444. uint32_t unq = D * C + B;
  4445. unq = unq ^ A;
  4446. unq = (A & 0x80) | (unq >> 2);
  4447. g_ise_to_unquant[bit | (trit << 4)] = unq;
  4448. }
  4449. }
  4450. // Compute table used for optimal single color encoding.
  4451. for (int i = 0; i < 256; i++)
  4452. {
  4453. int lowest_e = INT_MAX;
  4454. for (int lo = 0; lo < 48; lo++)
  4455. {
  4456. for (int hi = 0; hi < 48; hi++)
  4457. {
  4458. const int lo_v = g_ise_to_unquant[lo];
  4459. const int hi_v = g_ise_to_unquant[hi];
  4460. int l = lo_v | (lo_v << 8);
  4461. int h = hi_v | (hi_v << 8);
  4462. int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8;
  4463. int e = abs(v - i);
  4464. if (e < lowest_e)
  4465. {
  4466. g_astc_single_color_encoding_1[i].m_hi = static_cast<uint8_t>(hi);
  4467. g_astc_single_color_encoding_1[i].m_lo = static_cast<uint8_t>(lo);
  4468. lowest_e = e;
  4469. }
  4470. } // hi
  4471. } // lo
  4472. }
  4473. for (int i = 0; i < 256; i++)
  4474. {
  4475. int lowest_e = INT_MAX;
  4476. for (int lo = 0; lo < 48; lo++)
  4477. {
  4478. const int lo_v = g_ise_to_unquant[lo];
  4479. int e = abs(lo_v - i);
  4480. if (e < lowest_e)
  4481. {
  4482. g_astc_single_color_encoding_0[i] = static_cast<uint8_t>(lo);
  4483. lowest_e = e;
  4484. }
  4485. } // lo
  4486. }
  4487. }
  4488. // Converts opaque or color+alpha ETC1S block to ASTC 4x4.
  4489. // This function tries to use the best ASTC mode given the block's actual contents.
  4490. static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector,
  4491. bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook)
  4492. {
  4493. astc_block_params blk;
  4494. blk.m_endpoints[8] = 0;
  4495. blk.m_endpoints[9] = 0;
  4496. int constant_alpha_val = 255;
  4497. int num_unique_alpha_selectors = 1;
  4498. if (transcode_alpha)
  4499. {
  4500. const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
  4501. num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors;
  4502. if (num_unique_alpha_selectors == 1)
  4503. {
  4504. const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
  4505. const color32& alpha_base_color = alpha_endpoint.m_color5;
  4506. const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
  4507. int alpha_block_colors[4];
  4508. decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
  4509. constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
  4510. }
  4511. }
  4512. const color32& base_color = pEndpoints->m_color5;
  4513. const uint32_t inten_table = pEndpoints->m_inten5;
  4514. const uint32_t low_selector = pSelector->m_lo_selector;
  4515. const uint32_t high_selector = pSelector->m_hi_selector;
  4516. // Handle solid color or BTC blocks, which can always be encoded from ETC1S to ASTC losslessly.
  4517. if ((pSelector->m_num_unique_selectors == 1) && (num_unique_alpha_selectors == 1))
  4518. {
  4519. // Both color and alpha are constant, write a solid color block and exit.
  4520. // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks
  4521. uint32_t r, g, b;
  4522. decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
  4523. uint32_t* pOutput = static_cast<uint32_t*>(pDst_block);
  4524. uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst_block);
  4525. pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff;
  4526. pOutput[1] = 0xffffffff;
  4527. pOutput[2] = 0;
  4528. pOutput[3] = 0;
  4529. int bit_pos = 64;
  4530. astc_set_bits(pOutput, bit_pos, r | (r << 8), 16);
  4531. astc_set_bits(pOutput, bit_pos, g | (g << 8), 16);
  4532. astc_set_bits(pOutput, bit_pos, b | (b << 8), 16);
  4533. astc_set_bits(pOutput, bit_pos, constant_alpha_val | (constant_alpha_val << 8), 16);
  4534. return;
  4535. }
  4536. else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2))
  4537. {
  4538. // Both color and alpha use <= 2 unique selectors each.
  4539. // Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights).
  4540. color32 block_colors[4];
  4541. decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
  4542. blk.m_endpoints[0] = block_colors[low_selector].r;
  4543. blk.m_endpoints[2] = block_colors[low_selector].g;
  4544. blk.m_endpoints[4] = block_colors[low_selector].b;
  4545. blk.m_endpoints[1] = block_colors[high_selector].r;
  4546. blk.m_endpoints[3] = block_colors[high_selector].g;
  4547. blk.m_endpoints[5] = block_colors[high_selector].b;
  4548. int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4];
  4549. int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5];
  4550. bool invert = false;
  4551. if (s1 < s0)
  4552. {
  4553. std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
  4554. std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
  4555. std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
  4556. invert = true;
  4557. }
  4558. if (transcode_alpha)
  4559. {
  4560. const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
  4561. const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
  4562. const color32& alpha_base_color = alpha_endpoint.m_color5;
  4563. const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
  4564. const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
  4565. const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
  4566. int alpha_block_colors[4];
  4567. decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
  4568. blk.m_endpoints[6] = static_cast<uint8_t>(alpha_block_colors[alpha_low_selector]);
  4569. blk.m_endpoints[7] = static_cast<uint8_t>(alpha_block_colors[alpha_high_selector]);
  4570. for (uint32_t y = 0; y < 4; y++)
  4571. {
  4572. for (uint32_t x = 0; x < 4; x++)
  4573. {
  4574. uint32_t s = alpha_selectors.get_selector(x, y);
  4575. s = (s == alpha_high_selector) ? 1 : 0;
  4576. blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(s);
  4577. } // x
  4578. } // y
  4579. }
  4580. else
  4581. {
  4582. blk.m_endpoints[6] = 255;
  4583. blk.m_endpoints[7] = 255;
  4584. for (uint32_t i = 0; i < 16; i++)
  4585. blk.m_weights[i * 2 + 1] = 0;
  4586. }
  4587. for (uint32_t y = 0; y < 4; y++)
  4588. {
  4589. for (uint32_t x = 0; x < 4; x++)
  4590. {
  4591. uint32_t s = pSelector->get_selector(x, y);
  4592. s = (s == high_selector) ? 1 : 0;
  4593. if (invert)
  4594. s = 1 - s;
  4595. blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(s);
  4596. } // x
  4597. } // y
  4598. astc_pack_block_cem_12_weight_range0(reinterpret_cast<uint32_t*>(pDst_block), &blk);
  4599. return;
  4600. }
  4601. // Either alpha and/or color use > 2 unique selectors each, so we must do something more complex.
  4602. #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
  4603. // The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints.
  4604. // If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha.
  4605. if ((base_color.r == base_color.g) && (base_color.r == base_color.b))
  4606. {
  4607. if (transcode_alpha)
  4608. {
  4609. const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
  4610. const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
  4611. const color32& alpha_base_color = alpha_endpoint.m_color5;
  4612. const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
  4613. const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
  4614. const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
  4615. if (num_unique_alpha_selectors <= 2)
  4616. {
  4617. // Simple alpha block with only 1 or 2 unique values, so use BTC. This is lossless.
  4618. int alpha_block_colors[4];
  4619. decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
  4620. blk.m_endpoints[2] = static_cast<uint8_t>(alpha_block_colors[alpha_low_selector]);
  4621. blk.m_endpoints[3] = static_cast<uint8_t>(alpha_block_colors[alpha_high_selector]);
  4622. for (uint32_t i = 0; i < 16; i++)
  4623. {
  4624. uint32_t s = alpha_selectors.get_selector(i & 3, i >> 2);
  4625. blk.m_weights[i * 2 + 1] = (s == alpha_high_selector) ? 3 : 0;
  4626. }
  4627. }
  4628. else
  4629. {
  4630. // Convert ETC1S alpha
  4631. const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector];
  4632. //[32][8][RANGES][MAPPING]
  4633. const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4634. const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table];
  4635. blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
  4636. blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
  4637. const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
  4638. for (uint32_t y = 0; y < 4; y++)
  4639. {
  4640. for (uint32_t x = 0; x < 4; x++)
  4641. {
  4642. uint32_t s = alpha_selectors.get_selector(x, y);
  4643. uint32_t as = pSelectors_xlat[s];
  4644. blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
  4645. } // x
  4646. } // y
  4647. }
  4648. }
  4649. else
  4650. {
  4651. // No alpha slice - set output alpha to all 255's
  4652. blk.m_endpoints[2] = 255;
  4653. blk.m_endpoints[3] = 255;
  4654. for (uint32_t i = 0; i < 16; i++)
  4655. blk.m_weights[i * 2 + 1] = 0;
  4656. }
  4657. if (pSelector->m_num_unique_selectors <= 2)
  4658. {
  4659. // Simple color block with only 1 or 2 unique values, so use BTC. This is lossless.
  4660. int block_colors[4];
  4661. decoder_etc_block::get_block_colors5_g(block_colors, base_color, inten_table);
  4662. blk.m_endpoints[0] = static_cast<uint8_t>(block_colors[low_selector]);
  4663. blk.m_endpoints[1] = static_cast<uint8_t>(block_colors[high_selector]);
  4664. for (uint32_t i = 0; i < 16; i++)
  4665. {
  4666. uint32_t s = pSelector->get_selector(i & 3, i >> 2);
  4667. blk.m_weights[i * 2] = (s == high_selector) ? 3 : 0;
  4668. }
  4669. }
  4670. else
  4671. {
  4672. // Convert ETC1S alpha
  4673. const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
  4674. //[32][8][RANGES][MAPPING]
  4675. const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4676. const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table];
  4677. blk.m_endpoints[0] = pTable_g[best_mapping].m_lo;
  4678. blk.m_endpoints[1] = pTable_g[best_mapping].m_hi;
  4679. const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
  4680. for (uint32_t y = 0; y < 4; y++)
  4681. {
  4682. for (uint32_t x = 0; x < 4; x++)
  4683. {
  4684. uint32_t s = pSelector->get_selector(x, y);
  4685. uint32_t as = pSelectors_xlat[s];
  4686. blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
  4687. } // x
  4688. } // y
  4689. }
  4690. astc_pack_block_cem_4_weight_range2(reinterpret_cast<uint32_t*>(pDst_block), &blk);
  4691. return;
  4692. }
  4693. // The block isn't grayscale and it uses > 2 unique selectors for opaque and/or alpha.
  4694. // Check for fully opaque blocks, if so use 8-bit endpoints for slightly higher opaque quality (higher than BC1, but lower than BC7 mode 6 opaque).
  4695. if ((num_unique_alpha_selectors == 1) && (constant_alpha_val == 255))
  4696. {
  4697. // Convert ETC1S color
  4698. const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
  4699. //[32][8][RANGES][MAPPING]
  4700. const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4701. const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4702. const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4703. uint32_t best_err = UINT_MAX;
  4704. uint32_t best_mapping = 0;
  4705. assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10);
  4706. #define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
  4707. DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
  4708. DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
  4709. #undef DO_ITER
  4710. blk.m_endpoints[0] = pTable_r[best_mapping].m_lo;
  4711. blk.m_endpoints[1] = pTable_r[best_mapping].m_hi;
  4712. blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
  4713. blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
  4714. blk.m_endpoints[4] = pTable_b[best_mapping].m_lo;
  4715. blk.m_endpoints[5] = pTable_b[best_mapping].m_hi;
  4716. int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4];
  4717. int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5];
  4718. bool invert = false;
  4719. if (s1 < s0)
  4720. {
  4721. std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
  4722. std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
  4723. std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
  4724. invert = true;
  4725. }
  4726. const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
  4727. for (uint32_t y = 0; y < 4; y++)
  4728. {
  4729. for (uint32_t x = 0; x < 4; x++)
  4730. {
  4731. uint32_t s = pSelector->get_selector(x, y);
  4732. uint32_t as = pSelectors_xlat[s];
  4733. if (invert)
  4734. as = 3 - as;
  4735. blk.m_weights[x + y * 4] = static_cast<uint8_t>(as);
  4736. } // x
  4737. } // y
  4738. // Now pack to ASTC
  4739. astc_pack_block_cem_8_weight_range2(reinterpret_cast<uint32_t*>(pDst_block), &blk);
  4740. return;
  4741. }
  4742. #endif //#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
  4743. // Nothing else worked, so fall back to CEM Mode 12 (LDR RGBA Direct), [0,47] endpoints, weight range 2 (2-bit weights), dual planes.
  4744. // This mode can handle everything, but at slightly less quality than BC1.
  4745. if (transcode_alpha)
  4746. {
  4747. const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
  4748. const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
  4749. const color32& alpha_base_color = alpha_endpoint.m_color5;
  4750. const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
  4751. const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
  4752. const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
  4753. if (alpha_low_selector == alpha_high_selector)
  4754. {
  4755. // Solid alpha block - use precomputed tables.
  4756. int alpha_block_colors[4];
  4757. decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
  4758. const uint32_t g = alpha_block_colors[alpha_low_selector];
  4759. blk.m_endpoints[6] = g_astc_single_color_encoding_1[g].m_lo;
  4760. blk.m_endpoints[7] = g_astc_single_color_encoding_1[g].m_hi;
  4761. for (uint32_t i = 0; i < 16; i++)
  4762. blk.m_weights[i * 2 + 1] = 1;
  4763. }
  4764. else if ((alpha_inten_table >= 7) && (alpha_selectors.m_num_unique_selectors == 2) && (alpha_low_selector == 0) && (alpha_high_selector == 3))
  4765. {
  4766. // Handle outlier case where only the two outer colors are used with inten table 7.
  4767. color32 alpha_block_colors[4];
  4768. decoder_etc_block::get_block_colors5(alpha_block_colors, alpha_base_color, alpha_inten_table);
  4769. const uint32_t g0 = alpha_block_colors[0].g;
  4770. const uint32_t g1 = alpha_block_colors[3].g;
  4771. blk.m_endpoints[6] = g_astc_single_color_encoding_0[g0];
  4772. blk.m_endpoints[7] = g_astc_single_color_encoding_0[g1];
  4773. for (uint32_t y = 0; y < 4; y++)
  4774. {
  4775. for (uint32_t x = 0; x < 4; x++)
  4776. {
  4777. uint32_t s = alpha_selectors.get_selector(x, y);
  4778. uint32_t as = (s == alpha_high_selector) ? 3 : 0;
  4779. blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
  4780. } // x
  4781. } // y
  4782. }
  4783. else
  4784. {
  4785. // Convert ETC1S alpha
  4786. const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector];
  4787. //[32][8][RANGES][MAPPING]
  4788. const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4789. const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table];
  4790. blk.m_endpoints[6] = pTable_g[best_mapping].m_lo;
  4791. blk.m_endpoints[7] = pTable_g[best_mapping].m_hi;
  4792. const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
  4793. for (uint32_t y = 0; y < 4; y++)
  4794. {
  4795. for (uint32_t x = 0; x < 4; x++)
  4796. {
  4797. uint32_t s = alpha_selectors.get_selector(x, y);
  4798. uint32_t as = pSelectors_xlat[s];
  4799. blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
  4800. } // x
  4801. } // y
  4802. }
  4803. }
  4804. else
  4805. {
  4806. // No alpha slice - set output alpha to all 255's
  4807. // 1 is 255 when dequantized
  4808. blk.m_endpoints[6] = 1;
  4809. blk.m_endpoints[7] = 1;
  4810. for (uint32_t i = 0; i < 16; i++)
  4811. blk.m_weights[i * 2 + 1] = 0;
  4812. }
  4813. if (low_selector == high_selector)
  4814. {
  4815. // Solid color block - use precomputed tables of optimal endpoints assuming selector weights are all 1.
  4816. color32 block_colors[4];
  4817. decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
  4818. const uint32_t r = block_colors[low_selector].r;
  4819. const uint32_t g = block_colors[low_selector].g;
  4820. const uint32_t b = block_colors[low_selector].b;
  4821. blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo;
  4822. blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi;
  4823. blk.m_endpoints[2] = g_astc_single_color_encoding_1[g].m_lo;
  4824. blk.m_endpoints[3] = g_astc_single_color_encoding_1[g].m_hi;
  4825. blk.m_endpoints[4] = g_astc_single_color_encoding_1[b].m_lo;
  4826. blk.m_endpoints[5] = g_astc_single_color_encoding_1[b].m_hi;
  4827. int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
  4828. int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
  4829. bool invert = false;
  4830. if (s1 < s0)
  4831. {
  4832. std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
  4833. std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
  4834. std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
  4835. invert = true;
  4836. }
  4837. for (uint32_t i = 0; i < 16; i++)
  4838. blk.m_weights[i * 2] = invert ? 2 : 1;
  4839. }
  4840. else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
  4841. {
  4842. // Handle outlier case where only the two outer colors are used with inten table 7.
  4843. color32 block_colors[4];
  4844. decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
  4845. const uint32_t r0 = block_colors[0].r;
  4846. const uint32_t g0 = block_colors[0].g;
  4847. const uint32_t b0 = block_colors[0].b;
  4848. const uint32_t r1 = block_colors[3].r;
  4849. const uint32_t g1 = block_colors[3].g;
  4850. const uint32_t b1 = block_colors[3].b;
  4851. blk.m_endpoints[0] = g_astc_single_color_encoding_0[r0];
  4852. blk.m_endpoints[1] = g_astc_single_color_encoding_0[r1];
  4853. blk.m_endpoints[2] = g_astc_single_color_encoding_0[g0];
  4854. blk.m_endpoints[3] = g_astc_single_color_encoding_0[g1];
  4855. blk.m_endpoints[4] = g_astc_single_color_encoding_0[b0];
  4856. blk.m_endpoints[5] = g_astc_single_color_encoding_0[b1];
  4857. int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
  4858. int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
  4859. bool invert = false;
  4860. if (s1 < s0)
  4861. {
  4862. std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
  4863. std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
  4864. std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
  4865. invert = true;
  4866. }
  4867. for (uint32_t y = 0; y < 4; y++)
  4868. {
  4869. for (uint32_t x = 0; x < 4; x++)
  4870. {
  4871. uint32_t s = pSelector->get_selector(x, y);
  4872. uint32_t as = (s == low_selector) ? 0 : 3;
  4873. if (invert)
  4874. as = 3 - as;
  4875. blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
  4876. } // x
  4877. } // y
  4878. }
  4879. else
  4880. {
  4881. // Convert ETC1S color
  4882. const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
  4883. //[32][8][RANGES][MAPPING]
  4884. const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4885. const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4886. const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
  4887. uint32_t best_err = UINT_MAX;
  4888. uint32_t best_mapping = 0;
  4889. assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10);
  4890. #define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
  4891. DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
  4892. DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
  4893. #undef DO_ITER
  4894. blk.m_endpoints[0] = pTable_r[best_mapping].m_lo;
  4895. blk.m_endpoints[1] = pTable_r[best_mapping].m_hi;
  4896. blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
  4897. blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
  4898. blk.m_endpoints[4] = pTable_b[best_mapping].m_lo;
  4899. blk.m_endpoints[5] = pTable_b[best_mapping].m_hi;
  4900. int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
  4901. int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
  4902. bool invert = false;
  4903. if (s1 < s0)
  4904. {
  4905. std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
  4906. std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
  4907. std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
  4908. invert = true;
  4909. }
  4910. const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
  4911. for (uint32_t y = 0; y < 4; y++)
  4912. {
  4913. for (uint32_t x = 0; x < 4; x++)
  4914. {
  4915. uint32_t s = pSelector->get_selector(x, y);
  4916. uint32_t as = pSelectors_xlat[s];
  4917. if (invert)
  4918. as = 3 - as;
  4919. blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
  4920. } // x
  4921. } // y
  4922. }
  4923. // Now pack to ASTC
  4924. astc_pack_block_cem_12_weight_range2(reinterpret_cast<uint32_t *>(pDst_block), &blk);
  4925. }
  4926. #endif
  4927. #if BASISD_SUPPORT_ATC
  4928. // ATC and PVRTC2 both use these tables.
  // One entry of a precomputed ETC1S -> ATC/PVRTC2 endpoint conversion table.
  // The tables are brace-initialized from generated .inc files as {lo, hi, err}
  // triples, so this must stay a plain aggregate with this member order.
  struct etc1s_to_atc_solution
  {
    uint8_t m_lo;   // Quantized low endpoint for this component.
    uint8_t m_hi;   // Quantized high endpoint for this component.
    uint16_t m_err; // Squared error of this solution (the table generator clamps it to 0xFFFF).
  };
  4935. static dxt_selector_range g_etc1s_to_atc_selector_ranges[] =
  4936. {
  4937. { 0, 3 },
  4938. { 1, 3 },
  4939. { 0, 2 },
  4940. { 1, 2 },
  4941. { 2, 3 },
  4942. { 0, 1 },
  4943. };
  4944. const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_RANGES = sizeof(g_etc1s_to_atc_selector_ranges) / sizeof(g_etc1s_to_atc_selector_ranges[0]);
  4945. static uint32_t g_etc1s_to_atc_selector_range_index[4][4];
  // Number of candidate ETC1S -> ATC selector remappings tried per block.
  const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS = 10;

  // Each row translates an ETC1S selector (the column index, 0-3) into the ATC
  // selector to emit when that mapping is chosen.
  static const uint8_t g_etc1s_to_atc_selector_mappings[NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS][4] =
  {
    { 0, 0, 1, 1 }, // 0
    { 0, 0, 1, 2 }, // 1
    { 0, 0, 1, 3 }, // 2
    { 0, 0, 2, 3 }, // 3
    { 0, 1, 1, 1 }, // 4
    { 0, 1, 2, 2 }, // 5
    { 0, 1, 2, 3 }, // 6 - identity
    { 0, 2, 3, 3 }, // 7
    { 1, 2, 2, 2 }, // 8
    { 1, 2, 3, 3 }, // 9
  };

  // Row of g_etc1s_to_atc_selector_mappings that leaves selectors unchanged, which
  // lets the transcoder copy the packed selector bytes without translating them.
  const uint32_t ATC_IDENTITY_SELECTOR_MAPPING_INDEX = 6;
  4961. #if BASISD_SUPPORT_PVRTC2
  4962. static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_45[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
  4963. #include "basisu_transcoder_tables_pvrtc2_45.inc"
  4964. };
  4965. #if 0
  4966. static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_alpha_33[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
  4967. #include "basisu_transcoder_tables_pvrtc2_alpha_33.inc"
  4968. };
  4969. #endif
  4970. #endif
  4971. static const etc1s_to_atc_solution g_etc1s_to_atc_55[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
  4972. #include "basisu_transcoder_tables_atc_55.inc"
  4973. };
  4974. static const etc1s_to_atc_solution g_etc1s_to_atc_56[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
  4975. #include "basisu_transcoder_tables_atc_56.inc"
  4976. };
  // Best quantized (low, high) endpoint pair for reproducing a single 8-bit value.
  struct atc_match_entry
  {
    uint8_t m_lo;
    uint8_t m_hi;
  };

  // Single-value match tables, indexed by the 8-bit target value.
  // The "_equals_1" tables are built for selector 1 (an interpolated color);
  // the others are built for selector 3 (the high endpoint alone).
  static atc_match_entry g_pvrtc2_match45_equals_1[256], g_atc_match55_equals_1[256], g_atc_match56_equals_1[256]; // selector 1
  static atc_match_entry g_pvrtc2_match4[256], g_atc_match5[256], g_atc_match6[256];

  // Expands a quantized endpoint to 8 bits by bit replication.
  // num_levels == 16: 4 bits -> 5 bits -> 8 bits (only used for PVRTC2).
  // num_levels == 32: 5 bits -> 8 bits.
  // Any other value is treated as 6 bits -> 8 bits. This includes num_levels == 1,
  // where the only input is 0 and the result is therefore 0.
  static int expand_atc_endpoint_to_8(int v, int num_levels)
  {
    if (num_levels == 16)
    {
      v = (v << 1) | (v >> 3);
      return (v << 3) | (v >> 2);
    }

    if (num_levels == 32)
      return (v << 3) | (v >> 2);

    return (v << 2) | (v >> 4);
  }

  // Fills pTable[0..255] with the endpoint pair that best reproduces each 8-bit value.
  //
  // pTable: receives 256 entries.
  // size0:  number of quantization levels of the low endpoint (16, 32 or 64; 1 means "unused, always 0").
  // size1:  number of quantization levels of the high endpoint (16, 32 or 64).
  // sel:    selector the block will use: 1 decodes to (lo*5 + hi*3)/8, 3 decodes to hi.
  //
  // Ties are resolved in favor of the first pair found when scanning lo ascending,
  // then hi ascending. If size0 or size1 is <= 0 nothing is written.
  static void prepare_atc_single_color_table(atc_match_entry* pTable, int size0, int size1, int sel)
  {
    assert((sel == 1) || (sel == 3));

    // Lowest error found so far for each target value. 256 exceeds any possible
    // error (max 255), so the first candidate pair always initializes the entry.
    int lowest_e[256];
    for (int i = 0; i < 256; i++)
      lowest_e[i] = 256;

    // Endpoint pairs form the outer loops so that each pair is expanded and decoded
    // once, instead of once per target value. Every target value still sees the
    // pairs in the same order, so the chosen entries are unchanged.
    for (int lo = 0; lo < size0; lo++)
    {
      const int lo_e = expand_atc_endpoint_to_8(lo, size0);

      for (int hi = 0; hi < size1; hi++)
      {
        const int hi_e = expand_atc_endpoint_to_8(hi, size1);

        // Value this endpoint pair decodes to for the requested selector.
        const int decoded = (sel == 1) ? ((lo_e * 5 + hi_e * 3) / 8) : hi_e;

        for (int i = 0; i < 256; i++)
        {
          const int e = abs(decoded - i);
          if (e < lowest_e[i])
          {
            pTable[i].m_lo = static_cast<uint8_t>(lo);
            pTable[i].m_hi = static_cast<uint8_t>(hi);
            lowest_e[i] = e;
          }
        } // i
      } // hi
    } // lo
  }
  5036. static void transcoder_init_atc()
  5037. {
  5038. prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1);
  5039. prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1);
  5040. prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1);
  5041. prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3);
  5042. prepare_atc_single_color_table(g_atc_match5, 1, 32, 3);
  5043. prepare_atc_single_color_table(g_atc_match6, 1, 64, 3);
  5044. for (uint32_t i = 0; i < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; i++)
  5045. {
  5046. uint32_t l = g_etc1s_to_atc_selector_ranges[i].m_low;
  5047. uint32_t h = g_etc1s_to_atc_selector_ranges[i].m_high;
  5048. g_etc1s_to_atc_selector_range_index[l][h] = i;
  5049. }
  5050. }
  // In-memory layout of one 8-byte ATC color block: two 16-bit colors stored
  // low byte first, followed by four selector bytes (2 bits per selector).
  struct atc_block
  {
    uint8_t m_lo[2];
    uint8_t m_hi[2];
    uint8_t m_sels[4];

    // Packs the low color as 5:5:5 (r in bits 10-14, g in bits 5-9, b in bits 0-4).
    // With the asserted component ranges, bit 15 is always left clear.
    void set_low_color(uint32_t r, uint32_t g, uint32_t b)
    {
      assert((r < 32) && (g < 32) && (b < 32));

      const uint32_t packed = b | (g << 5) | (r << 10);
      m_lo[0] = static_cast<uint8_t>(packed & 0xFF);
      m_lo[1] = static_cast<uint8_t>((packed >> 8) & 0xFF);
    }

    // Packs the high color as 5:6:5 (r in bits 11-15, g in bits 5-10, b in bits 0-4).
    void set_high_color(uint32_t r, uint32_t g, uint32_t b)
    {
      assert((r < 32) && (g < 64) && (b < 32));

      const uint32_t packed = b | (g << 5) | (r << 11);
      m_hi[0] = static_cast<uint8_t>(packed & 0xFF);
      m_hi[1] = static_cast<uint8_t>((packed >> 8) & 0xFF);
    }
  };
  5071. static void convert_etc1s_to_atc(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
  5072. {
  5073. atc_block* pBlock = static_cast<atc_block*>(pDst);
  5074. const uint32_t low_selector = pSelector->m_lo_selector;
  5075. const uint32_t high_selector = pSelector->m_hi_selector;
  5076. const color32& base_color = pEndpoints->m_color5;
  5077. const uint32_t inten_table = pEndpoints->m_inten5;
  5078. if (low_selector == high_selector)
  5079. {
  5080. uint32_t r, g, b;
  5081. decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
  5082. pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo);
  5083. pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi);
  5084. pBlock->m_sels[0] = 0x55;
  5085. pBlock->m_sels[1] = 0x55;
  5086. pBlock->m_sels[2] = 0x55;
  5087. pBlock->m_sels[3] = 0x55;
  5088. return;
  5089. }
  5090. else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
  5091. {
  5092. color32 block_colors[4];
  5093. decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
  5094. const uint32_t r0 = block_colors[0].r;
  5095. const uint32_t g0 = block_colors[0].g;
  5096. const uint32_t b0 = block_colors[0].b;
  5097. const uint32_t r1 = block_colors[3].r;
  5098. const uint32_t g1 = block_colors[3].g;
  5099. const uint32_t b1 = block_colors[3].b;
  5100. pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_atc_match5[b0].m_hi);
  5101. pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match6[g1].m_hi, g_atc_match5[b1].m_hi);
  5102. pBlock->m_sels[0] = pSelector->m_selectors[0];
  5103. pBlock->m_sels[1] = pSelector->m_selectors[1];
  5104. pBlock->m_sels[2] = pSelector->m_selectors[2];
  5105. pBlock->m_sels[3] = pSelector->m_selectors[3];
  5106. return;
  5107. }
  5108. const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];
  5109. //[32][8][RANGES][MAPPING]
  5110. const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
  5111. const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_56[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
  5112. const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
  5113. uint32_t best_err = UINT_MAX;
  5114. uint32_t best_mapping = 0;
  5115. assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10);
  5116. #define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
  5117. DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
  5118. DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
  5119. #undef DO_ITER
  5120. pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
  5121. pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
  5122. if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping)
  5123. {
  5124. pBlock->m_sels[0] = pSelector->m_selectors[0];
  5125. pBlock->m_sels[1] = pSelector->m_selectors[1];
  5126. pBlock->m_sels[2] = pSelector->m_selectors[2];
  5127. pBlock->m_sels[3] = pSelector->m_selectors[3];
  5128. }
  5129. else
  5130. {
  5131. const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];
  5132. const uint32_t sel_bits0 = pSelector->m_selectors[0];
  5133. const uint32_t sel_bits1 = pSelector->m_selectors[1];
  5134. const uint32_t sel_bits2 = pSelector->m_selectors[2];
  5135. const uint32_t sel_bits3 = pSelector->m_selectors[3];
  5136. uint32_t atc_sels0 = 0, atc_sels1 = 0, atc_sels2 = 0, atc_sels3 = 0;
  5137. #define DO_X(x) { \
  5138. const uint32_t x_shift = (x) * 2; \
  5139. atc_sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \
  5140. atc_sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \
  5141. atc_sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \
  5142. atc_sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); }
  5143. DO_X(0);
  5144. DO_X(1);
  5145. DO_X(2);
  5146. DO_X(3);
  5147. #undef DO_X
  5148. pBlock->m_sels[0] = (uint8_t)atc_sels0;
  5149. pBlock->m_sels[1] = (uint8_t)atc_sels1;
  5150. pBlock->m_sels[2] = (uint8_t)atc_sels2;
  5151. pBlock->m_sels[3] = (uint8_t)atc_sels3;
  5152. }
  5153. }
  5154. #if BASISD_WRITE_NEW_ATC_TABLES
  5155. static void create_etc1s_to_atc_conversion_tables()
  5156. {
  5157. // ATC 55
  5158. FILE* pFile = nullptr;
  5159. fopen_s(&pFile, "basisu_transcoder_tables_atc_55.inc", "w");
  5160. uint32_t n = 0;
  5161. for (int inten = 0; inten < 8; inten++)
  5162. {
  5163. for (uint32_t g = 0; g < 32; g++)
  5164. {
  5165. color32 block_colors[4];
  5166. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  5167. for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
  5168. {
  5169. const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
  5170. const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
  5171. for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
  5172. {
  5173. uint32_t best_lo = 0;
  5174. uint32_t best_hi = 0;
  5175. uint64_t best_err = UINT64_MAX;
  5176. for (uint32_t hi = 0; hi <= 31; hi++)
  5177. {
  5178. for (uint32_t lo = 0; lo <= 31; lo++)
  5179. {
  5180. uint32_t colors[4];
  5181. colors[0] = (lo << 3) | (lo >> 2);
  5182. colors[3] = (hi << 3) | (hi >> 2);
  5183. colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
  5184. colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
  5185. uint64_t total_err = 0;
  5186. for (uint32_t s = low_selector; s <= high_selector; s++)
  5187. {
  5188. int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
  5189. int err_scale = 1;
  5190. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  5191. // the low/high selectors which are clamping to either 0 or 255.
  5192. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  5193. err_scale = 5;
  5194. total_err += (err * err) * err_scale;
  5195. }
  5196. if (total_err < best_err)
  5197. {
  5198. best_err = total_err;
  5199. best_lo = lo;
  5200. best_hi = hi;
  5201. }
  5202. }
  5203. }
  5204. //assert(best_err <= 0xFFFF);
  5205. best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
  5206. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
  5207. n++;
  5208. if ((n & 31) == 31)
  5209. fprintf(pFile, "\n");
  5210. } // m
  5211. } // sr
  5212. } // g
  5213. } // inten
  5214. fclose(pFile);
  5215. pFile = nullptr;
  5216. // ATC 56
  5217. fopen_s(&pFile, "basisu_transcoder_tables_atc_56.inc", "w");
  5218. n = 0;
  5219. for (int inten = 0; inten < 8; inten++)
  5220. {
  5221. for (uint32_t g = 0; g < 32; g++)
  5222. {
  5223. color32 block_colors[4];
  5224. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  5225. for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
  5226. {
  5227. const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
  5228. const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
  5229. for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
  5230. {
  5231. uint32_t best_lo = 0;
  5232. uint32_t best_hi = 0;
  5233. uint64_t best_err = UINT64_MAX;
  5234. for (uint32_t hi = 0; hi <= 63; hi++)
  5235. {
  5236. for (uint32_t lo = 0; lo <= 31; lo++)
  5237. {
  5238. uint32_t colors[4];
  5239. colors[0] = (lo << 3) | (lo >> 2);
  5240. colors[3] = (hi << 2) | (hi >> 4);
  5241. colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
  5242. colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
  5243. uint64_t total_err = 0;
  5244. for (uint32_t s = low_selector; s <= high_selector; s++)
  5245. {
  5246. int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
  5247. int err_scale = 1;
  5248. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  5249. // the low/high selectors which are clamping to either 0 or 255.
  5250. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  5251. err_scale = 5;
  5252. total_err += (err * err) * err_scale;
  5253. }
  5254. if (total_err < best_err)
  5255. {
  5256. best_err = total_err;
  5257. best_lo = lo;
  5258. best_hi = hi;
  5259. }
  5260. }
  5261. }
  5262. //assert(best_err <= 0xFFFF);
  5263. best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
  5264. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
  5265. n++;
  5266. if ((n & 31) == 31)
  5267. fprintf(pFile, "\n");
  5268. } // m
  5269. } // sr
  5270. } // g
  5271. } // inten
  5272. fclose(pFile);
  5273. // PVRTC2 45
  5274. fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w");
  5275. n = 0;
  5276. for (int inten = 0; inten < 8; inten++)
  5277. {
  5278. for (uint32_t g = 0; g < 32; g++)
  5279. {
  5280. color32 block_colors[4];
  5281. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  5282. for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
  5283. {
  5284. const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
  5285. const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
  5286. for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
  5287. {
  5288. uint32_t best_lo = 0;
  5289. uint32_t best_hi = 0;
  5290. uint64_t best_err = UINT64_MAX;
  5291. for (uint32_t hi = 0; hi <= 31; hi++)
  5292. {
  5293. for (uint32_t lo = 0; lo <= 15; lo++)
  5294. {
  5295. uint32_t colors[4];
  5296. colors[0] = (lo << 1) | (lo >> 3);
  5297. colors[0] = (colors[0] << 3) | (colors[0] >> 2);
  5298. colors[3] = (hi << 3) | (hi >> 2);
  5299. colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
  5300. colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
  5301. uint64_t total_err = 0;
  5302. for (uint32_t s = low_selector; s <= high_selector; s++)
  5303. {
  5304. int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
  5305. int err_scale = 1;
  5306. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  5307. // the low/high selectors which are clamping to either 0 or 255.
  5308. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  5309. err_scale = 5;
  5310. total_err += (err * err) * err_scale;
  5311. }
  5312. if (total_err < best_err)
  5313. {
  5314. best_err = total_err;
  5315. best_lo = lo;
  5316. best_hi = hi;
  5317. }
  5318. }
  5319. }
  5320. //assert(best_err <= 0xFFFF);
  5321. best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
  5322. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
  5323. n++;
  5324. if ((n & 31) == 31)
  5325. fprintf(pFile, "\n");
  5326. } // m
  5327. } // sr
  5328. } // g
  5329. } // inten
  5330. fclose(pFile);
  5331. #if 0
  5332. // PVRTC2 34
  5333. fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_34.inc", "w");
  5334. n = 0;
  5335. for (int inten = 0; inten < 8; inten++)
  5336. {
  5337. for (uint32_t g = 0; g < 32; g++)
  5338. {
  5339. color32 block_colors[4];
  5340. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  5341. for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
  5342. {
  5343. const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
  5344. const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
  5345. for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
  5346. {
  5347. uint32_t best_lo = 0;
  5348. uint32_t best_hi = 0;
  5349. uint64_t best_err = UINT64_MAX;
  5350. for (uint32_t hi = 0; hi <= 15; hi++)
  5351. {
  5352. for (uint32_t lo = 0; lo <= 7; lo++)
  5353. {
  5354. uint32_t colors[4];
  5355. colors[0] = (lo << 2) | (lo >> 1);
  5356. colors[0] = (colors[0] << 3) | (colors[0] >> 2);
  5357. colors[3] = (hi << 1) | (hi >> 3);
  5358. colors[3] = (colors[3] << 3) | (colors[3] >> 2);
  5359. colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
  5360. colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
  5361. uint64_t total_err = 0;
  5362. for (uint32_t s = low_selector; s <= high_selector; s++)
  5363. {
  5364. int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
  5365. int err_scale = 1;
  5366. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  5367. // the low/high selectors which are clamping to either 0 or 255.
  5368. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  5369. err_scale = 5;
  5370. total_err += (err * err) * err_scale;
  5371. }
  5372. if (total_err < best_err)
  5373. {
  5374. best_err = total_err;
  5375. best_lo = lo;
  5376. best_hi = hi;
  5377. }
  5378. }
  5379. }
  5380. //assert(best_err <= 0xFFFF);
  5381. best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
  5382. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
  5383. n++;
  5384. if ((n & 31) == 31)
  5385. fprintf(pFile, "\n");
  5386. } // m
  5387. } // sr
  5388. } // g
  5389. } // inten
  5390. fclose(pFile);
  5391. #endif
  5392. #if 0
  5393. // PVRTC2 44
  5394. fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_44.inc", "w");
  5395. n = 0;
  5396. for (int inten = 0; inten < 8; inten++)
  5397. {
  5398. for (uint32_t g = 0; g < 32; g++)
  5399. {
  5400. color32 block_colors[4];
  5401. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  5402. for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
  5403. {
  5404. const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
  5405. const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
  5406. for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
  5407. {
  5408. uint32_t best_lo = 0;
  5409. uint32_t best_hi = 0;
  5410. uint64_t best_err = UINT64_MAX;
  5411. for (uint32_t hi = 0; hi <= 15; hi++)
  5412. {
  5413. for (uint32_t lo = 0; lo <= 15; lo++)
  5414. {
  5415. uint32_t colors[4];
  5416. colors[0] = (lo << 1) | (lo >> 3);
  5417. colors[0] = (colors[0] << 3) | (colors[0] >> 2);
  5418. colors[3] = (hi << 1) | (hi >> 3);
  5419. colors[3] = (colors[3] << 3) | (colors[3] >> 2);
  5420. colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
  5421. colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
  5422. uint64_t total_err = 0;
  5423. for (uint32_t s = low_selector; s <= high_selector; s++)
  5424. {
  5425. int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
  5426. int err_scale = 1;
  5427. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  5428. // the low/high selectors which are clamping to either 0 or 255.
  5429. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  5430. err_scale = 5;
  5431. total_err += (err * err) * err_scale;
  5432. }
  5433. if (total_err < best_err)
  5434. {
  5435. best_err = total_err;
  5436. best_lo = lo;
  5437. best_hi = hi;
  5438. }
  5439. }
  5440. }
  5441. //assert(best_err <= 0xFFFF);
  5442. best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
  5443. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
  5444. n++;
  5445. if ((n & 31) == 31)
  5446. fprintf(pFile, "\n");
  5447. } // m
  5448. } // sr
  5449. } // g
  5450. } // inten
  5451. fclose(pFile);
  5452. #endif
  5453. // PVRTC2 alpha 33
  5454. fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_alpha_33.inc", "w");
  5455. n = 0;
  5456. for (int inten = 0; inten < 8; inten++)
  5457. {
  5458. for (uint32_t g = 0; g < 32; g++)
  5459. {
  5460. color32 block_colors[4];
  5461. decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
  5462. for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
  5463. {
  5464. const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
  5465. const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
  5466. for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
  5467. {
  5468. uint32_t best_lo = 0;
  5469. uint32_t best_hi = 0;
  5470. uint64_t best_err = UINT64_MAX;
  5471. for (uint32_t hi = 0; hi <= 7; hi++)
  5472. {
  5473. for (uint32_t lo = 0; lo <= 7; lo++)
  5474. {
  5475. uint32_t colors[4];
  5476. colors[0] = (lo << 1);
  5477. colors[0] = (colors[0] << 4) | colors[0];
  5478. colors[3] = (hi << 1) | 1;
  5479. colors[3] = (colors[3] << 4) | colors[3];
  5480. colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
  5481. colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
  5482. uint64_t total_err = 0;
  5483. for (uint32_t s = low_selector; s <= high_selector; s++)
  5484. {
  5485. int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
  5486. int err_scale = 1;
  5487. // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
  5488. // the low/high selectors which are clamping to either 0 or 255.
  5489. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
  5490. err_scale = 5;
  5491. total_err += (err * err) * err_scale;
  5492. }
  5493. if (total_err < best_err)
  5494. {
  5495. best_err = total_err;
  5496. best_lo = lo;
  5497. best_hi = hi;
  5498. }
  5499. }
  5500. }
  5501. //assert(best_err <= 0xFFFF);
  5502. best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
  5503. fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
  5504. n++;
  5505. if ((n & 31) == 31)
  5506. fprintf(pFile, "\n");
  5507. } // m
  5508. } // sr
  5509. } // g
  5510. } // inten
  5511. fclose(pFile);
  5512. }
  5513. #endif // BASISD_WRITE_NEW_ATC_TABLES
  5514. #endif // BASISD_SUPPORT_ATC
  5515. #if BASISD_SUPPORT_PVRTC2
  5516. struct pvrtc2_block
  5517. {
  5518. uint8_t m_modulation[4];
  5519. union
  5520. {
  5521. union
  5522. {
  5523. // Opaque mode: RGB colora=554 and colorb=555
  5524. struct
  5525. {
  5526. uint32_t m_mod_flag : 1;
  5527. uint32_t m_blue_a : 4;
  5528. uint32_t m_green_a : 5;
  5529. uint32_t m_red_a : 5;
  5530. uint32_t m_hard_flag : 1;
  5531. uint32_t m_blue_b : 5;
  5532. uint32_t m_green_b : 5;
  5533. uint32_t m_red_b : 5;
  5534. uint32_t m_opaque_flag : 1;
  5535. } m_opaque_color_data;
  5536. // Transparent mode: RGBA colora=4433 and colorb=4443
  5537. struct
  5538. {
  5539. uint32_t m_mod_flag : 1;
  5540. uint32_t m_blue_a : 3;
  5541. uint32_t m_green_a : 4;
  5542. uint32_t m_red_a : 4;
  5543. uint32_t m_alpha_a : 3;
  5544. uint32_t m_hard_flag : 1;
  5545. uint32_t m_blue_b : 4;
  5546. uint32_t m_green_b : 4;
  5547. uint32_t m_red_b : 4;
  5548. uint32_t m_alpha_b : 3;
  5549. uint32_t m_opaque_flag : 1;
  5550. } m_trans_color_data;
  5551. };
  5552. uint32_t m_color_data_bits;
  5553. };
  5554. // 554
  5555. void set_low_color(uint32_t r, uint32_t g, uint32_t b)
  5556. {
  5557. assert((r < 32) && (g < 32) && (b < 16));
  5558. m_opaque_color_data.m_red_a = r;
  5559. m_opaque_color_data.m_green_a = g;
  5560. m_opaque_color_data.m_blue_a = b;
  5561. }
  5562. // 555
  5563. void set_high_color(uint32_t r, uint32_t g, uint32_t b)
  5564. {
  5565. assert((r < 32) && (g < 32) && (b < 32));
  5566. m_opaque_color_data.m_red_b = r;
  5567. m_opaque_color_data.m_green_b = g;
  5568. m_opaque_color_data.m_blue_b = b;
  5569. }
  5570. // 4433
  5571. void set_trans_low_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
  5572. {
  5573. assert((r < 16) && (g < 16) && (b < 8) && (a < 8));
  5574. m_trans_color_data.m_red_a = r;
  5575. m_trans_color_data.m_green_a = g;
  5576. m_trans_color_data.m_blue_a = b;
  5577. m_trans_color_data.m_alpha_a = a;
  5578. }
  5579. // 4443
  5580. void set_trans_high_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
  5581. {
  5582. assert((r < 16) && (g < 16) && (b < 16) && (a < 8));
  5583. m_trans_color_data.m_red_b = r;
  5584. m_trans_color_data.m_green_b = g;
  5585. m_trans_color_data.m_blue_b = b;
  5586. m_trans_color_data.m_alpha_b = a;
  5587. }
  5588. };
  5589. static struct
  5590. {
  5591. uint8_t m_l, m_h;
  5592. } g_pvrtc2_trans_match34[256];
  5593. static struct
  5594. {
  5595. uint8_t m_l, m_h;
  5596. } g_pvrtc2_trans_match44[256];
  5597. static struct
  5598. {
  5599. uint8_t m_l, m_h;
  5600. } g_pvrtc2_alpha_match33[256];
  5601. static struct
  5602. {
  5603. uint8_t m_l, m_h;
  5604. } g_pvrtc2_alpha_match33_0[256];
  5605. static struct
  5606. {
  5607. uint8_t m_l, m_h;
  5608. } g_pvrtc2_alpha_match33_3[256];
  5609. // PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity.
  5610. static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
  5611. {
  5612. pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);
  5613. pBlock->m_opaque_color_data.m_hard_flag = 1;
  5614. pBlock->m_opaque_color_data.m_mod_flag = 0;
  5615. pBlock->m_opaque_color_data.m_opaque_flag = 1;
  5616. const uint32_t low_selector = pSelector->m_lo_selector;
  5617. const uint32_t high_selector = pSelector->m_hi_selector;
  5618. const color32& base_color = pEndpoints->m_color5;
  5619. const uint32_t inten_table = pEndpoints->m_inten5;
  5620. if (low_selector == high_selector)
  5621. {
  5622. uint32_t r, g, b;
  5623. decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
  5624. pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match55_equals_1[g].m_lo, g_pvrtc2_match45_equals_1[b].m_lo);
  5625. pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match55_equals_1[g].m_hi, g_pvrtc2_match45_equals_1[b].m_hi);
  5626. pBlock->m_modulation[0] = 0x55;
  5627. pBlock->m_modulation[1] = 0x55;
  5628. pBlock->m_modulation[2] = 0x55;
  5629. pBlock->m_modulation[3] = 0x55;
  5630. return;
  5631. }
  5632. else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
  5633. {
  5634. color32 block_colors[4];
  5635. decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
  5636. const uint32_t r0 = block_colors[0].r;
  5637. const uint32_t g0 = block_colors[0].g;
  5638. const uint32_t b0 = block_colors[0].b;
  5639. const uint32_t r1 = block_colors[3].r;
  5640. const uint32_t g1 = block_colors[3].g;
  5641. const uint32_t b1 = block_colors[3].b;
  5642. pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_pvrtc2_match4[b0].m_hi);
  5643. pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match5[g1].m_hi, g_atc_match5[b1].m_hi);
  5644. pBlock->m_modulation[0] = pSelector->m_selectors[0];
  5645. pBlock->m_modulation[1] = pSelector->m_selectors[1];
  5646. pBlock->m_modulation[2] = pSelector->m_selectors[2];
  5647. pBlock->m_modulation[3] = pSelector->m_selectors[3];
  5648. return;
  5649. }
  5650. const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];
  5651. //[32][8][RANGES][MAPPING]
  5652. const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
  5653. const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
  5654. const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_pvrtc2_45[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
  5655. uint32_t best_err = UINT_MAX;
  5656. uint32_t best_mapping = 0;
  5657. assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10);
  5658. #define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
  5659. DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
  5660. DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
  5661. #undef DO_ITER
  5662. pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
  5663. pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
  5664. if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping)
  5665. {
  5666. pBlock->m_modulation[0] = pSelector->m_selectors[0];
  5667. pBlock->m_modulation[1] = pSelector->m_selectors[1];
  5668. pBlock->m_modulation[2] = pSelector->m_selectors[2];
  5669. pBlock->m_modulation[3] = pSelector->m_selectors[3];
  5670. }
  5671. else
  5672. {
  5673. // TODO: We could make this faster using several precomputed 256 entry tables, like ETC1S->BC1 does.
  5674. const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];
  5675. const uint32_t sel_bits0 = pSelector->m_selectors[0];
  5676. const uint32_t sel_bits1 = pSelector->m_selectors[1];
  5677. const uint32_t sel_bits2 = pSelector->m_selectors[2];
  5678. const uint32_t sel_bits3 = pSelector->m_selectors[3];
  5679. uint32_t sels0 = 0, sels1 = 0, sels2 = 0, sels3 = 0;
  5680. #define DO_X(x) { \
  5681. const uint32_t x_shift = (x) * 2; \
  5682. sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \
  5683. sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \
  5684. sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \
  5685. sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); }
  5686. DO_X(0);
  5687. DO_X(1);
  5688. DO_X(2);
  5689. DO_X(3);
  5690. #undef DO_X
  5691. pBlock->m_modulation[0] = (uint8_t)sels0;
  5692. pBlock->m_modulation[1] = (uint8_t)sels1;
  5693. pBlock->m_modulation[2] = (uint8_t)sels2;
  5694. pBlock->m_modulation[3] = (uint8_t)sels3;
  5695. }
  5696. }
  5697. typedef struct { float c[4]; } vec4F;
  5698. static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; }
  5699. static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; }
  5700. static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; }
  5701. static inline vec4F vec4F_saturate(const vec4F* pV) { vec4F res; res.c[0] = saturate(pV->c[0]); res.c[1] = saturate(pV->c[1]); res.c[2] = saturate(pV->c[2]); res.c[3] = saturate(pV->c[3]); return res; }
  5702. static inline vec4F vec4F_from_color(const color32* pC) { vec4F res; vec4F_set(&res, pC->c[0], pC->c[1], pC->c[2], pC->c[3]); return res; }
  5703. static inline vec4F vec4F_add(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] + pRHS->c[0], pLHS->c[1] + pRHS->c[1], pLHS->c[2] + pRHS->c[2], pLHS->c[3] + pRHS->c[3]); return res; }
  5704. static inline vec4F vec4F_sub(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] - pRHS->c[0], pLHS->c[1] - pRHS->c[1], pLHS->c[2] - pRHS->c[2], pLHS->c[3] - pRHS->c[3]); return res; }
  5705. static inline float vec4F_dot(const vec4F* pLHS, const vec4F* pRHS) { return pLHS->c[0] * pRHS->c[0] + pLHS->c[1] * pRHS->c[1] + pLHS->c[2] * pRHS->c[2] + pLHS->c[3] * pRHS->c[3]; }
  5706. static inline vec4F vec4F_mul(const vec4F* pLHS, float s) { vec4F res; vec4F_set(&res, pLHS->c[0] * s, pLHS->c[1] * s, pLHS->c[2] * s, pLHS->c[3] * s); return res; }
  5707. static inline vec4F* vec4F_normalize_in_place(vec4F* pV) { float s = pV->c[0] * pV->c[0] + pV->c[1] * pV->c[1] + pV->c[2] * pV->c[2] + pV->c[3] * pV->c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->c[0] *= s; pV->c[1] *= s; pV->c[2] *= s; pV->c[3] *= s; } return pV; }
  5708. static color32 convert_rgba_5554_to_8888(const color32& col)
  5709. {
  5710. return color32((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]);
  5711. }
  5712. static inline int sq(int x) { return x * x; }
  5713. // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0.
  5714. // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha!
  5715. // And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
  5716. static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
  5717. {
  5718. pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);
  5719. const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pBlock)[0]];
  5720. const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pBlock)[1]];
  5721. pBlock->m_opaque_color_data.m_hard_flag = 1;
  5722. pBlock->m_opaque_color_data.m_mod_flag = 0;
  5723. pBlock->m_opaque_color_data.m_opaque_flag = 0;
  5724. const int num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors;
  5725. const color32& alpha_base_color = alpha_endpoint.m_color5;
  5726. const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
  5727. int constant_alpha_val = -1;
  5728. int alpha_block_colors[4];
  5729. decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
  5730. if (num_unique_alpha_selectors == 1)
  5731. {
  5732. constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
  5733. }
  5734. else
  5735. {
  5736. constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
  5737. for (uint32_t i = alpha_selectors.m_lo_selector + 1; i <= alpha_selectors.m_hi_selector; i++)
  5738. {
  5739. if (constant_alpha_val != alpha_block_colors[i])
  5740. {
  5741. constant_alpha_val = -1;
  5742. break;
  5743. }
  5744. }
  5745. }
  5746. if (constant_alpha_val >= 250)
  5747. {
  5748. // It's opaque enough, so don't bother trying to encode it as an alpha block.
  5749. convert_etc1s_to_pvrtc2_rgb(pDst, pEndpoints, pSelector);
  5750. return;
  5751. }
  5752. const color32& base_color = pEndpoints->m_color5;
  5753. const uint32_t inten_table = pEndpoints->m_inten5;
  5754. const uint32_t low_selector = pSelector->m_lo_selector;
  5755. const uint32_t high_selector = pSelector->m_hi_selector;
  5756. const int num_unique_color_selectors = pSelector->m_num_unique_selectors;
  5757. // We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes.
  5758. // Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values.
  5759. const int br = (base_color.r << 3) | (base_color.r >> 2);
  5760. const int bg = (base_color.g << 3) | (base_color.g >> 2);
  5761. const int bb = (base_color.b << 3) | (base_color.b >> 2);
  5762. color32 block_cols[4];
  5763. for (uint32_t i = 0; i < 4; i++)
  5764. {
  5765. const int ci = g_etc1_inten_tables[inten_table][i];
  5766. block_cols[i].set_clamped(br + ci, bg + ci, bb + ci, alpha_block_colors[i]);
  5767. }
  5768. bool solid_color_block = true;
  5769. if (num_unique_color_selectors > 1)
  5770. {
  5771. for (uint32_t i = low_selector + 1; i <= high_selector; i++)
  5772. {
  5773. if ((block_cols[low_selector].r != block_cols[i].r) || (block_cols[low_selector].g != block_cols[i].g) || (block_cols[low_selector].b != block_cols[i].b))
  5774. {
  5775. solid_color_block = false;
  5776. break;
  5777. }
  5778. }
  5779. }
  5780. if ((solid_color_block) && (constant_alpha_val >= 0))
  5781. {
  5782. // Constant color/alpha block.
  5783. // This is more complex than it may seem because of the way color and alpha are packed in PVRTC2. We need to evaluate mod0, mod1 and mod3 encodings to find the best one.
  5784. uint32_t r, g, b;
  5785. decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
  5786. // Mod 0
  5787. uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255;
  5788. uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l;
  5789. uint32_t cr0 = (lr0 << 1) | (lr0 >> 3);
  5790. uint32_t cg0 = (lg0 << 1) | (lg0 >> 3);
  5791. uint32_t cb0 = (lb0 << 2) | (lb0 >> 1);
  5792. uint32_t ca0 = (la0 << 1);
  5793. cr0 = (cr0 << 3) | (cr0 >> 2);
  5794. cg0 = (cg0 << 3) | (cg0 >> 2);
  5795. cb0 = (cb0 << 3) | (cb0 >> 2);
  5796. ca0 = (ca0 << 4) | ca0;
  5797. uint32_t err0 = sq(cr0 - r) + sq(cg0 - g) + sq(cb0 - b) + sq(ca0 - constant_alpha_val) * 2;
  5798. // If the alpha is < 3 or so we're kinda screwed. It's better to have some RGB error than it is to turn a 100% transparent area slightly opaque.
  5799. if ((err0 == 0) || (constant_alpha_val < 3))
  5800. {
  5801. pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
  5802. pBlock->set_trans_high_color(0, 0, 0, 0);
  5803. pBlock->m_modulation[0] = 0;
  5804. pBlock->m_modulation[1] = 0;
  5805. pBlock->m_modulation[2] = 0;
  5806. pBlock->m_modulation[3] = 0;
  5807. return;
  5808. }
  5809. // Mod 3
  5810. uint32_t lr3 = (r * 15 + 128) / 255, lg3 = (g * 15 + 128) / 255, lb3 = (b * 15 + 128) / 255;
  5811. uint32_t la3 = g_pvrtc2_alpha_match33_3[constant_alpha_val].m_l;
  5812. uint32_t cr3 = (lr3 << 1) | (lr3 >> 3);
  5813. uint32_t cg3 = (lg3 << 1) | (lg3 >> 3);
  5814. uint32_t cb3 = (lb3 << 1) | (lb3 >> 3);
  5815. uint32_t ca3 = (la3 << 1) | 1;
  5816. cr3 = (cr3 << 3) | (cr3 >> 2);
  5817. cg3 = (cg3 << 3) | (cg3 >> 2);
  5818. cb3 = (cb3 << 3) | (cb3 >> 2);
  5819. ca3 = (ca3 << 4) | ca3;
  5820. uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2;
  5821. // Mod 1
  5822. uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l;
  5823. uint32_t hr1 = g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h;
  5824. uint32_t la1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_l, ha1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_h;
  5825. uint32_t clr1 = (lr1 << 1) | (lr1 >> 3);
  5826. uint32_t clg1 = (lg1 << 1) | (lg1 >> 3);
  5827. uint32_t clb1 = (lb1 << 2) | (lb1 >> 1);
  5828. uint32_t cla1 = (la1 << 1);
  5829. clr1 = (clr1 << 3) | (clr1 >> 2);
  5830. clg1 = (clg1 << 3) | (clg1 >> 2);
  5831. clb1 = (clb1 << 3) | (clb1 >> 2);
  5832. cla1 = (cla1 << 4) | cla1;
  5833. uint32_t chr1 = (hr1 << 1) | (hr1 >> 3);
  5834. uint32_t chg1 = (hg1 << 1) | (hg1 >> 3);
  5835. uint32_t chb1 = (hb1 << 1) | (hb1 >> 3);
  5836. uint32_t cha1 = (ha1 << 1) | 1;
  5837. chr1 = (chr1 << 3) | (chr1 >> 2);
  5838. chg1 = (chg1 << 3) | (chg1 >> 2);
  5839. chb1 = (chb1 << 3) | (chb1 >> 2);
  5840. cha1 = (cha1 << 4) | cha1;
  5841. uint32_t r1 = (clr1 * 5 + chr1 * 3) / 8;
  5842. uint32_t g1 = (clg1 * 5 + chg1 * 3) / 8;
  5843. uint32_t b1 = (clb1 * 5 + chb1 * 3) / 8;
  5844. uint32_t a1 = (cla1 * 5 + cha1 * 3) / 8;
  5845. uint32_t err1 = sq(r1 - r) + sq(g1 - g) + sq(b1 - b) + sq(a1 - constant_alpha_val) * 2;
  5846. if ((err1 < err0) && (err1 < err3))
  5847. {
  5848. pBlock->set_trans_low_color(lr1, lg1, lb1, la1);
  5849. pBlock->set_trans_high_color(hr1, hg1, hb1, ha1);
  5850. pBlock->m_modulation[0] = 0x55;
  5851. pBlock->m_modulation[1] = 0x55;
  5852. pBlock->m_modulation[2] = 0x55;
  5853. pBlock->m_modulation[3] = 0x55;
  5854. }
  5855. else if (err0 < err3)
  5856. {
  5857. pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
  5858. pBlock->set_trans_high_color(0, 0, 0, 0);
  5859. pBlock->m_modulation[0] = 0;
  5860. pBlock->m_modulation[1] = 0;
  5861. pBlock->m_modulation[2] = 0;
  5862. pBlock->m_modulation[3] = 0;
  5863. }
  5864. else
  5865. {
  5866. pBlock->set_trans_low_color(0, 0, 0, 0);
  5867. pBlock->set_trans_high_color(lr3, lg3, lb3, la3);
  5868. pBlock->m_modulation[0] = 0xFF;
  5869. pBlock->m_modulation[1] = 0xFF;
  5870. pBlock->m_modulation[2] = 0xFF;
  5871. pBlock->m_modulation[3] = 0xFF;
  5872. }
  5873. return;
  5874. }
  5875. // It's a complex block with non-solid color and/or alpha pixels.
  5876. vec4F minColor, maxColor;
  5877. if (solid_color_block)
  5878. {
  5879. // It's a solid color block.
  5880. uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a;
  5881. uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a;
  5882. const float S = 1.0f / 255.0f;
  5883. vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S);
  5884. vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S);
  5885. }
  5886. else if (constant_alpha_val >= 0)
  5887. {
  5888. // It's a solid alpha block.
  5889. const float S = 1.0f / 255.0f;
  5890. vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S);
  5891. vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S);
  5892. }
  5893. // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis).
  5894. // To keep quality up we need to use full 4D PCA in this case.
  5895. else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) ||
  5896. (block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) ||
  5897. (block_cols[low_selector].c[2] == 0) || (block_cols[high_selector].c[2] == 255) ||
  5898. (block_cols[alpha_selectors.m_lo_selector].c[3] == 0) || (block_cols[alpha_selectors.m_hi_selector].c[3] == 255))
  5899. {
  5900. // Find principle component of RGBA colors treated as 4D vectors.
  5901. color32 pixels[16];
  5902. uint32_t sum_r = 0, sum_g = 0, sum_b = 0, sum_a = 0;
  5903. for (uint32_t i = 0; i < 16; i++)
  5904. {
  5905. color32 rgb(block_cols[pSelector->get_selector(i & 3, i >> 2)]);
  5906. uint32_t a = block_cols[alpha_selectors.get_selector(i & 3, i >> 2)].a;
  5907. pixels[i].set(rgb.r, rgb.g, rgb.b, a);
  5908. sum_r += rgb.r;
  5909. sum_g += rgb.g;
  5910. sum_b += rgb.b;
  5911. sum_a += a;
  5912. }
  5913. vec4F meanColor;
  5914. vec4F_set(&meanColor, (float)sum_r, (float)sum_g, (float)sum_b, (float)sum_a);
  5915. vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / 16.0f);
  5916. meanColor = vec4F_mul(&meanColor, 1.0f / (float)(16.0f * 255.0f));
  5917. vec4F_saturate_in_place(&meanColor);
  5918. vec4F axis;
  5919. vec4F_set_scalar(&axis, 0.0f);
  5920. // Why this incremental method? Because it's stable and predictable. Covar+power method can require a lot of iterations to converge in 4D.
  5921. for (uint32_t i = 0; i < 16; i++)
  5922. {
  5923. vec4F color = vec4F_from_color(&pixels[i]);
  5924. color = vec4F_sub(&color, &meanColorScaled);
  5925. vec4F a = vec4F_mul(&color, color.c[0]);
  5926. vec4F b = vec4F_mul(&color, color.c[1]);
  5927. vec4F c = vec4F_mul(&color, color.c[2]);
  5928. vec4F d = vec4F_mul(&color, color.c[3]);
  5929. vec4F n = i ? axis : color;
  5930. vec4F_normalize_in_place(&n);
  5931. axis.c[0] += vec4F_dot(&a, &n);
  5932. axis.c[1] += vec4F_dot(&b, &n);
  5933. axis.c[2] += vec4F_dot(&c, &n);
  5934. axis.c[3] += vec4F_dot(&d, &n);
  5935. }
  5936. vec4F_normalize_in_place(&axis);
  5937. if (vec4F_dot(&axis, &axis) < .5f)
  5938. vec4F_set_scalar(&axis, .5f);
  5939. float l = 1e+9f, h = -1e+9f;
  5940. for (uint32_t i = 0; i < 16; i++)
  5941. {
  5942. vec4F color = vec4F_from_color(&pixels[i]);
  5943. vec4F q = vec4F_sub(&color, &meanColorScaled);
  5944. float d = vec4F_dot(&q, &axis);
  5945. l = basisu::minimum(l, d);
  5946. h = basisu::maximum(h, d);
  5947. }
  5948. l *= (1.0f / 255.0f);
  5949. h *= (1.0f / 255.0f);
  5950. vec4F b0 = vec4F_mul(&axis, l);
  5951. vec4F b1 = vec4F_mul(&axis, h);
  5952. vec4F c0 = vec4F_add(&meanColor, &b0);
  5953. vec4F c1 = vec4F_add(&meanColor, &b1);
  5954. minColor = vec4F_saturate(&c0);
  5955. maxColor = vec4F_saturate(&c1);
  5956. if (minColor.c[3] > maxColor.c[3])
  5957. {
  5958. // VS 2019 release Code Generator issue
  5959. //std::swap(minColor, maxColor);
  5960. float a = minColor.c[0], b = minColor.c[1], c = minColor.c[2], d = minColor.c[3];
  5961. minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
  5962. minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
  5963. maxColor.c[0] = a; maxColor.c[1] = b; maxColor.c[2] = c; maxColor.c[3] = d;
  5964. }
  5965. }
  5966. else
  5967. {
  5968. // We know the RGB axis is luma, because it's an ETC1S block and none of the block colors got clamped. So we only need to use 2D PCA.
  5969. // We project each LA vector onto two 2D lines with axes (1,1) and (1,-1) and find the largest projection to determine if axis A is flipped relative to L.
  5970. uint32_t block_cols_l[4], block_cols_a[4];
  5971. for (uint32_t i = 0; i < 4; i++)
  5972. {
  5973. block_cols_l[i] = block_cols[i].r + block_cols[i].g + block_cols[i].b;
  5974. block_cols_a[i] = block_cols[i].a * 3;
  5975. }
  5976. int p0_min = INT_MAX, p0_max = INT_MIN;
  5977. int p1_min = INT_MAX, p1_max = INT_MIN;
  5978. for (uint32_t y = 0; y < 4; y++)
  5979. {
  5980. const uint32_t cs = pSelector->m_selectors[y];
  5981. const uint32_t as = alpha_selectors.m_selectors[y];
  5982. {
  5983. const int l = block_cols_l[cs & 3];
  5984. const int a = block_cols_a[as & 3];
  5985. const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
  5986. const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
  5987. }
  5988. {
  5989. const int l = block_cols_l[(cs >> 2) & 3];
  5990. const int a = block_cols_a[(as >> 2) & 3];
  5991. const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
  5992. const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
  5993. }
  5994. {
  5995. const int l = block_cols_l[(cs >> 4) & 3];
  5996. const int a = block_cols_a[(as >> 4) & 3];
  5997. const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
  5998. const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
  5999. }
  6000. {
  6001. const int l = block_cols_l[cs >> 6];
  6002. const int a = block_cols_a[as >> 6];
  6003. const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
  6004. const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
  6005. }
  6006. }
  6007. int dist0 = p0_max - p0_min;
  6008. int dist1 = p1_max - p1_min;
  6009. const float S = 1.0f / 255.0f;
  6010. vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, block_cols[alpha_selectors.m_lo_selector].a * S);
  6011. vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, block_cols[alpha_selectors.m_hi_selector].a * S);
  6012. // See if the A component of the principle axis is flipped relative to L. If so, we need to flip either RGB or A bounds.
  6013. if (dist1 > dist0)
  6014. {
  6015. std::swap(minColor.c[0], maxColor.c[0]);
  6016. std::swap(minColor.c[1], maxColor.c[1]);
  6017. std::swap(minColor.c[2], maxColor.c[2]);
  6018. }
  6019. }
  6020. // 4433 4443
  6021. color32 trialMinColor, trialMaxColor;
  6022. trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f));
  6023. trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f));
  6024. pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a);
  6025. pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a);
  6026. color32 color_a((trialMinColor.r << 1) | (trialMinColor.r >> 3), (trialMinColor.g << 1) | (trialMinColor.g >> 3), (trialMinColor.b << 2) | (trialMinColor.b >> 1), trialMinColor.a << 1);
  6027. color32 color_b((trialMaxColor.r << 1) | (trialMaxColor.r >> 3), (trialMaxColor.g << 1) | (trialMaxColor.g >> 3), (trialMaxColor.b << 1) | (trialMaxColor.b >> 3), (trialMaxColor.a << 1) | 1);
  6028. color32 color0(convert_rgba_5554_to_8888(color_a));
  6029. color32 color3(convert_rgba_5554_to_8888(color_b));
  6030. const int lr = color0.r;
  6031. const int lg = color0.g;
  6032. const int lb = color0.b;
  6033. const int la = color0.a;
  6034. const int axis_r = color3.r - lr;
  6035. const int axis_g = color3.g - lg;
  6036. const int axis_b = color3.b - lb;
  6037. const int axis_a = color3.a - la;
  6038. const int len_a = (axis_r * axis_r) + (axis_g * axis_g) + (axis_b * axis_b) + (axis_a * axis_a);
  6039. const int thresh01 = (len_a * 3) / 16;
  6040. const int thresh12 = len_a >> 1;
  6041. const int thresh23 = (len_a * 13) / 16;
  6042. if ((axis_r | axis_g | axis_b) == 0)
  6043. {
  6044. int ca_sel[4];
  6045. for (uint32_t i = 0; i < 4; i++)
  6046. {
  6047. int ca = (block_cols[i].a - la) * axis_a;
  6048. ca_sel[i] = (ca >= thresh23) + (ca >= thresh12) + (ca >= thresh01);
  6049. }
  6050. for (uint32_t y = 0; y < 4; y++)
  6051. {
  6052. const uint32_t a_sels = alpha_selectors.m_selectors[y];
  6053. uint32_t sel = ca_sel[a_sels & 3] | (ca_sel[(a_sels >> 2) & 3] << 2) | (ca_sel[(a_sels >> 4) & 3] << 4) | (ca_sel[a_sels >> 6] << 6);
  6054. pBlock->m_modulation[y] = (uint8_t)sel;
  6055. }
  6056. }
  6057. else
  6058. {
  6059. int cy[4], ca[4];
  6060. for (uint32_t i = 0; i < 4; i++)
  6061. {
  6062. cy[i] = (block_cols[i].r - lr) * axis_r + (block_cols[i].g - lg) * axis_g + (block_cols[i].b - lb) * axis_b;
  6063. ca[i] = (block_cols[i].a - la) * axis_a;
  6064. }
  6065. for (uint32_t y = 0; y < 4; y++)
  6066. {
  6067. const uint32_t c_sels = pSelector->m_selectors[y];
  6068. const uint32_t a_sels = alpha_selectors.m_selectors[y];
  6069. const int d0 = cy[c_sels & 3] + ca[a_sels & 3];
  6070. const int d1 = cy[(c_sels >> 2) & 3] + ca[(a_sels >> 2) & 3];
  6071. const int d2 = cy[(c_sels >> 4) & 3] + ca[(a_sels >> 4) & 3];
  6072. const int d3 = cy[c_sels >> 6] + ca[a_sels >> 6];
  6073. uint32_t sel = ((d0 >= thresh23) + (d0 >= thresh12) + (d0 >= thresh01)) |
  6074. (((d1 >= thresh23) + (d1 >= thresh12) + (d1 >= thresh01)) << 2) |
  6075. (((d2 >= thresh23) + (d2 >= thresh12) + (d2 >= thresh01)) << 4) |
  6076. (((d3 >= thresh23) + (d3 >= thresh12) + (d3 >= thresh01)) << 6);
  6077. pBlock->m_modulation[y] = (uint8_t)sel;
  6078. }
  6079. }
  6080. }
  6081. static void transcoder_init_pvrtc2()
  6082. {
  6083. for (uint32_t v = 0; v < 256; v++)
  6084. {
  6085. int best_l = 0, best_h = 0, lowest_err = INT_MAX;
  6086. for (uint32_t l = 0; l < 8; l++)
  6087. {
  6088. uint32_t le = (l << 1);
  6089. le = (le << 4) | le;
  6090. for (uint32_t h = 0; h < 8; h++)
  6091. {
  6092. uint32_t he = (h << 1) | 1;
  6093. he = (he << 4) | he;
  6094. uint32_t m = (le * 5 + he * 3) / 8;
  6095. int err = (int)labs((int)v - (int)m);
  6096. if (err < lowest_err)
  6097. {
  6098. lowest_err = err;
  6099. best_l = l;
  6100. best_h = h;
  6101. }
  6102. }
  6103. }
  6104. g_pvrtc2_alpha_match33[v].m_l = (uint8_t)best_l;
  6105. g_pvrtc2_alpha_match33[v].m_h = (uint8_t)best_h;
  6106. }
  6107. for (uint32_t v = 0; v < 256; v++)
  6108. {
  6109. int best_l = 0, best_h = 0, lowest_err = INT_MAX;
  6110. for (uint32_t l = 0; l < 8; l++)
  6111. {
  6112. uint32_t le = (l << 1);
  6113. le = (le << 4) | le;
  6114. int err = (int)labs((int)v - (int)le);
  6115. if (err < lowest_err)
  6116. {
  6117. lowest_err = err;
  6118. best_l = l;
  6119. best_h = l;
  6120. }
  6121. }
  6122. g_pvrtc2_alpha_match33_0[v].m_l = (uint8_t)best_l;
  6123. g_pvrtc2_alpha_match33_0[v].m_h = (uint8_t)best_h;
  6124. }
  6125. for (uint32_t v = 0; v < 256; v++)
  6126. {
  6127. int best_l = 0, best_h = 0, lowest_err = INT_MAX;
  6128. for (uint32_t h = 0; h < 8; h++)
  6129. {
  6130. uint32_t he = (h << 1) | 1;
  6131. he = (he << 4) | he;
  6132. int err = (int)labs((int)v - (int)he);
  6133. if (err < lowest_err)
  6134. {
  6135. lowest_err = err;
  6136. best_l = h;
  6137. best_h = h;
  6138. }
  6139. }
  6140. g_pvrtc2_alpha_match33_3[v].m_l = (uint8_t)best_l;
  6141. g_pvrtc2_alpha_match33_3[v].m_h = (uint8_t)best_h;
  6142. }
  6143. for (uint32_t v = 0; v < 256; v++)
  6144. {
  6145. int best_l = 0, best_h = 0, lowest_err = INT_MAX;
  6146. for (uint32_t l = 0; l < 8; l++)
  6147. {
  6148. uint32_t le = (l << 2) | (l >> 1);
  6149. le = (le << 3) | (le >> 2);
  6150. for (uint32_t h = 0; h < 16; h++)
  6151. {
  6152. uint32_t he = (h << 1) | (h >> 3);
  6153. he = (he << 3) | (he >> 2);
  6154. uint32_t m = (le * 5 + he * 3) / 8;
  6155. int err = (int)labs((int)v - (int)m);
  6156. if (err < lowest_err)
  6157. {
  6158. lowest_err = err;
  6159. best_l = l;
  6160. best_h = h;
  6161. }
  6162. }
  6163. }
  6164. g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l;
  6165. g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h;
  6166. }
  6167. for (uint32_t v = 0; v < 256; v++)
  6168. {
  6169. int best_l = 0, best_h = 0, lowest_err = INT_MAX;
  6170. for (uint32_t l = 0; l < 16; l++)
  6171. {
  6172. uint32_t le = (l << 1) | (l >> 3);
  6173. le = (le << 3) | (le >> 2);
  6174. for (uint32_t h = 0; h < 16; h++)
  6175. {
  6176. uint32_t he = (h << 1) | (h >> 3);
  6177. he = (he << 3) | (he >> 2);
  6178. uint32_t m = (le * 5 + he * 3) / 8;
  6179. int err = (int)labs((int)v - (int)m);
  6180. if (err < lowest_err)
  6181. {
  6182. lowest_err = err;
  6183. best_l = l;
  6184. best_h = h;
  6185. }
  6186. }
  6187. }
  6188. g_pvrtc2_trans_match44[v].m_l = (uint8_t)best_l;
  6189. g_pvrtc2_trans_match44[v].m_h = (uint8_t)best_h;
  6190. }
  6191. }
  6192. #endif // BASISD_SUPPORT_PVRTC2
  6193. //------------------------------------------------------------------------------------------------
  6194. basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() :
  6195. m_pGlobal_codebook(nullptr),
  6196. m_selector_history_buf_size(0)
  6197. {
  6198. }
  6199. bool basisu_lowlevel_etc1s_transcoder::decode_palettes(
  6200. uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size,
  6201. uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size)
  6202. {
  6203. if (m_pGlobal_codebook)
  6204. {
  6205. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 11\n");
  6206. return false;
  6207. }
  6208. bitwise_decoder sym_codec;
  6209. huffman_decoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model;
  6210. if (!sym_codec.init(pEndpoints_data, endpoints_data_size))
  6211. {
  6212. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 0\n");
  6213. return false;
  6214. }
  6215. if (!sym_codec.read_huffman_table(color5_delta_model0))
  6216. {
  6217. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1\n");
  6218. return false;
  6219. }
  6220. if (!sym_codec.read_huffman_table(color5_delta_model1))
  6221. {
  6222. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1a\n");
  6223. return false;
  6224. }
  6225. if (!sym_codec.read_huffman_table(color5_delta_model2))
  6226. {
  6227. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2a\n");
  6228. return false;
  6229. }
  6230. if (!sym_codec.read_huffman_table(inten_delta_model))
  6231. {
  6232. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n");
  6233. return false;
  6234. }
  6235. if (!color5_delta_model0.is_valid() || !color5_delta_model1.is_valid() || !color5_delta_model2.is_valid() || !inten_delta_model.is_valid())
  6236. {
  6237. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n");
  6238. return false;
  6239. }
  6240. const bool endpoints_are_grayscale = sym_codec.get_bits(1) != 0;
  6241. m_local_endpoints.resize(num_endpoints);
  6242. color32 prev_color5(16, 16, 16, 0);
  6243. uint32_t prev_inten = 0;
  6244. for (uint32_t i = 0; i < num_endpoints; i++)
  6245. {
  6246. uint32_t inten_delta = sym_codec.decode_huffman(inten_delta_model);
  6247. m_local_endpoints[i].m_inten5 = static_cast<uint8_t>((inten_delta + prev_inten) & 7);
  6248. prev_inten = m_local_endpoints[i].m_inten5;
  6249. for (uint32_t c = 0; c < (endpoints_are_grayscale ? 1U : 3U); c++)
  6250. {
  6251. int delta;
  6252. if (prev_color5[c] <= basist::COLOR5_PAL0_PREV_HI)
  6253. delta = sym_codec.decode_huffman(color5_delta_model0);
  6254. else if (prev_color5[c] <= basist::COLOR5_PAL1_PREV_HI)
  6255. delta = sym_codec.decode_huffman(color5_delta_model1);
  6256. else
  6257. delta = sym_codec.decode_huffman(color5_delta_model2);
  6258. int v = (prev_color5[c] + delta) & 31;
  6259. m_local_endpoints[i].m_color5[c] = static_cast<uint8_t>(v);
  6260. prev_color5[c] = static_cast<uint8_t>(v);
  6261. }
  6262. if (endpoints_are_grayscale)
  6263. {
  6264. m_local_endpoints[i].m_color5[1] = m_local_endpoints[i].m_color5[0];
  6265. m_local_endpoints[i].m_color5[2] = m_local_endpoints[i].m_color5[0];
  6266. }
  6267. }
  6268. sym_codec.stop();
  6269. m_local_selectors.resize(num_selectors);
  6270. if (!sym_codec.init(pSelectors_data, selectors_data_size))
  6271. {
  6272. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n");
  6273. return false;
  6274. }
  6275. basist::huffman_decoding_table delta_selector_pal_model;
  6276. const bool used_global_selector_cb = (sym_codec.get_bits(1) == 1);
  6277. if (used_global_selector_cb)
  6278. {
  6279. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: global selector codebooks are unsupported\n");
  6280. return false;
  6281. }
  6282. else
  6283. {
  6284. const bool used_hybrid_selector_cb = (sym_codec.get_bits(1) == 1);
  6285. if (used_hybrid_selector_cb)
  6286. {
  6287. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: hybrid global selector codebooks are unsupported\n");
  6288. return false;
  6289. }
  6290. const bool used_raw_encoding = (sym_codec.get_bits(1) == 1);
  6291. if (used_raw_encoding)
  6292. {
  6293. for (uint32_t i = 0; i < num_selectors; i++)
  6294. {
  6295. for (uint32_t j = 0; j < 4; j++)
  6296. {
  6297. uint32_t cur_byte = sym_codec.get_bits(8);
  6298. for (uint32_t k = 0; k < 4; k++)
  6299. m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
  6300. }
  6301. m_local_selectors[i].init_flags();
  6302. }
  6303. }
  6304. else
  6305. {
  6306. if (!sym_codec.read_huffman_table(delta_selector_pal_model))
  6307. {
  6308. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10\n");
  6309. return false;
  6310. }
  6311. if ((num_selectors > 1) && (!delta_selector_pal_model.is_valid()))
  6312. {
  6313. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10a\n");
  6314. return false;
  6315. }
  6316. uint8_t prev_bytes[4] = { 0, 0, 0, 0 };
  6317. for (uint32_t i = 0; i < num_selectors; i++)
  6318. {
  6319. if (!i)
  6320. {
  6321. for (uint32_t j = 0; j < 4; j++)
  6322. {
  6323. uint32_t cur_byte = sym_codec.get_bits(8);
  6324. prev_bytes[j] = static_cast<uint8_t>(cur_byte);
  6325. for (uint32_t k = 0; k < 4; k++)
  6326. m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
  6327. }
  6328. m_local_selectors[i].init_flags();
  6329. continue;
  6330. }
  6331. for (uint32_t j = 0; j < 4; j++)
  6332. {
  6333. int delta_byte = sym_codec.decode_huffman(delta_selector_pal_model);
  6334. uint32_t cur_byte = delta_byte ^ prev_bytes[j];
  6335. prev_bytes[j] = static_cast<uint8_t>(cur_byte);
  6336. for (uint32_t k = 0; k < 4; k++)
  6337. m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
  6338. }
  6339. m_local_selectors[i].init_flags();
  6340. }
  6341. }
  6342. }
  6343. sym_codec.stop();
  6344. return true;
  6345. }
  6346. bool basisu_lowlevel_etc1s_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size)
  6347. {
  6348. basist::bitwise_decoder sym_codec;
  6349. if (!sym_codec.init(pTable_data, table_data_size))
  6350. {
  6351. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 0\n");
  6352. return false;
  6353. }
  6354. if (!sym_codec.read_huffman_table(m_endpoint_pred_model))
  6355. {
  6356. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1\n");
  6357. return false;
  6358. }
  6359. if (m_endpoint_pred_model.get_code_sizes().size() == 0)
  6360. {
  6361. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1a\n");
  6362. return false;
  6363. }
  6364. if (!sym_codec.read_huffman_table(m_delta_endpoint_model))
  6365. {
  6366. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2\n");
  6367. return false;
  6368. }
  6369. if (m_delta_endpoint_model.get_code_sizes().size() == 0)
  6370. {
  6371. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2a\n");
  6372. return false;
  6373. }
  6374. if (!sym_codec.read_huffman_table(m_selector_model))
  6375. {
  6376. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3\n");
  6377. return false;
  6378. }
  6379. if (m_selector_model.get_code_sizes().size() == 0)
  6380. {
  6381. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3a\n");
  6382. return false;
  6383. }
  6384. if (!sym_codec.read_huffman_table(m_selector_history_buf_rle_model))
  6385. {
  6386. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4\n");
  6387. return false;
  6388. }
  6389. if (m_selector_history_buf_rle_model.get_code_sizes().size() == 0)
  6390. {
  6391. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4a\n");
  6392. return false;
  6393. }
  6394. m_selector_history_buf_size = sym_codec.get_bits(13);
  6395. // Check for bogus values.
  6396. if (!m_selector_history_buf_size)
  6397. {
  6398. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 5\n");
  6399. return false;
  6400. }
  6401. sym_codec.stop();
  6402. return true;
  6403. }
  6404. bool basisu_lowlevel_etc1s_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
  6405. uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
  6406. basisu_transcoder_state* pState, bool transcode_alpha, void *pAlpha_blocks, uint32_t output_rows_in_pixels)
  6407. {
  6408. // 'pDst_blocks' unused when disabling *all* hardware transcode options
  6409. // (and 'bc1_allow_threecolor_blocks' when disabling DXT)
  6410. BASISU_NOTE_UNUSED(pDst_blocks);
  6411. BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
  6412. BASISU_NOTE_UNUSED(transcode_alpha);
  6413. BASISU_NOTE_UNUSED(pAlpha_blocks);
  6414. assert(g_transcoder_initialized);
  6415. if (!g_transcoder_initialized)
  6416. {
  6417. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: Transcoder not globally initialized.\n");
  6418. return false;
  6419. }
  6420. if (!pState)
  6421. pState = &m_def_state;
  6422. const uint32_t total_blocks = num_blocks_x * num_blocks_y;
  6423. if (!output_row_pitch_in_blocks_or_pixels)
  6424. {
  6425. if (basis_block_format_is_uncompressed(fmt))
  6426. output_row_pitch_in_blocks_or_pixels = orig_width;
  6427. else
  6428. {
  6429. if (fmt == block_format::cFXT1_RGB)
  6430. output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8;
  6431. else
  6432. output_row_pitch_in_blocks_or_pixels = num_blocks_x;
  6433. }
  6434. }
  6435. if (basis_block_format_is_uncompressed(fmt))
  6436. {
  6437. if (!output_rows_in_pixels)
  6438. output_rows_in_pixels = orig_height;
  6439. }
  6440. basisu::vector<uint32_t>* pPrev_frame_indices = nullptr;
  6441. if (is_video)
  6442. {
  6443. // TODO: Add check to make sure the caller hasn't tried skipping past p-frames
  6444. //const bool alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
  6445. //const uint32_t level_index = slice_desc.m_level_index;
  6446. if (level_index >= basisu_transcoder_state::cMaxPrevFrameLevels)
  6447. {
  6448. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: unsupported level_index\n");
  6449. return false;
  6450. }
  6451. pPrev_frame_indices = &pState->m_prev_frame_indices[is_alpha_slice][level_index];
  6452. if (pPrev_frame_indices->size() < total_blocks)
  6453. pPrev_frame_indices->resize(total_blocks);
  6454. }
  6455. basist::bitwise_decoder sym_codec;
  6456. if (!sym_codec.init(pImage_data, image_data_size))
  6457. {
  6458. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: sym_codec.init failed\n");
  6459. return false;
  6460. }
  6461. approx_move_to_front selector_history_buf(m_selector_history_buf_size);
  6462. uint32_t cur_selector_rle_count = 0;
  6463. decoder_etc_block block;
  6464. memset(&block, 0, sizeof(block));
  6465. //block.set_flip_bit(true);
  6466. // Setting the flip bit to false to be compatible with the Khronos KDFS.
  6467. block.set_flip_bit(false);
  6468. block.set_diff_bit(true);
  6469. void* pPVRTC_work_mem = nullptr;
  6470. uint32_t* pPVRTC_endpoints = nullptr;
  6471. if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
  6472. {
  6473. pPVRTC_work_mem = malloc(num_blocks_x * num_blocks_y * (sizeof(decoder_etc_block) + sizeof(uint32_t)));
  6474. if (!pPVRTC_work_mem)
  6475. {
  6476. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: malloc failed\n");
  6477. return false;
  6478. }
  6479. pPVRTC_endpoints = (uint32_t*) & ((decoder_etc_block*)pPVRTC_work_mem)[num_blocks_x * num_blocks_y];
  6480. }
  6481. if (pState->m_block_endpoint_preds[0].size() < num_blocks_x)
  6482. {
  6483. pState->m_block_endpoint_preds[0].resize(num_blocks_x);
  6484. pState->m_block_endpoint_preds[1].resize(num_blocks_x);
  6485. }
  6486. uint32_t cur_pred_bits = 0;
  6487. int prev_endpoint_pred_sym = 0;
  6488. int endpoint_pred_repeat_count = 0;
  6489. uint32_t prev_endpoint_index = 0;
  6490. const endpoint_vec& endpoints = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_endpoints : m_local_endpoints;
  6491. const selector_vec& selectors = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_selectors : m_local_selectors;
  6492. if (!endpoints.size() || !selectors.size())
  6493. {
  6494. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: global codebooks must be unpacked first\n");
  6495. return false;
  6496. }
  6497. const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = (uint32_t)selectors.size();
  6498. const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX;
  6499. for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
  6500. {
  6501. const uint32_t cur_block_endpoint_pred_array = block_y & 1;
  6502. for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
  6503. {
  6504. // Decode endpoint index predictor symbols
  6505. if ((block_x & 1) == 0)
  6506. {
  6507. if ((block_y & 1) == 0)
  6508. {
  6509. if (endpoint_pred_repeat_count)
  6510. {
  6511. endpoint_pred_repeat_count--;
  6512. cur_pred_bits = prev_endpoint_pred_sym;
  6513. }
  6514. else
  6515. {
  6516. cur_pred_bits = sym_codec.decode_huffman(m_endpoint_pred_model);
  6517. if (cur_pred_bits == ENDPOINT_PRED_REPEAT_LAST_SYMBOL)
  6518. {
  6519. endpoint_pred_repeat_count = sym_codec.decode_vlc(ENDPOINT_PRED_COUNT_VLC_BITS) + ENDPOINT_PRED_MIN_REPEAT_COUNT - 1;
  6520. cur_pred_bits = prev_endpoint_pred_sym;
  6521. }
  6522. else
  6523. {
  6524. prev_endpoint_pred_sym = cur_pred_bits;
  6525. }
  6526. }
  6527. pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_pred_bits = (uint8_t)(cur_pred_bits >> 4);
  6528. }
  6529. else
  6530. {
  6531. cur_pred_bits = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_pred_bits;
  6532. }
  6533. }
  6534. // Decode endpoint index
  6535. uint32_t endpoint_index, selector_index = 0;
  6536. const uint32_t pred = cur_pred_bits & 3;
  6537. cur_pred_bits >>= 2;
  6538. if (pred == 0)
  6539. {
  6540. // Left
  6541. if (!block_x)
  6542. {
  6543. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (0)\n");
  6544. if (pPVRTC_work_mem)
  6545. free(pPVRTC_work_mem);
  6546. return false;
  6547. }
  6548. endpoint_index = prev_endpoint_index;
  6549. }
  6550. else if (pred == 1)
  6551. {
  6552. // Upper
  6553. if (!block_y)
  6554. {
  6555. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (1)\n");
  6556. if (pPVRTC_work_mem)
  6557. free(pPVRTC_work_mem);
  6558. return false;
  6559. }
  6560. endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_endpoint_index;
  6561. }
  6562. else if (pred == 2)
  6563. {
  6564. if (is_video)
  6565. {
  6566. assert(pred == CR_ENDPOINT_PRED_INDEX);
  6567. endpoint_index = (*pPrev_frame_indices)[block_x + block_y * num_blocks_x];
  6568. selector_index = endpoint_index >> 16;
  6569. endpoint_index &= 0xFFFFU;
  6570. }
  6571. else
  6572. {
  6573. // Upper left
  6574. if ((!block_x) || (!block_y))
  6575. {
  6576. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (2)\n");
  6577. if (pPVRTC_work_mem)
  6578. free(pPVRTC_work_mem);
  6579. return false;
  6580. }
  6581. endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x - 1].m_endpoint_index;
  6582. }
  6583. }
  6584. else
  6585. {
  6586. // Decode and apply delta
  6587. const uint32_t delta_sym = sym_codec.decode_huffman(m_delta_endpoint_model);
  6588. endpoint_index = delta_sym + prev_endpoint_index;
  6589. if (endpoint_index >= endpoints.size())
  6590. endpoint_index -= (int)endpoints.size();
  6591. }
  6592. pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_endpoint_index = (uint16_t)endpoint_index;
  6593. prev_endpoint_index = endpoint_index;
  6594. // Decode selector index
  6595. if ((!is_video) || (pred != CR_ENDPOINT_PRED_INDEX))
  6596. {
  6597. int selector_sym;
  6598. if (cur_selector_rle_count > 0)
  6599. {
  6600. cur_selector_rle_count--;
  6601. selector_sym = (int)selectors.size();
  6602. }
  6603. else
  6604. {
  6605. selector_sym = sym_codec.decode_huffman(m_selector_model);
  6606. if (selector_sym == static_cast<int>(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX))
  6607. {
  6608. int run_sym = sym_codec.decode_huffman(m_selector_history_buf_rle_model);
  6609. if (run_sym == (SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
  6610. cur_selector_rle_count = sym_codec.decode_vlc(7) + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
  6611. else
  6612. cur_selector_rle_count = run_sym + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
  6613. if (cur_selector_rle_count > total_blocks)
  6614. {
  6615. // The file is corrupted or we've got a bug.
  6616. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (3)\n");
  6617. if (pPVRTC_work_mem)
  6618. free(pPVRTC_work_mem);
  6619. return false;
  6620. }
  6621. selector_sym = (int)selectors.size();
  6622. cur_selector_rle_count--;
  6623. }
  6624. }
  6625. if (selector_sym >= (int)selectors.size())
  6626. {
  6627. assert(m_selector_history_buf_size > 0);
  6628. int history_buf_index = selector_sym - (int)selectors.size();
  6629. if (history_buf_index >= (int)selector_history_buf.size())
  6630. {
  6631. // The file is corrupted or we've got a bug.
  6632. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (4)\n");
  6633. if (pPVRTC_work_mem)
  6634. free(pPVRTC_work_mem);
  6635. return false;
  6636. }
  6637. selector_index = selector_history_buf[history_buf_index];
  6638. if (history_buf_index != 0)
  6639. selector_history_buf.use(history_buf_index);
  6640. }
  6641. else
  6642. {
  6643. selector_index = selector_sym;
  6644. if (m_selector_history_buf_size)
  6645. selector_history_buf.add(selector_index);
  6646. }
  6647. }
  6648. if ((endpoint_index >= endpoints.size()) || (selector_index >= selectors.size()))
  6649. {
  6650. // The file is corrupted or we've got a bug.
  6651. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (5)\n");
  6652. if (pPVRTC_work_mem)
  6653. free(pPVRTC_work_mem);
  6654. return false;
  6655. }
  6656. if (is_video)
  6657. (*pPrev_frame_indices)[block_x + block_y * num_blocks_x] = endpoint_index | (selector_index << 16);
  6658. #if BASISD_ENABLE_DEBUG_FLAGS
  6659. if ((g_debug_flags & cDebugFlagVisCRs) && ((fmt == block_format::cETC1) || (fmt == block_format::cBC1)))
  6660. {
  6661. if ((is_video) && (pred == 2))
  6662. {
  6663. decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
  6664. memset(pDst_block, 0xFF, 8);
  6665. continue;
  6666. }
  6667. }
  6668. #endif
  6669. const endpoint* pEndpoints = &endpoints[endpoint_index];
  6670. const selector* pSelector = &selectors[selector_index];
  6671. switch (fmt)
  6672. {
  6673. case block_format::cETC1:
  6674. {
  6675. decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
  6676. block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
  6677. block.set_inten_table(0, pEndpoints->m_inten5);
  6678. block.set_inten_table(1, pEndpoints->m_inten5);
  6679. pDst_block->m_uint32[0] = block.m_uint32[0];
  6680. pDst_block->set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
  6681. break;
  6682. }
  6683. case block_format::cBC1:
  6684. {
  6685. #if BASISD_SUPPORT_DXT1
  6686. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6687. #if BASISD_ENABLE_DEBUG_FLAGS
  6688. if (g_debug_flags & (cDebugFlagVisBC1Sels | cDebugFlagVisBC1Endpoints))
  6689. convert_etc1s_to_dxt1_vis(static_cast<dxt1_block*>(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks);
  6690. else
  6691. #endif
  6692. convert_etc1s_to_dxt1(static_cast<dxt1_block*>(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks);
  6693. #else
  6694. assert(0);
  6695. #endif
  6696. break;
  6697. }
  6698. case block_format::cBC4:
  6699. {
  6700. #if BASISD_SUPPORT_DXT5A
  6701. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6702. convert_etc1s_to_dxt5a(static_cast<dxt5a_block*>(pDst_block), pEndpoints, pSelector);
  6703. #else
  6704. assert(0);
  6705. #endif
  6706. break;
  6707. }
  6708. case block_format::cPVRTC1_4_RGB:
  6709. {
  6710. #if BASISD_SUPPORT_PVRTC1
  6711. block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
  6712. block.set_inten_table(0, pEndpoints->m_inten5);
  6713. block.set_inten_table(1, pEndpoints->m_inten5);
  6714. block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
  6715. ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block;
  6716. const color32& base_color = pEndpoints->m_color5;
  6717. const uint32_t inten_table = pEndpoints->m_inten5;
  6718. const uint32_t low_selector = pSelector->m_lo_selector;
  6719. const uint32_t high_selector = pSelector->m_hi_selector;
  6720. // Get block's RGB bounding box
  6721. color32 block_colors[2];
  6722. decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector);
  6723. assert(block_colors[0][0] <= block_colors[1][0]);
  6724. assert(block_colors[0][1] <= block_colors[1][1]);
  6725. assert(block_colors[0][2] <= block_colors[1][2]);
  6726. // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
  6727. pvrtc4_block temp;
  6728. temp.set_opaque_endpoint_floor(0, block_colors[0]);
  6729. temp.set_opaque_endpoint_ceil(1, block_colors[1]);
  6730. pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints;
  6731. #else
  6732. assert(0);
  6733. #endif
  6734. break;
  6735. }
  6736. case block_format::cPVRTC1_4_RGBA:
  6737. {
  6738. #if BASISD_SUPPORT_PVRTC1
  6739. assert(pAlpha_blocks);
  6740. block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
  6741. block.set_inten_table(0, pEndpoints->m_inten5);
  6742. block.set_inten_table(1, pEndpoints->m_inten5);
  6743. block.set_raw_selector_bits(pSelector->m_selectors[0], pSelector->m_selectors[1], pSelector->m_selectors[2], pSelector->m_selectors[3]);
  6744. ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block;
  6745. // Get block's RGBA bounding box
  6746. const color32& base_color = pEndpoints->m_color5;
  6747. const uint32_t inten_table = pEndpoints->m_inten5;
  6748. const uint32_t low_selector = pSelector->m_lo_selector;
  6749. const uint32_t high_selector = pSelector->m_hi_selector;
  6750. color32 block_colors[2];
  6751. decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector);
  6752. assert(block_colors[0][0] <= block_colors[1][0]);
  6753. assert(block_colors[0][1] <= block_colors[1][1]);
  6754. assert(block_colors[0][2] <= block_colors[1][2]);
  6755. const uint16_t* pAlpha_block = reinterpret_cast<uint16_t*>(static_cast<uint8_t*>(pAlpha_blocks) + (block_x + block_y * num_blocks_x) * sizeof(uint32_t));
  6756. const endpoint* pAlpha_endpoints = &endpoints[pAlpha_block[0]];
  6757. const selector* pAlpha_selector = &selectors[pAlpha_block[1]];
  6758. const color32& alpha_base_color = pAlpha_endpoints->m_color5;
  6759. const uint32_t alpha_inten_table = pAlpha_endpoints->m_inten5;
  6760. const uint32_t alpha_low_selector = pAlpha_selector->m_lo_selector;
  6761. const uint32_t alpha_high_selector = pAlpha_selector->m_hi_selector;
  6762. uint32_t alpha_block_colors[2];
  6763. decoder_etc_block::get_block_colors5_bounds_g(alpha_block_colors, alpha_base_color, alpha_inten_table, alpha_low_selector, alpha_high_selector);
  6764. assert(alpha_block_colors[0] <= alpha_block_colors[1]);
  6765. block_colors[0].a = (uint8_t)alpha_block_colors[0];
  6766. block_colors[1].a = (uint8_t)alpha_block_colors[1];
  6767. // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
  6768. pvrtc4_block temp;
  6769. temp.set_endpoint_floor(0, block_colors[0]);
  6770. temp.set_endpoint_ceil(1, block_colors[1]);
  6771. pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints;
  6772. #else
  6773. assert(0);
  6774. #endif
  6775. break;
  6776. }
  6777. case block_format::cBC7: // for more consistency with UASTC
  6778. case block_format::cBC7_M5_COLOR:
  6779. {
  6780. #if BASISD_SUPPORT_BC7_MODE5
  6781. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6782. convert_etc1s_to_bc7_m5_color(pDst_block, pEndpoints, pSelector);
  6783. #else
  6784. assert(0);
  6785. #endif
  6786. break;
  6787. }
  6788. case block_format::cBC7_M5_ALPHA:
  6789. {
  6790. #if BASISD_SUPPORT_BC7_MODE5
  6791. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6792. convert_etc1s_to_bc7_m5_alpha(pDst_block, pEndpoints, pSelector);
  6793. #else
  6794. assert(0);
  6795. #endif
  6796. break;
  6797. }
  6798. case block_format::cETC2_EAC_A8:
  6799. {
  6800. #if BASISD_SUPPORT_ETC2_EAC_A8
  6801. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6802. convert_etc1s_to_etc2_eac_a8(static_cast<eac_block*>(pDst_block), pEndpoints, pSelector);
  6803. #else
  6804. assert(0);
  6805. #endif
  6806. break;
  6807. }
  6808. case block_format::cASTC_4x4:
  6809. {
  6810. #if BASISD_SUPPORT_ASTC
  6811. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6812. convert_etc1s_to_astc_4x4(pDst_block, pEndpoints, pSelector, transcode_alpha, &endpoints[0], &selectors[0]);
  6813. #else
  6814. assert(0);
  6815. #endif
  6816. break;
  6817. }
  6818. case block_format::cATC_RGB:
  6819. {
  6820. #if BASISD_SUPPORT_ATC
  6821. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6822. convert_etc1s_to_atc(pDst_block, pEndpoints, pSelector);
  6823. #else
  6824. assert(0);
  6825. #endif
  6826. break;
  6827. }
  6828. case block_format::cFXT1_RGB:
  6829. {
  6830. #if BASISD_SUPPORT_FXT1
  6831. const uint32_t fxt1_block_x = block_x >> 1;
  6832. const uint32_t fxt1_block_y = block_y;
  6833. const uint32_t fxt1_subblock = block_x & 1;
  6834. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (fxt1_block_x + fxt1_block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6835. convert_etc1s_to_fxt1(pDst_block, pEndpoints, pSelector, fxt1_subblock);
  6836. #else
  6837. assert(0);
  6838. #endif
  6839. break;
  6840. }
  6841. case block_format::cPVRTC2_4_RGB:
  6842. {
  6843. #if BASISD_SUPPORT_PVRTC2
  6844. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6845. convert_etc1s_to_pvrtc2_rgb(pDst_block, pEndpoints, pSelector);
  6846. #endif
  6847. break;
  6848. }
  6849. case block_format::cPVRTC2_4_RGBA:
  6850. {
  6851. #if BASISD_SUPPORT_PVRTC2
  6852. assert(transcode_alpha);
  6853. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  6854. convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]);
  6855. #endif
  6856. break;
  6857. }
  6858. case block_format::cIndices:
  6859. {
  6860. uint16_t* pDst_block = reinterpret_cast<uint16_t *>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
  6861. pDst_block[0] = static_cast<uint16_t>(endpoint_index);
  6862. pDst_block[1] = static_cast<uint16_t>(selector_index);
  6863. break;
  6864. }
  6865. case block_format::cA32:
  6866. {
  6867. assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
  6868. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
  6869. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  6870. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  6871. int colors[4];
  6872. decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
  6873. if (max_x == 4)
  6874. {
  6875. for (uint32_t y = 0; y < max_y; y++)
  6876. {
  6877. const uint32_t s = pSelector->m_selectors[y];
  6878. pDst_pixels[3] = static_cast<uint8_t>(colors[s & 3]);
  6879. pDst_pixels[3+4] = static_cast<uint8_t>(colors[(s >> 2) & 3]);
  6880. pDst_pixels[3+8] = static_cast<uint8_t>(colors[(s >> 4) & 3]);
  6881. pDst_pixels[3+12] = static_cast<uint8_t>(colors[(s >> 6) & 3]);
  6882. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
  6883. }
  6884. }
  6885. else
  6886. {
  6887. for (uint32_t y = 0; y < max_y; y++)
  6888. {
  6889. const uint32_t s = pSelector->m_selectors[y];
  6890. for (uint32_t x = 0; x < max_x; x++)
  6891. pDst_pixels[3 + 4 * x] = static_cast<uint8_t>(colors[(s >> (x * 2)) & 3]);
  6892. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
  6893. }
  6894. }
  6895. break;
  6896. }
  6897. case block_format::cRGB32:
  6898. {
  6899. assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
  6900. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
  6901. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  6902. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  6903. color32 colors[4];
  6904. decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
  6905. for (uint32_t y = 0; y < max_y; y++)
  6906. {
  6907. const uint32_t s = pSelector->m_selectors[y];
  6908. for (uint32_t x = 0; x < max_x; x++)
  6909. {
  6910. const color32& c = colors[(s >> (x * 2)) & 3];
  6911. pDst_pixels[0 + 4 * x] = c.r;
  6912. pDst_pixels[1 + 4 * x] = c.g;
  6913. pDst_pixels[2 + 4 * x] = c.b;
  6914. }
  6915. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
  6916. }
  6917. break;
  6918. }
  6919. case block_format::cRGBA32:
  6920. {
  6921. assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
  6922. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
  6923. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  6924. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  6925. color32 colors[4];
  6926. decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
  6927. for (uint32_t y = 0; y < max_y; y++)
  6928. {
  6929. const uint32_t s = pSelector->m_selectors[y];
  6930. for (uint32_t x = 0; x < max_x; x++)
  6931. {
  6932. const color32& c = colors[(s >> (x * 2)) & 3];
  6933. pDst_pixels[0 + 4 * x] = c.r;
  6934. pDst_pixels[1 + 4 * x] = c.g;
  6935. pDst_pixels[2 + 4 * x] = c.b;
  6936. pDst_pixels[3 + 4 * x] = 255;
  6937. }
  6938. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
  6939. }
  6940. break;
  6941. }
  6942. case block_format::cRGB565:
  6943. case block_format::cBGR565:
  6944. {
  6945. assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
  6946. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
  6947. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  6948. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  6949. color32 colors[4];
  6950. decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
  6951. uint16_t packed_colors[4];
  6952. if (fmt == block_format::cRGB565)
  6953. {
  6954. for (uint32_t i = 0; i < 4; i++)
  6955. {
  6956. packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].b, 31));
  6957. if (BASISD_IS_BIG_ENDIAN)
  6958. packed_colors[i] = byteswap_uint16(packed_colors[i]);
  6959. }
  6960. }
  6961. else
  6962. {
  6963. for (uint32_t i = 0; i < 4; i++)
  6964. {
  6965. packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].b, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].r, 31));
  6966. if (BASISD_IS_BIG_ENDIAN)
  6967. packed_colors[i] = byteswap_uint16(packed_colors[i]);
  6968. }
  6969. }
  6970. for (uint32_t y = 0; y < max_y; y++)
  6971. {
  6972. const uint32_t s = pSelector->m_selectors[y];
  6973. for (uint32_t x = 0; x < max_x; x++)
  6974. reinterpret_cast<uint16_t *>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
  6975. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
  6976. }
  6977. break;
  6978. }
  6979. case block_format::cRGBA4444_COLOR:
  6980. {
  6981. assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
  6982. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
  6983. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  6984. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  6985. color32 colors[4];
  6986. decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
  6987. uint16_t packed_colors[4];
  6988. for (uint32_t i = 0; i < 4; i++)
  6989. {
  6990. packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4));
  6991. }
  6992. for (uint32_t y = 0; y < max_y; y++)
  6993. {
  6994. const uint32_t s = pSelector->m_selectors[y];
  6995. for (uint32_t x = 0; x < max_x; x++)
  6996. {
  6997. uint16_t cur = reinterpret_cast<uint16_t*>(pDst_pixels)[x];
  6998. if (BASISD_IS_BIG_ENDIAN)
  6999. cur = byteswap_uint16(cur);
  7000. cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3];
  7001. if (BASISD_IS_BIG_ENDIAN)
  7002. cur = byteswap_uint16(cur);
  7003. reinterpret_cast<uint16_t*>(pDst_pixels)[x] = cur;
  7004. }
  7005. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
  7006. }
  7007. break;
  7008. }
  7009. case block_format::cRGBA4444_COLOR_OPAQUE:
  7010. {
  7011. assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
  7012. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
  7013. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  7014. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  7015. color32 colors[4];
  7016. decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
  7017. uint16_t packed_colors[4];
  7018. for (uint32_t i = 0; i < 4; i++)
  7019. {
  7020. packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4) | 0xF);
  7021. if (BASISD_IS_BIG_ENDIAN)
  7022. packed_colors[i] = byteswap_uint16(packed_colors[i]);
  7023. }
  7024. for (uint32_t y = 0; y < max_y; y++)
  7025. {
  7026. const uint32_t s = pSelector->m_selectors[y];
  7027. for (uint32_t x = 0; x < max_x; x++)
  7028. reinterpret_cast<uint16_t*>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
  7029. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
  7030. }
  7031. break;
  7032. }
  7033. case block_format::cRGBA4444_ALPHA:
  7034. {
  7035. assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
  7036. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
  7037. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  7038. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  7039. color32 colors[4];
  7040. decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
  7041. uint16_t packed_colors[4];
  7042. for (uint32_t i = 0; i < 4; i++)
  7043. {
  7044. packed_colors[i] = mul_8(colors[i].g, 15);
  7045. if (BASISD_IS_BIG_ENDIAN)
  7046. packed_colors[i] = byteswap_uint16(packed_colors[i]);
  7047. }
  7048. for (uint32_t y = 0; y < max_y; y++)
  7049. {
  7050. const uint32_t s = pSelector->m_selectors[y];
  7051. for (uint32_t x = 0; x < max_x; x++)
  7052. {
  7053. reinterpret_cast<uint16_t*>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
  7054. }
  7055. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
  7056. }
  7057. break;
  7058. }
  7059. case block_format::cETC2_EAC_R11:
  7060. {
  7061. #if BASISD_SUPPORT_ETC2_EAC_RG11
  7062. void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
  7063. convert_etc1s_to_etc2_eac_r11(static_cast<eac_block*>(pDst_block), pEndpoints, pSelector);
  7064. #else
  7065. assert(0);
  7066. #endif
  7067. break;
  7068. }
  7069. default:
  7070. {
  7071. assert(0);
  7072. break;
  7073. }
  7074. }
  7075. } // block_x
  7076. } // block-y
  7077. if (endpoint_pred_repeat_count != 0)
  7078. {
  7079. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. The file is corrupted or this is a bug\n");
  7080. return false;
  7081. }
  7082. //assert(endpoint_pred_repeat_count == 0);
  7083. #if BASISD_SUPPORT_PVRTC1
  7084. // PVRTC post process - create per-pixel modulation values.
  7085. if (fmt == block_format::cPVRTC1_4_RGB)
  7086. fixup_pvrtc1_4_modulation_rgb((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y);
  7087. else if (fmt == block_format::cPVRTC1_4_RGBA)
  7088. fixup_pvrtc1_4_modulation_rgba((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, pAlpha_blocks, &endpoints[0], &selectors[0]);
  7089. #endif // BASISD_SUPPORT_PVRTC1
  7090. if (pPVRTC_work_mem)
  7091. free(pPVRTC_work_mem);
  7092. return true;
  7093. }
  7094. bool basis_validate_output_buffer_size(transcoder_texture_format target_format,
  7095. uint32_t output_blocks_buf_size_in_blocks_or_pixels,
  7096. uint32_t orig_width, uint32_t orig_height,
  7097. uint32_t output_row_pitch_in_blocks_or_pixels,
  7098. uint32_t output_rows_in_pixels,
  7099. uint32_t total_slice_blocks)
  7100. {
  7101. if (basis_transcoder_format_is_uncompressed(target_format))
  7102. {
  7103. // Assume the output buffer is orig_width by orig_height
  7104. if (!output_row_pitch_in_blocks_or_pixels)
  7105. output_row_pitch_in_blocks_or_pixels = orig_width;
  7106. if (!output_rows_in_pixels)
  7107. output_rows_in_pixels = orig_height;
  7108. // Now make sure the output buffer is large enough, or we'll overwrite memory.
  7109. if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
  7110. {
  7111. BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
  7112. return false;
  7113. }
  7114. }
  7115. else if (target_format == transcoder_texture_format::cTFFXT1_RGB)
  7116. {
  7117. const uint32_t num_blocks_fxt1_x = (orig_width + 7) / 8;
  7118. const uint32_t num_blocks_fxt1_y = (orig_height + 3) / 4;
  7119. const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y;
  7120. if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
  7121. {
  7122. BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
  7123. return false;
  7124. }
  7125. }
  7126. else
  7127. {
  7128. if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks)
  7129. {
  7130. BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
  7131. return false;
  7132. }
  7133. }
  7134. return true;
  7135. }
  7136. bool basisu_lowlevel_etc1s_transcoder::transcode_image(
  7137. transcoder_texture_format target_format,
  7138. void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
  7139. const uint8_t* pCompressed_data, uint32_t compressed_data_length,
  7140. uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
  7141. uint32_t rgb_offset, uint32_t rgb_length, uint32_t alpha_offset, uint32_t alpha_length,
  7142. uint32_t decode_flags,
  7143. bool basis_file_has_alpha_slices,
  7144. bool is_video,
  7145. uint32_t output_row_pitch_in_blocks_or_pixels,
  7146. basisu_transcoder_state* pState,
  7147. uint32_t output_rows_in_pixels)
  7148. {
  7149. if (((uint64_t)rgb_offset + rgb_length) > (uint64_t)compressed_data_length)
  7150. {
  7151. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (color)\n");
  7152. return false;
  7153. }
  7154. if (alpha_length)
  7155. {
  7156. if (((uint64_t)alpha_offset + alpha_length) > (uint64_t)compressed_data_length)
  7157. {
  7158. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (alpha)\n");
  7159. return false;
  7160. }
  7161. }
  7162. else
  7163. {
  7164. assert(!basis_file_has_alpha_slices);
  7165. }
  7166. if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA))
  7167. {
  7168. if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4)))
  7169. {
  7170. // PVRTC1 only supports power of 2 dimensions
  7171. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n");
  7172. return false;
  7173. }
  7174. }
  7175. if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices))
  7176. {
  7177. // Switch to PVRTC1 RGB if the input doesn't have alpha.
  7178. target_format = transcoder_texture_format::cTFPVRTC1_4_RGB;
  7179. }
  7180. const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
  7181. const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
  7182. const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
  7183. if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
  7184. {
  7185. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output buffer size too small\n");
  7186. return false;
  7187. }
  7188. bool status = false;
  7189. const uint8_t* pData = pCompressed_data + rgb_offset;
  7190. uint32_t data_len = rgb_length;
  7191. bool is_alpha_slice = false;
  7192. // If the caller wants us to transcode the mip level's alpha data, then use the next slice.
  7193. if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats))
  7194. {
  7195. pData = pCompressed_data + alpha_offset;
  7196. data_len = alpha_length;
  7197. is_alpha_slice = true;
  7198. }
  7199. switch (target_format)
  7200. {
  7201. case transcoder_texture_format::cTFETC1_RGB:
  7202. {
  7203. //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7204. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7205. if (!status)
  7206. {
  7207. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n");
  7208. }
  7209. break;
  7210. }
  7211. case transcoder_texture_format::cTFBC1_RGB:
  7212. {
  7213. #if !BASISD_SUPPORT_DXT1
  7214. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC1/DXT1 unsupported\n");
  7215. return false;
  7216. #else
  7217. // status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7218. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC1, bytes_per_block_or_pixel, true, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7219. if (!status)
  7220. {
  7221. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC1 failed\n");
  7222. }
  7223. break;
  7224. #endif
  7225. }
  7226. case transcoder_texture_format::cTFBC4_R:
  7227. {
  7228. #if !BASISD_SUPPORT_DXT5A
  7229. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC4/DXT5A unsupported\n");
  7230. return false;
  7231. #else
  7232. //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7233. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7234. if (!status)
  7235. {
  7236. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC4 failed\n");
  7237. }
  7238. break;
  7239. #endif
  7240. }
  7241. case transcoder_texture_format::cTFPVRTC1_4_RGB:
  7242. {
  7243. #if !BASISD_SUPPORT_PVRTC1
  7244. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n");
  7245. return false;
  7246. #else
  7247. // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?)
  7248. //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7249. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7250. if (!status)
  7251. {
  7252. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGB failed\n");
  7253. }
  7254. break;
  7255. #endif
  7256. }
  7257. case transcoder_texture_format::cTFPVRTC1_4_RGBA:
  7258. {
  7259. #if !BASISD_SUPPORT_PVRTC1
  7260. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n");
  7261. return false;
  7262. #else
  7263. assert(basis_file_has_alpha_slices);
  7264. assert(alpha_length);
  7265. // Temp buffer to hold alpha block endpoint/selector indices
  7266. basisu::vector<uint32_t> temp_block_indices(total_slice_blocks);
  7267. // First transcode alpha data to temp buffer
  7268. //status = transcode_slice(pData, data_size, slice_index + 1, &temp_block_indices[0], total_slice_blocks, block_format::cIndices, sizeof(uint32_t), decode_flags, pSlice_descs[slice_index].m_num_blocks_x, pState);
  7269. status = transcode_slice(&temp_block_indices[0], num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, num_blocks_x, pState, false, nullptr, 0);
  7270. if (!status)
  7271. {
  7272. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (0)\n");
  7273. }
  7274. else
  7275. {
  7276. // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?)
  7277. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, &temp_block_indices[0]);
  7278. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, &temp_block_indices[0], 0);
  7279. if (!status)
  7280. {
  7281. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (1)\n");
  7282. }
  7283. }
  7284. break;
  7285. #endif
  7286. }
  7287. case transcoder_texture_format::cTFBC7_RGBA:
  7288. case transcoder_texture_format::cTFBC7_ALT:
  7289. {
  7290. #if !BASISD_SUPPORT_BC7_MODE5
  7291. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC7 unsupported\n");
  7292. return false;
  7293. #else
  7294. assert(bytes_per_block_or_pixel == 16);
  7295. // We used to support transcoding just alpha to BC7 - but is that useful at all?
  7296. // First transcode the color slice. The cBC7_M5_COLOR transcoder will output opaque mode 5 blocks.
  7297. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_COLOR, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7298. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC7_M5_COLOR, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7299. if ((status) && (basis_file_has_alpha_slices))
  7300. {
  7301. // Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha.
  7302. //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7303. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC7_M5_ALPHA, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7304. }
  7305. if (!status)
  7306. {
  7307. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC7 failed (0)\n");
  7308. }
  7309. break;
  7310. #endif
  7311. }
  7312. case transcoder_texture_format::cTFETC2_RGBA:
  7313. {
  7314. #if !BASISD_SUPPORT_ETC2_EAC_A8
  7315. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ETC2 EAC A8 unsupported\n");
  7316. return false;
  7317. #else
  7318. assert(bytes_per_block_or_pixel == 16);
  7319. if (basis_file_has_alpha_slices)
  7320. {
  7321. // First decode the alpha data
  7322. //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7323. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7324. }
  7325. else
  7326. {
  7327. //write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels);
  7328. basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels);
  7329. status = true;
  7330. }
  7331. if (status)
  7332. {
  7333. // Now decode the color data
  7334. //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7335. status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7336. if (!status)
  7337. {
  7338. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 RGB failed\n");
  7339. }
  7340. }
  7341. else
  7342. {
  7343. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 A failed\n");
  7344. }
  7345. break;
  7346. #endif
  7347. }
  7348. case transcoder_texture_format::cTFBC3_RGBA:
  7349. {
  7350. #if !BASISD_SUPPORT_DXT1
  7351. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT1 unsupported\n");
  7352. return false;
  7353. #elif !BASISD_SUPPORT_DXT5A
  7354. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
  7355. return false;
  7356. #else
  7357. assert(bytes_per_block_or_pixel == 16);
  7358. // First decode the alpha data
  7359. if (basis_file_has_alpha_slices)
  7360. {
  7361. //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7362. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7363. }
  7364. else
  7365. {
  7366. basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
  7367. status = true;
  7368. }
  7369. if (status)
  7370. {
  7371. // Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3.
  7372. //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks_or_pixels, pState);
  7373. status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7374. if (!status)
  7375. {
  7376. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 RGB failed\n");
  7377. }
  7378. }
  7379. else
  7380. {
  7381. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 A failed\n");
  7382. }
  7383. break;
  7384. #endif
  7385. }
  7386. case transcoder_texture_format::cTFBC5_RG:
  7387. {
  7388. #if !BASISD_SUPPORT_DXT5A
  7389. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
  7390. return false;
  7391. #else
  7392. assert(bytes_per_block_or_pixel == 16);
  7393. //bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
  7394. // uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
  7395. // basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0);
  7396. // Decode the R data (actually the green channel of the color data slice in the basis file)
  7397. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7398. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7399. if (status)
  7400. {
  7401. if (basis_file_has_alpha_slices)
  7402. {
  7403. // Decode the G data (actually the green channel of the alpha data slice in the basis file)
  7404. //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7405. status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7406. if (!status)
  7407. {
  7408. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 1 failed\n");
  7409. }
  7410. }
  7411. else
  7412. {
  7413. basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
  7414. status = true;
  7415. }
  7416. }
  7417. else
  7418. {
  7419. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 channel 0 failed\n");
  7420. }
  7421. break;
  7422. #endif
  7423. }
  7424. case transcoder_texture_format::cTFASTC_4x4_RGBA:
  7425. {
  7426. #if !BASISD_SUPPORT_ASTC
  7427. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ASTC unsupported\n");
  7428. return false;
  7429. #else
  7430. assert(bytes_per_block_or_pixel == 16);
  7431. if (basis_file_has_alpha_slices)
  7432. {
  7433. // First decode the alpha data to the output (we're using the output texture as a temp buffer here).
  7434. //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7435. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7436. if (status)
  7437. {
  7438. // Now decode the color data and transcode to ASTC. The transcoder function will read the alpha selector data from the output texture as it converts and
  7439. // transcode both the alpha and color data at the same time to ASTC.
  7440. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState);
  7441. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels);
  7442. }
  7443. }
  7444. else
  7445. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7446. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7447. if (!status)
  7448. {
  7449. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ASTC failed (0)\n");
  7450. }
  7451. break;
  7452. #endif
  7453. }
  7454. case transcoder_texture_format::cTFATC_RGB:
  7455. {
  7456. #if !BASISD_SUPPORT_ATC
  7457. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n");
  7458. return false;
  7459. #else
  7460. //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7461. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7462. if (!status)
  7463. {
  7464. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC_RGB failed\n");
  7465. }
  7466. break;
  7467. #endif
  7468. }
  7469. case transcoder_texture_format::cTFATC_RGBA:
  7470. {
  7471. #if !BASISD_SUPPORT_ATC
  7472. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n");
  7473. return false;
  7474. #elif !BASISD_SUPPORT_DXT5A
  7475. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
  7476. return false;
  7477. #else
  7478. assert(bytes_per_block_or_pixel == 16);
  7479. // First decode the alpha data
  7480. if (basis_file_has_alpha_slices)
  7481. {
  7482. //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7483. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7484. }
  7485. else
  7486. {
  7487. basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
  7488. status = true;
  7489. }
  7490. if (status)
  7491. {
  7492. //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7493. status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7494. if (!status)
  7495. {
  7496. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC RGB failed\n");
  7497. }
  7498. }
  7499. else
  7500. {
  7501. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC A failed\n");
  7502. }
  7503. break;
  7504. #endif
  7505. }
  7506. case transcoder_texture_format::cTFPVRTC2_4_RGB:
  7507. {
  7508. #if !BASISD_SUPPORT_PVRTC2
  7509. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n");
  7510. return false;
  7511. #else
  7512. //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7513. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7514. if (!status)
  7515. {
  7516. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGB failed\n");
  7517. }
  7518. break;
  7519. #endif
  7520. }
  7521. case transcoder_texture_format::cTFPVRTC2_4_RGBA:
  7522. {
  7523. #if !BASISD_SUPPORT_PVRTC2
  7524. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n");
  7525. return false;
  7526. #else
  7527. if (basis_file_has_alpha_slices)
  7528. {
  7529. // First decode the alpha data to the output (we're using the output texture as a temp buffer here).
  7530. //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7531. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7532. if (!status)
  7533. {
  7534. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to failed\n");
  7535. }
  7536. else
  7537. {
  7538. // Now decode the color data and transcode to PVRTC2 RGBA.
  7539. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState);
  7540. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels);
  7541. }
  7542. }
  7543. else
  7544. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7545. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7546. if (!status)
  7547. {
  7548. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGBA failed\n");
  7549. }
  7550. break;
  7551. #endif
  7552. }
  7553. case transcoder_texture_format::cTFRGBA32:
  7554. {
  7555. // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
  7556. // First decode the alpha data
  7557. if (basis_file_has_alpha_slices)
  7558. //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
  7559. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7560. else
  7561. status = true;
  7562. if (status)
  7563. {
  7564. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
  7565. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7566. if (!status)
  7567. {
  7568. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 RGB failed\n");
  7569. }
  7570. }
  7571. else
  7572. {
  7573. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 A failed\n");
  7574. }
  7575. break;
  7576. }
  7577. case transcoder_texture_format::cTFRGB565:
  7578. case transcoder_texture_format::cTFBGR565:
  7579. {
  7580. // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
  7581. //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (fmt == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
  7582. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, (target_format == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7583. if (!status)
  7584. {
  7585. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGB565 RGB failed\n");
  7586. }
  7587. break;
  7588. }
  7589. case transcoder_texture_format::cTFRGBA4444:
  7590. {
  7591. // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
  7592. // First decode the alpha data
  7593. if (basis_file_has_alpha_slices)
  7594. //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
  7595. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7596. else
  7597. status = true;
  7598. if (status)
  7599. {
  7600. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
  7601. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7602. if (!status)
  7603. {
  7604. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 RGB failed\n");
  7605. }
  7606. }
  7607. else
  7608. {
  7609. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 A failed\n");
  7610. }
  7611. break;
  7612. }
  7613. case transcoder_texture_format::cTFFXT1_RGB:
  7614. {
  7615. #if !BASISD_SUPPORT_FXT1
  7616. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: FXT1 unsupported\n");
  7617. return false;
  7618. #else
  7619. //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cFXT1_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7620. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cFXT1_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7621. if (!status)
  7622. {
  7623. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to FXT1_RGB failed\n");
  7624. }
  7625. break;
  7626. #endif
  7627. }
  7628. case transcoder_texture_format::cTFETC2_EAC_R11:
  7629. {
  7630. #if !BASISD_SUPPORT_ETC2_EAC_RG11
  7631. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n");
  7632. return false;
  7633. #else
  7634. //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7635. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7636. if (!status)
  7637. {
  7638. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 failed\n");
  7639. }
  7640. break;
  7641. #endif
  7642. }
  7643. case transcoder_texture_format::cTFETC2_EAC_RG11:
  7644. {
  7645. #if !BASISD_SUPPORT_ETC2_EAC_RG11
  7646. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n");
  7647. return false;
  7648. #else
  7649. assert(bytes_per_block_or_pixel == 16);
  7650. if (basis_file_has_alpha_slices)
  7651. {
  7652. // First decode the alpha data to G
  7653. //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7654. status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7655. }
  7656. else
  7657. {
  7658. basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cETC2_EAC_R11, 16, output_row_pitch_in_blocks_or_pixels);
  7659. status = true;
  7660. }
  7661. if (status)
  7662. {
  7663. // Now decode the color data to R
  7664. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7665. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
  7666. if (!status)
  7667. {
  7668. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 R failed\n");
  7669. }
  7670. }
  7671. else
  7672. {
  7673. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 G failed\n");
  7674. }
  7675. break;
  7676. #endif
  7677. }
  7678. default:
  7679. {
  7680. assert(0);
  7681. BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: Invalid fmt\n");
  7682. break;
  7683. }
  7684. }
  7685. return status;
  7686. }
  7687. //------------------------------------------------------------------------------------------------
  7688. basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder()
  7689. {
  7690. }
  7691. bool basisu_lowlevel_uastc_transcoder::transcode_slice(
  7692. void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
  7693. uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
  7694. const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
  7695. basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
  7696. {
  7697. BASISU_NOTE_UNUSED(pState);
  7698. BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
  7699. assert(g_transcoder_initialized);
  7700. if (!g_transcoder_initialized)
  7701. {
  7702. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: Transcoder not globally initialized.\n");
  7703. return false;
  7704. }
  7705. #if BASISD_SUPPORT_UASTC
  7706. const uint32_t total_blocks = num_blocks_x * num_blocks_y;
  7707. if (!output_row_pitch_in_blocks_or_pixels)
  7708. {
  7709. if (basis_block_format_is_uncompressed(fmt))
  7710. output_row_pitch_in_blocks_or_pixels = orig_width;
  7711. else
  7712. {
  7713. if (fmt == block_format::cFXT1_RGB)
  7714. output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8;
  7715. else
  7716. output_row_pitch_in_blocks_or_pixels = num_blocks_x;
  7717. }
  7718. }
  7719. if (basis_block_format_is_uncompressed(fmt))
  7720. {
  7721. if (!output_rows_in_pixels)
  7722. output_rows_in_pixels = orig_height;
  7723. }
  7724. uint32_t total_expected_block_bytes = sizeof(uastc_block) * total_blocks;
  7725. if (image_data_size < total_expected_block_bytes)
  7726. {
  7727. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
  7728. return false;
  7729. }
  7730. const uastc_block* pSource_block = reinterpret_cast<const uastc_block *>(pImage_data);
  7731. const bool high_quality = (decode_flags & cDecodeFlagsHighQuality) != 0;
  7732. const bool from_alpha = has_alpha && (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
  7733. bool status = false;
  7734. if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
  7735. {
  7736. if (fmt == block_format::cPVRTC1_4_RGBA)
  7737. transcode_uastc_to_pvrtc1_4_rgba((const uastc_block*)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality);
  7738. else
  7739. transcode_uastc_to_pvrtc1_4_rgb((const uastc_block *)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality, from_alpha);
  7740. }
  7741. else
  7742. {
  7743. for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
  7744. {
  7745. void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
  7746. for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes)
  7747. {
  7748. switch (fmt)
  7749. {
  7750. case block_format::cUASTC_4x4:
  7751. {
  7752. memcpy(pDst_block, pSource_block, sizeof(uastc_block));
  7753. status = true;
  7754. break;
  7755. }
  7756. case block_format::cETC1:
  7757. {
  7758. if (from_alpha)
  7759. status = transcode_uastc_to_etc1(*pSource_block, pDst_block, 3);
  7760. else
  7761. status = transcode_uastc_to_etc1(*pSource_block, pDst_block);
  7762. break;
  7763. }
  7764. case block_format::cETC2_RGBA:
  7765. {
  7766. status = transcode_uastc_to_etc2_rgba(*pSource_block, pDst_block);
  7767. break;
  7768. }
  7769. case block_format::cBC1:
  7770. {
  7771. status = transcode_uastc_to_bc1(*pSource_block, pDst_block, high_quality);
  7772. break;
  7773. }
  7774. case block_format::cBC3:
  7775. {
  7776. status = transcode_uastc_to_bc3(*pSource_block, pDst_block, high_quality);
  7777. break;
  7778. }
  7779. case block_format::cBC4:
  7780. {
  7781. if (channel0 < 0)
  7782. channel0 = 0;
  7783. status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0);
  7784. break;
  7785. }
  7786. case block_format::cBC5:
  7787. {
  7788. if (channel0 < 0)
  7789. channel0 = 0;
  7790. if (channel1 < 0)
  7791. channel1 = 3;
  7792. status = transcode_uastc_to_bc5(*pSource_block, pDst_block, high_quality, channel0, channel1);
  7793. break;
  7794. }
  7795. case block_format::cBC7:
  7796. case block_format::cBC7_M5_COLOR: // for consistently with ETC1S
  7797. {
  7798. status = transcode_uastc_to_bc7(*pSource_block, pDst_block);
  7799. break;
  7800. }
  7801. case block_format::cASTC_4x4:
  7802. {
  7803. status = transcode_uastc_to_astc(*pSource_block, pDst_block);
  7804. break;
  7805. }
  7806. case block_format::cETC2_EAC_R11:
  7807. {
  7808. if (channel0 < 0)
  7809. channel0 = 0;
  7810. status = transcode_uastc_to_etc2_eac_r11(*pSource_block, pDst_block, high_quality, channel0);
  7811. break;
  7812. }
  7813. case block_format::cETC2_EAC_RG11:
  7814. {
  7815. if (channel0 < 0)
  7816. channel0 = 0;
  7817. if (channel1 < 0)
  7818. channel1 = 3;
  7819. status = transcode_uastc_to_etc2_eac_rg11(*pSource_block, pDst_block, high_quality, channel0, channel1);
  7820. break;
  7821. }
  7822. case block_format::cRGBA32:
  7823. {
  7824. color32 block_pixels[4][4];
  7825. status = unpack_uastc(*pSource_block, (color32 *)block_pixels, false);
  7826. assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
  7827. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
  7828. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  7829. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  7830. for (uint32_t y = 0; y < max_y; y++)
  7831. {
  7832. for (uint32_t x = 0; x < max_x; x++)
  7833. {
  7834. const color32& c = block_pixels[y][x];
  7835. pDst_pixels[0 + 4 * x] = c.r;
  7836. pDst_pixels[1 + 4 * x] = c.g;
  7837. pDst_pixels[2 + 4 * x] = c.b;
  7838. pDst_pixels[3 + 4 * x] = c.a;
  7839. }
  7840. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
  7841. }
  7842. break;
  7843. }
  7844. case block_format::cRGB565:
  7845. case block_format::cBGR565:
  7846. {
  7847. color32 block_pixels[4][4];
  7848. status = unpack_uastc(*pSource_block, (color32*)block_pixels, false);
  7849. assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
  7850. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
  7851. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  7852. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  7853. for (uint32_t y = 0; y < max_y; y++)
  7854. {
  7855. for (uint32_t x = 0; x < max_x; x++)
  7856. {
  7857. const color32& c = block_pixels[y][x];
  7858. const uint16_t packed = (fmt == block_format::cRGB565) ? static_cast<uint16_t>((mul_8(c.r, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.b, 31)) :
  7859. static_cast<uint16_t>((mul_8(c.b, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.r, 31));
  7860. pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF);
  7861. pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF);
  7862. }
  7863. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
  7864. }
  7865. break;
  7866. }
  7867. case block_format::cRGBA4444:
  7868. {
  7869. color32 block_pixels[4][4];
  7870. status = unpack_uastc(*pSource_block, (color32*)block_pixels, false);
  7871. assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
  7872. uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
  7873. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  7874. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  7875. for (uint32_t y = 0; y < max_y; y++)
  7876. {
  7877. for (uint32_t x = 0; x < max_x; x++)
  7878. {
  7879. const color32& c = block_pixels[y][x];
  7880. const uint16_t packed = static_cast<uint16_t>((mul_8(c.r, 15) << 12) | (mul_8(c.g, 15) << 8) | (mul_8(c.b, 15) << 4) | mul_8(c.a, 15));
  7881. pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF);
  7882. pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF);
  7883. }
  7884. pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
  7885. }
  7886. break;
  7887. }
  7888. default:
  7889. assert(0);
  7890. break;
  7891. }
  7892. if (!status)
  7893. {
  7894. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: Transcoder failed to unpack a UASTC block - this is a bug, or the data was corrupted\n");
  7895. return false;
  7896. }
  7897. } // block_x
  7898. } // block_y
  7899. }
  7900. return true;
  7901. #else
  7902. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: UASTC is unsupported\n");
  7903. BASISU_NOTE_UNUSED(decode_flags);
  7904. BASISU_NOTE_UNUSED(channel0);
  7905. BASISU_NOTE_UNUSED(channel1);
  7906. BASISU_NOTE_UNUSED(output_rows_in_pixels);
  7907. BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
  7908. BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
  7909. BASISU_NOTE_UNUSED(fmt);
  7910. BASISU_NOTE_UNUSED(image_data_size);
  7911. BASISU_NOTE_UNUSED(pImage_data);
  7912. BASISU_NOTE_UNUSED(num_blocks_x);
  7913. BASISU_NOTE_UNUSED(num_blocks_y);
  7914. BASISU_NOTE_UNUSED(pDst_blocks);
  7915. return false;
  7916. #endif
  7917. }
  7918. bool basisu_lowlevel_uastc_transcoder::transcode_image(
  7919. transcoder_texture_format target_format,
  7920. void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
  7921. const uint8_t* pCompressed_data, uint32_t compressed_data_length,
  7922. uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
  7923. uint32_t slice_offset, uint32_t slice_length,
  7924. uint32_t decode_flags,
  7925. bool has_alpha,
  7926. bool is_video,
  7927. uint32_t output_row_pitch_in_blocks_or_pixels,
  7928. basisu_transcoder_state* pState,
  7929. uint32_t output_rows_in_pixels,
  7930. int channel0, int channel1)
  7931. {
  7932. BASISU_NOTE_UNUSED(is_video);
  7933. BASISU_NOTE_UNUSED(level_index);
  7934. if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
  7935. {
  7936. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: source data buffer too small\n");
  7937. return false;
  7938. }
  7939. if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA))
  7940. {
  7941. if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4)))
  7942. {
  7943. // PVRTC1 only supports power of 2 dimensions
  7944. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n");
  7945. return false;
  7946. }
  7947. }
  7948. if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!has_alpha))
  7949. {
  7950. // Switch to PVRTC1 RGB if the input doesn't have alpha.
  7951. target_format = transcoder_texture_format::cTFPVRTC1_4_RGB;
  7952. }
  7953. const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
  7954. const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
  7955. const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
  7956. if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
  7957. {
  7958. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: output buffer size too small\n");
  7959. return false;
  7960. }
  7961. bool status = false;
  7962. // UASTC4x4
  7963. switch (target_format)
  7964. {
  7965. case transcoder_texture_format::cTFETC1_RGB:
  7966. {
  7967. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7968. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1,
  7969. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
  7970. if (!status)
  7971. {
  7972. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC1 failed\n");
  7973. }
  7974. break;
  7975. }
  7976. case transcoder_texture_format::cTFETC2_RGBA:
  7977. {
  7978. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7979. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_RGBA,
  7980. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
  7981. if (!status)
  7982. {
  7983. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC2 failed\n");
  7984. }
  7985. break;
  7986. }
  7987. case transcoder_texture_format::cTFBC1_RGB:
  7988. {
  7989. // TODO: ETC1S allows BC1 from alpha channel. That doesn't seem actually useful, though.
  7990. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  7991. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC1,
  7992. bytes_per_block_or_pixel, true, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
  7993. if (!status)
  7994. {
  7995. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC1 failed\n");
  7996. }
  7997. break;
  7998. }
  7999. case transcoder_texture_format::cTFBC3_RGBA:
  8000. {
  8001. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC3, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  8002. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC3,
  8003. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
  8004. if (!status)
  8005. {
  8006. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC3 failed\n");
  8007. }
  8008. break;
  8009. }
  8010. case transcoder_texture_format::cTFBC4_R:
  8011. {
  8012. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
  8013. // nullptr, 0,
  8014. // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
  8015. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC4,
  8016. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
  8017. ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
  8018. if (!status)
  8019. {
  8020. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC4 failed\n");
  8021. }
  8022. break;
  8023. }
  8024. case transcoder_texture_format::cTFBC5_RG:
  8025. {
  8026. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
  8027. // nullptr, 0,
  8028. // 0, 3);
  8029. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC5,
  8030. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
  8031. 0, 3);
  8032. if (!status)
  8033. {
  8034. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC5 failed\n");
  8035. }
  8036. break;
  8037. }
  8038. case transcoder_texture_format::cTFBC7_RGBA:
  8039. case transcoder_texture_format::cTFBC7_ALT:
  8040. {
  8041. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  8042. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC7,
  8043. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8044. if (!status)
  8045. {
  8046. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC7 failed\n");
  8047. }
  8048. break;
  8049. }
  8050. case transcoder_texture_format::cTFPVRTC1_4_RGB:
  8051. {
  8052. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  8053. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGB,
  8054. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8055. if (!status)
  8056. {
  8057. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to PVRTC1 RGB 4bpp failed\n");
  8058. }
  8059. break;
  8060. }
  8061. case transcoder_texture_format::cTFPVRTC1_4_RGBA:
  8062. {
  8063. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  8064. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGBA,
  8065. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8066. if (!status)
  8067. {
  8068. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to PVRTC1 RGBA 4bpp failed\n");
  8069. }
  8070. break;
  8071. }
  8072. case transcoder_texture_format::cTFASTC_4x4_RGBA:
  8073. {
  8074. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  8075. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_4x4,
  8076. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8077. if (!status)
  8078. {
  8079. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ASTC 4x4 failed\n");
  8080. }
  8081. break;
  8082. }
  8083. case transcoder_texture_format::cTFATC_RGB:
  8084. case transcoder_texture_format::cTFATC_RGBA:
  8085. {
  8086. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->ATC currently unsupported\n");
  8087. return false;
  8088. }
  8089. case transcoder_texture_format::cTFFXT1_RGB:
  8090. {
  8091. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->FXT1 currently unsupported\n");
  8092. return false;
  8093. }
  8094. case transcoder_texture_format::cTFPVRTC2_4_RGB:
  8095. {
  8096. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n");
  8097. return false;
  8098. }
  8099. case transcoder_texture_format::cTFPVRTC2_4_RGBA:
  8100. {
  8101. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n");
  8102. return false;
  8103. }
  8104. case transcoder_texture_format::cTFETC2_EAC_R11:
  8105. {
  8106. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
  8107. // nullptr, 0,
  8108. // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
  8109. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_R11,
  8110. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
  8111. ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
  8112. if (!status)
  8113. {
  8114. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to EAC R11 failed\n");
  8115. }
  8116. break;
  8117. }
  8118. case transcoder_texture_format::cTFETC2_EAC_RG11:
  8119. {
  8120. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
  8121. // nullptr, 0,
  8122. // 0, 3);
  8123. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_RG11,
  8124. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
  8125. 0, 3);
  8126. if (!status)
  8127. {
  8128. BASISU_DEVEL_ERROR("basisu_basisu_lowlevel_uastc_transcodertranscoder::transcode_image: transcode_slice() to EAC RG11 failed\n");
  8129. }
  8130. break;
  8131. }
  8132. case transcoder_texture_format::cTFRGBA32:
  8133. {
  8134. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA32, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  8135. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA32,
  8136. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8137. if (!status)
  8138. {
  8139. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGBA32 failed\n");
  8140. }
  8141. break;
  8142. }
  8143. case transcoder_texture_format::cTFRGB565:
  8144. {
  8145. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGB565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  8146. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB565,
  8147. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8148. if (!status)
  8149. {
  8150. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGB565 failed\n");
  8151. }
  8152. break;
  8153. }
  8154. case transcoder_texture_format::cTFBGR565:
  8155. {
  8156. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBGR565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  8157. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBGR565,
  8158. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8159. if (!status)
  8160. {
  8161. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGB565 failed\n");
  8162. }
  8163. break;
  8164. }
  8165. case transcoder_texture_format::cTFRGBA4444:
  8166. {
  8167. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
  8168. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA4444,
  8169. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8170. if (!status)
  8171. {
  8172. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGBA4444 failed\n");
  8173. }
  8174. break;
  8175. }
  8176. default:
  8177. {
  8178. assert(0);
  8179. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: Invalid format\n");
  8180. break;
  8181. }
  8182. }
  8183. return status;
  8184. }
  8185. //------------------------------------------------------------------------------------------------
  8186. basisu_lowlevel_uastc_hdr_transcoder::basisu_lowlevel_uastc_hdr_transcoder()
  8187. {
  8188. }
  8189. bool basisu_lowlevel_uastc_hdr_transcoder::transcode_slice(
  8190. void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
  8191. uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
  8192. const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
  8193. basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
  8194. {
  8195. BASISU_NOTE_UNUSED(pState);
  8196. BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
  8197. BASISU_NOTE_UNUSED(has_alpha);
  8198. BASISU_NOTE_UNUSED(channel0);
  8199. BASISU_NOTE_UNUSED(channel1);
  8200. BASISU_NOTE_UNUSED(decode_flags);
  8201. assert(g_transcoder_initialized);
  8202. if (!g_transcoder_initialized)
  8203. {
  8204. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder not globally initialized.\n");
  8205. return false;
  8206. }
  8207. #if BASISD_SUPPORT_UASTC_HDR
  8208. const uint32_t total_blocks = num_blocks_x * num_blocks_y;
  8209. if (!output_row_pitch_in_blocks_or_pixels)
  8210. {
  8211. if (basis_block_format_is_uncompressed(fmt))
  8212. output_row_pitch_in_blocks_or_pixels = orig_width;
  8213. else
  8214. output_row_pitch_in_blocks_or_pixels = num_blocks_x;
  8215. }
  8216. if (basis_block_format_is_uncompressed(fmt))
  8217. {
  8218. if (!output_rows_in_pixels)
  8219. output_rows_in_pixels = orig_height;
  8220. }
  8221. uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_blocks;
  8222. if (image_data_size < total_expected_block_bytes)
  8223. {
  8224. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
  8225. return false;
  8226. }
  8227. const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
  8228. bool status = false;
  8229. // TODO: Optimize pure memcpy() case.
  8230. for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
  8231. {
  8232. void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
  8233. for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
  8234. {
  8235. switch (fmt)
  8236. {
  8237. case block_format::cUASTC_HDR_4x4:
  8238. case block_format::cASTC_HDR_4x4:
  8239. {
  8240. // Nothing to do, UASTC HDR is just ASTC.
  8241. memcpy(pDst_block, pSource_block, sizeof(uastc_block));
  8242. status = true;
  8243. break;
  8244. }
  8245. case block_format::cBC6H:
  8246. {
  8247. status = astc_hdr_transcode_to_bc6h(*pSource_block, *(bc6h_block *)pDst_block);
  8248. break;
  8249. }
  8250. case block_format::cRGB_9E5:
  8251. {
  8252. astc_helpers::log_astc_block log_blk;
  8253. status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
  8254. if (status)
  8255. {
  8256. uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
  8257. static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
  8258. );
  8259. uint32_t blk_texels[4][4];
  8260. status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5);
  8261. if (status)
  8262. {
  8263. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  8264. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  8265. for (uint32_t y = 0; y < max_y; y++)
  8266. {
  8267. memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
  8268. pDst_pixels += output_row_pitch_in_blocks_or_pixels;
  8269. } // y
  8270. }
  8271. }
  8272. break;
  8273. }
  8274. case block_format::cRGBA_HALF:
  8275. {
  8276. astc_helpers::log_astc_block log_blk;
  8277. status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
  8278. if (status)
  8279. {
  8280. half_float* pDst_pixels = reinterpret_cast<half_float*>(
  8281. static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
  8282. );
  8283. half_float blk_texels[4][4][4];
  8284. status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
  8285. if (status)
  8286. {
  8287. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  8288. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  8289. for (uint32_t y = 0; y < max_y; y++)
  8290. {
  8291. for (uint32_t x = 0; x < max_x; x++)
  8292. {
  8293. pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
  8294. pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
  8295. pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
  8296. pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
  8297. } // x
  8298. pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
  8299. } // y
  8300. }
  8301. }
  8302. break;
  8303. }
  8304. case block_format::cRGB_HALF:
  8305. {
  8306. astc_helpers:: log_astc_block log_blk;
  8307. status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
  8308. if (status)
  8309. {
  8310. half_float* pDst_pixels =
  8311. reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
  8312. half_float blk_texels[4][4][4];
  8313. status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
  8314. if (status)
  8315. {
  8316. const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
  8317. const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
  8318. for (uint32_t y = 0; y < max_y; y++)
  8319. {
  8320. for (uint32_t x = 0; x < max_x; x++)
  8321. {
  8322. pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
  8323. pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
  8324. pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
  8325. } // x
  8326. pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
  8327. } // y
  8328. }
  8329. }
  8330. break;
  8331. }
  8332. default:
  8333. assert(0);
  8334. break;
  8335. }
  8336. if (!status)
  8337. {
  8338. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n"); return false;
  8339. }
  8340. } // block_x
  8341. } // block_y
  8342. return true;
  8343. #else
  8344. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: UASTC_HDR is unsupported\n");
  8345. BASISU_NOTE_UNUSED(decode_flags);
  8346. BASISU_NOTE_UNUSED(channel0);
  8347. BASISU_NOTE_UNUSED(channel1);
  8348. BASISU_NOTE_UNUSED(output_rows_in_pixels);
  8349. BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
  8350. BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
  8351. BASISU_NOTE_UNUSED(fmt);
  8352. BASISU_NOTE_UNUSED(image_data_size);
  8353. BASISU_NOTE_UNUSED(pImage_data);
  8354. BASISU_NOTE_UNUSED(num_blocks_x);
  8355. BASISU_NOTE_UNUSED(num_blocks_y);
  8356. BASISU_NOTE_UNUSED(pDst_blocks);
  8357. return false;
  8358. #endif
  8359. }
  8360. bool basisu_lowlevel_uastc_hdr_transcoder::transcode_image(
  8361. transcoder_texture_format target_format,
  8362. void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
  8363. const uint8_t* pCompressed_data, uint32_t compressed_data_length,
  8364. uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
  8365. uint32_t slice_offset, uint32_t slice_length,
  8366. uint32_t decode_flags,
  8367. bool has_alpha,
  8368. bool is_video,
  8369. uint32_t output_row_pitch_in_blocks_or_pixels,
  8370. basisu_transcoder_state* pState,
  8371. uint32_t output_rows_in_pixels,
  8372. int channel0, int channel1)
  8373. {
  8374. BASISU_NOTE_UNUSED(is_video);
  8375. BASISU_NOTE_UNUSED(level_index);
  8376. BASISU_NOTE_UNUSED(decode_flags);
  8377. if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
  8378. {
  8379. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: source data buffer too small\n");
  8380. return false;
  8381. }
  8382. const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
  8383. const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
  8384. if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
  8385. {
  8386. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: output buffer size too small\n");
  8387. return false;
  8388. }
  8389. bool status = false;
  8390. switch (target_format)
  8391. {
  8392. case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
  8393. {
  8394. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4,
  8395. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
  8396. if (!status)
  8397. {
  8398. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
  8399. }
  8400. break;
  8401. }
  8402. case transcoder_texture_format::cTFBC6H:
  8403. {
  8404. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
  8405. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
  8406. if (!status)
  8407. {
  8408. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
  8409. }
  8410. break;
  8411. }
  8412. case transcoder_texture_format::cTFRGB_HALF:
  8413. {
  8414. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
  8415. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8416. if (!status)
  8417. {
  8418. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
  8419. }
  8420. break;
  8421. }
  8422. case transcoder_texture_format::cTFRGBA_HALF:
  8423. {
  8424. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
  8425. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8426. if (!status)
  8427. {
  8428. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
  8429. }
  8430. break;
  8431. }
  8432. case transcoder_texture_format::cTFRGB_9E5:
  8433. {
  8434. status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
  8435. bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  8436. if (!status)
  8437. {
  8438. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
  8439. }
  8440. break;
  8441. }
  8442. default:
  8443. {
  8444. assert(0);
  8445. BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: Invalid format\n");
  8446. break;
  8447. }
  8448. }
  8449. return status;
  8450. }
  8451. //------------------------------------------------------------------------------------------------
  8452. basisu_transcoder::basisu_transcoder() :
  8453. m_ready_to_transcode(false)
  8454. {
  8455. }
  8456. bool basisu_transcoder::validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const
  8457. {
  8458. if (!validate_header(pData, data_size))
  8459. return false;
  8460. const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
  8461. #if !BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS
  8462. if (crc16(&pHeader->m_data_size, sizeof(basis_file_header) - BASISU_OFFSETOF(basis_file_header, m_data_size), 0) != pHeader->m_header_crc16)
  8463. {
  8464. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header CRC check failed\n");
  8465. return false;
  8466. }
  8467. if (full_validation)
  8468. {
  8469. if (crc16(reinterpret_cast<const uint8_t*>(pData) + sizeof(basis_file_header), pHeader->m_data_size, 0) != pHeader->m_data_crc16)
  8470. {
  8471. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: data CRC check failed\n");
  8472. return false;
  8473. }
  8474. }
  8475. #endif
  8476. return true;
  8477. }
  8478. bool basisu_transcoder::validate_header_quick(const void* pData, uint32_t data_size) const
  8479. {
  8480. if (data_size <= sizeof(basis_file_header))
  8481. return false;
  8482. const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
  8483. if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header)))
  8484. {
  8485. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n");
  8486. return false;
  8487. }
  8488. uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size;
  8489. if (data_size < expected_file_size)
  8490. {
  8491. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: source buffer is too small\n");
  8492. return false;
  8493. }
  8494. if ((!pHeader->m_total_slices) || (!pHeader->m_total_images))
  8495. {
  8496. BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header is invalid\n");
  8497. return false;
  8498. }
  8499. if ((pHeader->m_slice_desc_file_ofs >= data_size) ||
  8500. ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices))
  8501. )
  8502. {
  8503. BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n");
  8504. return false;
  8505. }
  8506. return true;
  8507. }
  8508. bool basisu_transcoder::validate_header(const void* pData, uint32_t data_size) const
  8509. {
  8510. if (data_size <= sizeof(basis_file_header))
  8511. {
  8512. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small\n");
  8513. return false;
  8514. }
  8515. const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
  8516. if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header)))
  8517. {
  8518. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n");
  8519. return false;
  8520. }
  8521. uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size;
  8522. if (data_size < expected_file_size)
  8523. {
  8524. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small, or header is corrupted\n");
  8525. return false;
  8526. }
  8527. if ((!pHeader->m_total_images) || (!pHeader->m_total_slices))
  8528. {
  8529. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (total images or slices are 0)\n");
  8530. return false;
  8531. }
  8532. if (pHeader->m_total_images > pHeader->m_total_slices)
  8533. {
  8534. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (too many images)\n");
  8535. return false;
  8536. }
  8537. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
  8538. {
  8539. if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices)
  8540. {
  8541. if (pHeader->m_total_slices & 1)
  8542. {
  8543. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid alpha .basis file\n");
  8544. return false;
  8545. }
  8546. }
  8547. // This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too.
  8548. if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0)
  8549. {
  8550. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n");
  8551. return false;
  8552. }
  8553. }
  8554. else
  8555. {
  8556. if ((pHeader->m_flags & cBASISHeaderFlagETC1S) != 0)
  8557. {
  8558. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n");
  8559. return false;
  8560. }
  8561. }
  8562. if ((pHeader->m_slice_desc_file_ofs >= data_size) ||
  8563. ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices))
  8564. )
  8565. {
  8566. BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n");
  8567. return false;
  8568. }
  8569. return true;
  8570. }
  8571. basis_texture_type basisu_transcoder::get_texture_type(const void* pData, uint32_t data_size) const
  8572. {
  8573. if (!validate_header_quick(pData, data_size))
  8574. {
  8575. BASISU_DEVEL_ERROR("basisu_transcoder::get_texture_type: header validation failed\n");
  8576. return cBASISTexType2DArray;
  8577. }
  8578. const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
  8579. basis_texture_type btt = static_cast<basis_texture_type>(static_cast<uint8_t>(pHeader->m_tex_type));
  8580. if (btt >= cBASISTexTypeTotal)
  8581. {
  8582. BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header's texture type field is invalid\n");
  8583. return cBASISTexType2DArray;
  8584. }
  8585. return btt;
  8586. }
  8587. bool basisu_transcoder::get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const
  8588. {
  8589. if (!validate_header_quick(pData, data_size))
  8590. {
  8591. BASISU_DEVEL_ERROR("basisu_transcoder::get_userdata: header validation failed\n");
  8592. return false;
  8593. }
  8594. const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
  8595. userdata0 = pHeader->m_userdata0;
  8596. userdata1 = pHeader->m_userdata1;
  8597. return true;
  8598. }
  8599. uint32_t basisu_transcoder::get_total_images(const void* pData, uint32_t data_size) const
  8600. {
  8601. if (!validate_header_quick(pData, data_size))
  8602. {
  8603. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n");
  8604. return 0;
  8605. }
  8606. const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
  8607. return pHeader->m_total_images;
  8608. }
  8609. basis_tex_format basisu_transcoder::get_tex_format(const void* pData, uint32_t data_size) const
  8610. {
  8611. if (!validate_header_quick(pData, data_size))
  8612. {
  8613. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n");
  8614. return basis_tex_format::cETC1S;
  8615. }
  8616. const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
  8617. return (basis_tex_format)(uint32_t)pHeader->m_tex_format;
  8618. }
  8619. bool basisu_transcoder::get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const
  8620. {
  8621. if (!validate_header_quick(pData, data_size))
  8622. {
  8623. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: header validation failed\n");
  8624. return false;
  8625. }
  8626. int slice_index = find_first_slice_index(pData, data_size, image_index, 0);
  8627. if (slice_index < 0)
  8628. {
  8629. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid slice index\n");
  8630. return false;
  8631. }
  8632. const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
  8633. if (image_index >= pHeader->m_total_images)
  8634. {
  8635. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n");
  8636. return false;
  8637. }
  8638. const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
  8639. uint32_t total_levels = 1;
  8640. for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++)
  8641. if (pSlice_descs[i].m_image_index == image_index)
  8642. total_levels = basisu::maximum<uint32_t>(total_levels, pSlice_descs[i].m_level_index + 1);
  8643. else
  8644. break;
  8645. if (total_levels > 16)
  8646. {
  8647. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n");
  8648. return false;
  8649. }
  8650. const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
  8651. image_info.m_image_index = image_index;
  8652. image_info.m_total_levels = total_levels;
  8653. image_info.m_alpha_flag = false;
  8654. // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha.
  8655. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
  8656. image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
  8657. else
  8658. image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
  8659. image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
  8660. image_info.m_width = slice_desc.m_num_blocks_x * 4;
  8661. image_info.m_height = slice_desc.m_num_blocks_y * 4;
  8662. image_info.m_orig_width = slice_desc.m_orig_width;
  8663. image_info.m_orig_height = slice_desc.m_orig_height;
  8664. image_info.m_num_blocks_x = slice_desc.m_num_blocks_x;
  8665. image_info.m_num_blocks_y = slice_desc.m_num_blocks_y;
  8666. image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y;
  8667. image_info.m_first_slice_index = slice_index;
  8668. return true;
  8669. }
  8670. uint32_t basisu_transcoder::get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const
  8671. {
  8672. if (!validate_header_quick(pData, data_size))
  8673. {
  8674. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: header validation failed\n");
  8675. return false;
  8676. }
  8677. int slice_index = find_first_slice_index(pData, data_size, image_index, 0);
  8678. if (slice_index < 0)
  8679. {
  8680. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: failed finding slice\n");
  8681. return false;
  8682. }
  8683. const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
  8684. if (image_index >= pHeader->m_total_images)
  8685. {
  8686. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image_index\n");
  8687. return false;
  8688. }
  8689. const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
  8690. uint32_t total_levels = 1;
  8691. for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++)
  8692. if (pSlice_descs[i].m_image_index == image_index)
  8693. total_levels = basisu::maximum<uint32_t>(total_levels, pSlice_descs[i].m_level_index + 1);
  8694. else
  8695. break;
  8696. const uint32_t cMaxSupportedLevels = 16;
  8697. if (total_levels > cMaxSupportedLevels)
  8698. {
  8699. BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image levels!\n");
  8700. return false;
  8701. }
  8702. return total_levels;
  8703. }
  8704. bool basisu_transcoder::get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const
  8705. {
  8706. if (!validate_header_quick(pData, data_size))
  8707. {
  8708. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: header validation failed\n");
  8709. return false;
  8710. }
  8711. int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
  8712. if (slice_index < 0)
  8713. {
  8714. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: failed finding slice\n");
  8715. return false;
  8716. }
  8717. const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
  8718. if (image_index >= pHeader->m_total_images)
  8719. {
  8720. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: invalid image_index\n");
  8721. return false;
  8722. }
  8723. const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
  8724. const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
  8725. orig_width = slice_desc.m_orig_width;
  8726. orig_height = slice_desc.m_orig_height;
  8727. total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;
  8728. return true;
  8729. }
  8730. bool basisu_transcoder::get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& image_info, uint32_t image_index, uint32_t level_index) const
  8731. {
  8732. if (!validate_header_quick(pData, data_size))
  8733. {
  8734. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: validate_file_checksums failed\n");
  8735. return false;
  8736. }
  8737. int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
  8738. if (slice_index < 0)
  8739. {
  8740. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: failed finding slice\n");
  8741. return false;
  8742. }
  8743. const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
  8744. if (image_index >= pHeader->m_total_images)
  8745. {
  8746. BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: invalid image_index\n");
  8747. return false;
  8748. }
  8749. const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
  8750. const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
  8751. image_info.m_image_index = image_index;
  8752. image_info.m_level_index = level_index;
  8753. // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha.
  8754. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
  8755. image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
  8756. else
  8757. image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
  8758. image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
  8759. image_info.m_width = slice_desc.m_num_blocks_x * 4;
  8760. image_info.m_height = slice_desc.m_num_blocks_y * 4;
  8761. image_info.m_orig_width = slice_desc.m_orig_width;
  8762. image_info.m_orig_height = slice_desc.m_orig_height;
  8763. image_info.m_num_blocks_x = slice_desc.m_num_blocks_x;
  8764. image_info.m_num_blocks_y = slice_desc.m_num_blocks_y;
  8765. image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y;
  8766. image_info.m_first_slice_index = slice_index;
  8767. image_info.m_rgb_file_ofs = slice_desc.m_file_ofs;
  8768. image_info.m_rgb_file_len = slice_desc.m_file_size;
  8769. image_info.m_alpha_file_ofs = 0;
  8770. image_info.m_alpha_file_len = 0;
  8771. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
  8772. {
  8773. if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices)
  8774. {
  8775. assert((slice_index + 1) < (int)pHeader->m_total_slices);
  8776. image_info.m_alpha_file_ofs = pSlice_descs[slice_index + 1].m_file_ofs;
  8777. image_info.m_alpha_file_len = pSlice_descs[slice_index + 1].m_file_size;
  8778. }
  8779. }
  8780. return true;
  8781. }
  8782. bool basisu_transcoder::get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const
  8783. {
  8784. if (!validate_file_checksums(pData, data_size, false))
  8785. {
  8786. BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: validate_file_checksums failed\n");
  8787. return false;
  8788. }
  8789. const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
  8790. const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
  8791. file_info.m_version = pHeader->m_ver;
  8792. file_info.m_total_header_size = sizeof(basis_file_header) + pHeader->m_total_slices * sizeof(basis_slice_desc);
  8793. file_info.m_total_selectors = pHeader->m_total_selectors;
  8794. file_info.m_selector_codebook_ofs = pHeader->m_selector_cb_file_ofs;
  8795. file_info.m_selector_codebook_size = pHeader->m_selector_cb_file_size;
  8796. file_info.m_total_endpoints = pHeader->m_total_endpoints;
  8797. file_info.m_endpoint_codebook_ofs = pHeader->m_endpoint_cb_file_ofs;
  8798. file_info.m_endpoint_codebook_size = pHeader->m_endpoint_cb_file_size;
  8799. file_info.m_tables_ofs = pHeader->m_tables_file_ofs;
  8800. file_info.m_tables_size = pHeader->m_tables_file_size;
  8801. file_info.m_tex_format = static_cast<basis_tex_format>(static_cast<int>(pHeader->m_tex_format));
  8802. file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S);
  8803. file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0;
  8804. file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
  8805. const uint32_t total_slices = pHeader->m_total_slices;
  8806. file_info.m_slice_info.resize(total_slices);
  8807. file_info.m_slices_size = 0;
  8808. file_info.m_tex_type = static_cast<basis_texture_type>(static_cast<uint8_t>(pHeader->m_tex_type));
  8809. if (file_info.m_tex_type > cBASISTexTypeTotal)
  8810. {
  8811. BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: invalid texture type, file is corrupted\n");
  8812. return false;
  8813. }
  8814. file_info.m_us_per_frame = pHeader->m_us_per_frame;
  8815. file_info.m_userdata0 = pHeader->m_userdata0;
  8816. file_info.m_userdata1 = pHeader->m_userdata1;
  8817. file_info.m_image_mipmap_levels.resize(0);
  8818. file_info.m_image_mipmap_levels.resize(pHeader->m_total_images);
  8819. file_info.m_total_images = pHeader->m_total_images;
  8820. for (uint32_t i = 0; i < total_slices; i++)
  8821. {
  8822. file_info.m_slices_size += pSlice_descs[i].m_file_size;
  8823. basisu_slice_info& slice_info = file_info.m_slice_info[i];
  8824. slice_info.m_orig_width = pSlice_descs[i].m_orig_width;
  8825. slice_info.m_orig_height = pSlice_descs[i].m_orig_height;
  8826. slice_info.m_width = pSlice_descs[i].m_num_blocks_x * 4;
  8827. slice_info.m_height = pSlice_descs[i].m_num_blocks_y * 4;
  8828. slice_info.m_num_blocks_x = pSlice_descs[i].m_num_blocks_x;
  8829. slice_info.m_num_blocks_y = pSlice_descs[i].m_num_blocks_y;
  8830. slice_info.m_total_blocks = slice_info.m_num_blocks_x * slice_info.m_num_blocks_y;
  8831. slice_info.m_compressed_size = pSlice_descs[i].m_file_size;
  8832. slice_info.m_slice_index = i;
  8833. slice_info.m_image_index = pSlice_descs[i].m_image_index;
  8834. slice_info.m_level_index = pSlice_descs[i].m_level_index;
  8835. slice_info.m_unpacked_slice_crc16 = pSlice_descs[i].m_slice_data_crc16;
  8836. slice_info.m_alpha_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsHasAlpha) != 0;
  8837. slice_info.m_iframe_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
  8838. if (pSlice_descs[i].m_image_index >= pHeader->m_total_images)
  8839. {
  8840. BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice desc's image index is invalid\n");
  8841. return false;
  8842. }
  8843. file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] = basisu::maximum<uint32_t>(file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index], pSlice_descs[i].m_level_index + 1);
  8844. if (file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] > 16)
  8845. {
  8846. BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice mipmap level is invalid\n");
  8847. return false;
  8848. }
  8849. }
  8850. return true;
  8851. }
  8852. bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size)
  8853. {
  8854. if (!validate_header_quick(pData, data_size))
  8855. {
  8856. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: header validation failed\n");
  8857. return false;
  8858. }
  8859. const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
  8860. const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
  8861. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
  8862. {
  8863. if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
  8864. {
  8865. m_lowlevel_etc1s_decoder.clear();
  8866. }
  8867. if (pHeader->m_flags & cBASISHeaderFlagUsesGlobalCodebook)
  8868. {
  8869. if (!m_lowlevel_etc1s_decoder.get_global_codebooks())
  8870. {
  8871. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: File uses global codebooks, but set_global_codebooks() has not been called\n");
  8872. return false;
  8873. }
  8874. if (!m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size())
  8875. {
  8876. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebooks must be unpacked first by calling start_transcoding()\n");
  8877. return false;
  8878. }
  8879. if ((m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size() != pHeader->m_total_endpoints) ||
  8880. (m_lowlevel_etc1s_decoder.get_global_codebooks()->get_selectors().size() != pHeader->m_total_selectors))
  8881. {
  8882. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebook size mismatch (wrong codebooks for file).\n");
  8883. return false;
  8884. }
  8885. if (!pHeader->m_tables_file_size)
  8886. {
  8887. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (2)\n");
  8888. return false;
  8889. }
  8890. if (pHeader->m_tables_file_ofs > data_size)
  8891. {
  8892. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (4)\n");
  8893. return false;
  8894. }
  8895. if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs))
  8896. {
  8897. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (5)\n");
  8898. return false;
  8899. }
  8900. }
  8901. else
  8902. {
  8903. if (!pHeader->m_endpoint_cb_file_size || !pHeader->m_selector_cb_file_size || !pHeader->m_tables_file_size)
  8904. {
  8905. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (0)\n");
  8906. return false;
  8907. }
  8908. if ((pHeader->m_endpoint_cb_file_ofs > data_size) || (pHeader->m_selector_cb_file_ofs > data_size) || (pHeader->m_tables_file_ofs > data_size))
  8909. {
  8910. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (1)\n");
  8911. return false;
  8912. }
  8913. if (pHeader->m_endpoint_cb_file_size > (data_size - pHeader->m_endpoint_cb_file_ofs))
  8914. {
  8915. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (2)\n");
  8916. return false;
  8917. }
  8918. if (pHeader->m_selector_cb_file_size > (data_size - pHeader->m_selector_cb_file_ofs))
  8919. {
  8920. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n");
  8921. return false;
  8922. }
  8923. if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs))
  8924. {
  8925. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n");
  8926. return false;
  8927. }
  8928. if (!m_lowlevel_etc1s_decoder.decode_palettes(
  8929. pHeader->m_total_endpoints, pDataU8 + pHeader->m_endpoint_cb_file_ofs, pHeader->m_endpoint_cb_file_size,
  8930. pHeader->m_total_selectors, pDataU8 + pHeader->m_selector_cb_file_ofs, pHeader->m_selector_cb_file_size))
  8931. {
  8932. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_palettes failed\n");
  8933. return false;
  8934. }
  8935. }
  8936. if (!m_lowlevel_etc1s_decoder.decode_tables(pDataU8 + pHeader->m_tables_file_ofs, pHeader->m_tables_file_size))
  8937. {
  8938. BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_tables failed\n");
  8939. return false;
  8940. }
  8941. }
  8942. else
  8943. {
  8944. // Nothing special to do for UASTC/UASTC HDR.
  8945. if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
  8946. {
  8947. m_lowlevel_etc1s_decoder.clear();
  8948. }
  8949. }
  8950. m_ready_to_transcode = true;
  8951. return true;
  8952. }
  8953. bool basisu_transcoder::stop_transcoding()
  8954. {
  8955. m_lowlevel_etc1s_decoder.clear();
  8956. m_ready_to_transcode = false;
  8957. return true;
  8958. }
  8959. bool basisu_transcoder::transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt,
  8960. uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels, int channel0, int channel1) const
  8961. {
  8962. if (!m_ready_to_transcode)
  8963. {
  8964. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: must call start_transcoding first\n");
  8965. return false;
  8966. }
  8967. if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
  8968. {
  8969. // TODO: Not yet supported
  8970. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
  8971. return false;
  8972. }
  8973. if (!validate_header_quick(pData, data_size))
  8974. {
  8975. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: header validation failed\n");
  8976. return false;
  8977. }
  8978. const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
  8979. const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
  8980. if (slice_index >= pHeader->m_total_slices)
  8981. {
  8982. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: slice_index >= pHeader->m_total_slices\n");
  8983. return false;
  8984. }
  8985. const basis_slice_desc& slice_desc = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index];
  8986. uint32_t total_4x4_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;
  8987. if (basis_block_format_is_uncompressed(fmt))
  8988. {
  8989. // Assume the output buffer is orig_width by orig_height
  8990. if (!output_row_pitch_in_blocks_or_pixels)
  8991. output_row_pitch_in_blocks_or_pixels = slice_desc.m_orig_width;
  8992. if (!output_rows_in_pixels)
  8993. output_rows_in_pixels = slice_desc.m_orig_height;
  8994. // Now make sure the output buffer is large enough, or we'll overwrite memory.
  8995. if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
  8996. {
  8997. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
  8998. return false;
  8999. }
  9000. }
  9001. else if (fmt == block_format::cFXT1_RGB)
  9002. {
  9003. const uint32_t num_blocks_fxt1_x = (slice_desc.m_orig_width + 7) / 8;
  9004. const uint32_t num_blocks_fxt1_y = (slice_desc.m_orig_height + 3) / 4;
  9005. const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y;
  9006. if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
  9007. {
  9008. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
  9009. return false;
  9010. }
  9011. }
  9012. else
  9013. {
  9014. if (output_blocks_buf_size_in_blocks_or_pixels < total_4x4_blocks)
  9015. {
  9016. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks\n");
  9017. return false;
  9018. }
  9019. }
  9020. if (fmt != block_format::cETC1)
  9021. {
  9022. if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
  9023. {
  9024. if ((!basisu::is_pow2(slice_desc.m_num_blocks_x * 4)) || (!basisu::is_pow2(slice_desc.m_num_blocks_y * 4)))
  9025. {
  9026. // PVRTC1 only supports power of 2 dimensions
  9027. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: PVRTC1 only supports power of 2 dimensions\n");
  9028. return false;
  9029. }
  9030. }
  9031. }
  9032. if (slice_desc.m_file_ofs > data_size)
  9033. {
  9034. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_ofs, or passed in buffer too small\n");
  9035. return false;
  9036. }
  9037. const uint32_t data_size_left = data_size - slice_desc.m_file_ofs;
  9038. if (data_size_left < slice_desc.m_file_size)
  9039. {
  9040. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n");
  9041. return false;
  9042. }
  9043. if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
  9044. {
  9045. return m_lowlevel_uastc_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
  9046. pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
  9047. fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
  9048. output_rows_in_pixels, channel0, channel1, decode_flags);
  9049. }
  9050. else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
  9051. {
  9052. return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
  9053. pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
  9054. fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
  9055. output_rows_in_pixels, channel0, channel1, decode_flags);
  9056. }
  9057. else
  9058. {
  9059. return m_lowlevel_etc1s_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
  9060. pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
  9061. fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
  9062. (decode_flags & cDecodeFlagsOutputHasAlphaIndices) != 0, pAlpha_blocks, output_rows_in_pixels);
  9063. }
  9064. }
  9065. int basisu_transcoder::find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const
  9066. {
  9067. BASISU_NOTE_UNUSED(data_size);
  9068. const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
  9069. const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
  9070. // For very large basis files this search could be painful
  9071. // TODO: Binary search this
  9072. for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++)
  9073. {
  9074. const basis_slice_desc& slice_desc = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_iter];
  9075. if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index))
  9076. return slice_iter;
  9077. }
  9078. BASISU_DEVEL_ERROR("basisu_transcoder::find_first_slice_index: didn't find slice\n");
  9079. return -1;
  9080. }
  9081. int basisu_transcoder::find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const
  9082. {
  9083. if (!validate_header_quick(pData, data_size))
  9084. {
  9085. BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: header validation failed\n");
  9086. return false;
  9087. }
  9088. const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
  9089. const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
  9090. const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs);
  9091. // For very large basis files this search could be painful
  9092. // TODO: Binary search this
  9093. for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++)
  9094. {
  9095. const basis_slice_desc& slice_desc = pSlice_descs[slice_iter];
  9096. if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index))
  9097. {
  9098. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
  9099. {
  9100. const bool slice_alpha = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
  9101. if (slice_alpha == alpha_data)
  9102. return slice_iter;
  9103. }
  9104. else
  9105. {
  9106. return slice_iter;
  9107. }
  9108. }
  9109. }
  9110. BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: didn't find slice\n");
  9111. return -1;
  9112. }
  9113. void basisu_transcoder::write_opaque_alpha_blocks(
  9114. uint32_t num_blocks_x, uint32_t num_blocks_y,
  9115. void* pOutput_blocks, block_format fmt,
  9116. uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels)
  9117. {
  9118. // 'num_blocks_y', 'pOutput_blocks' & 'block_stride_in_bytes' unused
  9119. // when disabling BASISD_SUPPORT_ETC2_EAC_A8 *and* BASISD_SUPPORT_DXT5A
  9120. BASISU_NOTE_UNUSED(num_blocks_y);
  9121. BASISU_NOTE_UNUSED(pOutput_blocks);
  9122. BASISU_NOTE_UNUSED(block_stride_in_bytes);
  9123. if (!output_row_pitch_in_blocks_or_pixels)
  9124. output_row_pitch_in_blocks_or_pixels = num_blocks_x;
  9125. if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11))
  9126. {
  9127. #if BASISD_SUPPORT_ETC2_EAC_A8
  9128. eac_block blk;
  9129. blk.m_base = 255;
  9130. blk.m_multiplier = 1;
  9131. blk.m_table = 13;
  9132. // Selectors are all 4's
  9133. memcpy(&blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
  9134. for (uint32_t y = 0; y < num_blocks_y; y++)
  9135. {
  9136. uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes;
  9137. for (uint32_t x = 0; x < num_blocks_x; x++)
  9138. {
  9139. memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
  9140. dst_ofs += block_stride_in_bytes;
  9141. }
  9142. }
  9143. #endif
  9144. }
  9145. else if (fmt == block_format::cBC4)
  9146. {
  9147. #if BASISD_SUPPORT_DXT5A
  9148. dxt5a_block blk;
  9149. blk.m_endpoints[0] = 255;
  9150. blk.m_endpoints[1] = 255;
  9151. memset(blk.m_selectors, 0, sizeof(blk.m_selectors));
  9152. for (uint32_t y = 0; y < num_blocks_y; y++)
  9153. {
  9154. uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes;
  9155. for (uint32_t x = 0; x < num_blocks_x; x++)
  9156. {
  9157. memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
  9158. dst_ofs += block_stride_in_bytes;
  9159. }
  9160. }
  9161. #endif
  9162. }
  9163. }
  9164. bool basisu_transcoder::transcode_image_level(
  9165. const void* pData, uint32_t data_size,
  9166. uint32_t image_index, uint32_t level_index,
  9167. void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
  9168. transcoder_texture_format fmt,
  9169. uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state *pState, uint32_t output_rows_in_pixels) const
  9170. {
  9171. const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(fmt);
  9172. if (!m_ready_to_transcode)
  9173. {
  9174. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: must call start_transcoding() first\n");
  9175. return false;
  9176. }
  9177. //const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
  9178. if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
  9179. {
  9180. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
  9181. // TODO: Not yet supported
  9182. return false;
  9183. }
  9184. if (!validate_header_quick(pData, data_size))
  9185. {
  9186. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: header validation failed\n");
  9187. return false;
  9188. }
  9189. const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
  9190. const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
  9191. const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs);
  9192. const bool basis_file_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
  9193. int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
  9194. if (slice_index < 0)
  9195. {
  9196. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n");
  9197. // Unable to find the requested image/level
  9198. return false;
  9199. }
  9200. if ((fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices))
  9201. {
  9202. // Switch to PVRTC1 RGB if the input doesn't have alpha.
  9203. fmt = transcoder_texture_format::cTFPVRTC1_4_RGB;
  9204. }
  9205. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
  9206. {
  9207. if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha)
  9208. {
  9209. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has out of order alpha slice\n");
  9210. // The first slice shouldn't have alpha data in a properly formed basis file
  9211. return false;
  9212. }
  9213. if (basis_file_has_alpha_slices)
  9214. {
  9215. // The alpha data should immediately follow the color data, and have the same resolution.
  9216. if ((slice_index + 1U) >= pHeader->m_total_slices)
  9217. {
  9218. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice\n");
  9219. // basis file is missing the alpha slice
  9220. return false;
  9221. }
  9222. // Basic sanity checks
  9223. if ((pSlice_descs[slice_index + 1].m_flags & cSliceDescFlagsHasAlpha) == 0)
  9224. {
  9225. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice (flag check)\n");
  9226. // This slice should have alpha data
  9227. return false;
  9228. }
  9229. if ((pSlice_descs[slice_index].m_num_blocks_x != pSlice_descs[slice_index + 1].m_num_blocks_x) || (pSlice_descs[slice_index].m_num_blocks_y != pSlice_descs[slice_index + 1].m_num_blocks_y))
  9230. {
  9231. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file slice dimensions bad\n");
  9232. // Alpha slice should have been the same res as the color slice
  9233. return false;
  9234. }
  9235. }
  9236. }
  9237. bool status = false;
  9238. const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y;
  9239. if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks))
  9240. {
  9241. // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves.
  9242. // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8.
  9243. // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory.
  9244. memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel);
  9245. }
  9246. if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
  9247. {
  9248. const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
  9249. // Use the container independent image transcode method.
  9250. status = m_lowlevel_uastc_hdr_decoder.transcode_image(fmt,
  9251. pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
  9252. (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
  9253. pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
  9254. decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  9255. }
  9256. else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
  9257. {
  9258. const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
  9259. // Use the container independent image transcode method.
  9260. status = m_lowlevel_uastc_decoder.transcode_image(fmt,
  9261. pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
  9262. (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
  9263. pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
  9264. decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  9265. }
  9266. else
  9267. {
  9268. // ETC1S
  9269. const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
  9270. const basis_slice_desc* pAlpha_slice_desc = basis_file_has_alpha_slices ? &pSlice_descs[slice_index + 1] : nullptr;
  9271. assert((pSlice_desc->m_flags & cSliceDescFlagsHasAlpha) == 0);
  9272. if (pAlpha_slice_desc)
  9273. {
  9274. // Basic sanity checks
  9275. assert((pAlpha_slice_desc->m_flags & cSliceDescFlagsHasAlpha) != 0);
  9276. assert(pSlice_desc->m_num_blocks_x == pAlpha_slice_desc->m_num_blocks_x);
  9277. assert(pSlice_desc->m_num_blocks_y == pAlpha_slice_desc->m_num_blocks_y);
  9278. assert(pSlice_desc->m_level_index == pAlpha_slice_desc->m_level_index);
  9279. }
  9280. // Use the container independent image transcode method.
  9281. status = m_lowlevel_etc1s_decoder.transcode_image(fmt,
  9282. pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
  9283. (const uint8_t *)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
  9284. pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
  9285. (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_ofs : 0U, (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_size : 0U,
  9286. decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
  9287. } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
  9288. if (!status)
  9289. {
  9290. BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n");
  9291. }
  9292. else
  9293. {
  9294. //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n");
  9295. }
  9296. return status;
  9297. }
  9298. uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt)
  9299. {
  9300. switch (fmt)
  9301. {
  9302. case transcoder_texture_format::cTFETC1_RGB:
  9303. case transcoder_texture_format::cTFBC1_RGB:
  9304. case transcoder_texture_format::cTFBC4_R:
  9305. case transcoder_texture_format::cTFPVRTC1_4_RGB:
  9306. case transcoder_texture_format::cTFPVRTC1_4_RGBA:
  9307. case transcoder_texture_format::cTFATC_RGB:
  9308. case transcoder_texture_format::cTFPVRTC2_4_RGB:
  9309. case transcoder_texture_format::cTFPVRTC2_4_RGBA:
  9310. case transcoder_texture_format::cTFETC2_EAC_R11:
  9311. return 8;
  9312. case transcoder_texture_format::cTFBC7_RGBA:
  9313. case transcoder_texture_format::cTFBC7_ALT:
  9314. case transcoder_texture_format::cTFBC6H:
  9315. case transcoder_texture_format::cTFETC2_RGBA:
  9316. case transcoder_texture_format::cTFBC3_RGBA:
  9317. case transcoder_texture_format::cTFBC5_RG:
  9318. case transcoder_texture_format::cTFASTC_4x4_RGBA:
  9319. case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
  9320. case transcoder_texture_format::cTFATC_RGBA:
  9321. case transcoder_texture_format::cTFFXT1_RGB:
  9322. case transcoder_texture_format::cTFETC2_EAC_RG11:
  9323. return 16;
  9324. case transcoder_texture_format::cTFRGBA32:
  9325. case transcoder_texture_format::cTFRGB_9E5:
  9326. return sizeof(uint32_t);
  9327. case transcoder_texture_format::cTFRGB565:
  9328. case transcoder_texture_format::cTFBGR565:
  9329. case transcoder_texture_format::cTFRGBA4444:
  9330. return sizeof(uint16_t);
  9331. case transcoder_texture_format::cTFRGB_HALF:
  9332. return sizeof(half_float) * 3;
  9333. case transcoder_texture_format::cTFRGBA_HALF:
  9334. return sizeof(half_float) * 4;
  9335. default:
  9336. assert(0);
  9337. BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
  9338. break;
  9339. }
  9340. return 0;
  9341. }
  9342. const char* basis_get_format_name(transcoder_texture_format fmt)
  9343. {
  9344. switch (fmt)
  9345. {
  9346. case transcoder_texture_format::cTFETC1_RGB: return "ETC1_RGB";
  9347. case transcoder_texture_format::cTFBC1_RGB: return "BC1_RGB";
  9348. case transcoder_texture_format::cTFBC4_R: return "BC4_R";
  9349. case transcoder_texture_format::cTFPVRTC1_4_RGB: return "PVRTC1_4_RGB";
  9350. case transcoder_texture_format::cTFPVRTC1_4_RGBA: return "PVRTC1_4_RGBA";
  9351. case transcoder_texture_format::cTFBC7_RGBA: return "BC7_RGBA";
  9352. case transcoder_texture_format::cTFBC7_ALT: return "BC7_RGBA";
  9353. case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA";
  9354. case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA";
  9355. case transcoder_texture_format::cTFBC5_RG: return "BC5_RG";
  9356. case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA";
  9357. case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_RGBA";
  9358. case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB";
  9359. case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA";
  9360. case transcoder_texture_format::cTFRGBA32: return "RGBA32";
  9361. case transcoder_texture_format::cTFRGB565: return "RGB565";
  9362. case transcoder_texture_format::cTFBGR565: return "BGR565";
  9363. case transcoder_texture_format::cTFRGBA4444: return "RGBA4444";
  9364. case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF";
  9365. case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5";
  9366. case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF";
  9367. case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB";
  9368. case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB";
  9369. case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
  9370. case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11";
  9371. case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11";
  9372. case transcoder_texture_format::cTFBC6H: return "BC6H";
  9373. default:
  9374. assert(0);
  9375. BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
  9376. break;
  9377. }
  9378. return "";
  9379. }
  9380. const char* basis_get_block_format_name(block_format fmt)
  9381. {
  9382. switch (fmt)
  9383. {
  9384. case block_format::cETC1: return "ETC1";
  9385. case block_format::cBC1: return "BC1";
  9386. case block_format::cPVRTC1_4_RGB: return "PVRTC1_4_RGB";
  9387. case block_format::cPVRTC1_4_RGBA: return "PVRTC1_4_RGBA";
  9388. case block_format::cBC7: return "BC7";
  9389. case block_format::cETC2_RGBA: return "ETC2_RGBA";
  9390. case block_format::cBC3: return "BC3";
  9391. case block_format::cASTC_4x4: return "ASTC_4x4";
  9392. case block_format::cATC_RGB: return "ATC_RGB";
  9393. case block_format::cRGBA32: return "RGBA32";
  9394. case block_format::cRGB565: return "RGB565";
  9395. case block_format::cBGR565: return "BGR565";
  9396. case block_format::cRGBA4444: return "RGBA4444";
  9397. case block_format::cRGBA_HALF: return "RGBA_HALF";
  9398. case block_format::cRGB_HALF: return "RGB_HALF";
  9399. case block_format::cRGB_9E5: return "RGB_9E5";
  9400. case block_format::cUASTC_4x4: return "UASTC_4x4";
  9401. case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4";
  9402. case block_format::cBC6H: return "BC6H";
  9403. case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4";
  9404. case block_format::cFXT1_RGB: return "FXT1_RGB";
  9405. case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB";
  9406. case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
  9407. case block_format::cETC2_EAC_R11: return "ETC2_EAC_R11";
  9408. case block_format::cETC2_EAC_RG11: return "ETC2_EAC_RG11";
  9409. default:
  9410. assert(0);
  9411. BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
  9412. break;
  9413. }
  9414. return "";
  9415. }
  9416. const char* basis_get_texture_type_name(basis_texture_type tex_type)
  9417. {
  9418. switch (tex_type)
  9419. {
  9420. case cBASISTexType2D: return "2D";
  9421. case cBASISTexType2DArray: return "2D array";
  9422. case cBASISTexTypeCubemapArray: return "cubemap array";
  9423. case cBASISTexTypeVideoFrames: return "video";
  9424. case cBASISTexTypeVolume: return "3D";
  9425. default:
  9426. assert(0);
  9427. BASISU_DEVEL_ERROR("basis_get_texture_type_name: Invalid tex_type\n");
  9428. break;
  9429. }
  9430. return "";
  9431. }
  9432. bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)
  9433. {
  9434. // TODO: Technically ASTC_HDR does support alpha, but UASTC_HDR doesn't yet support it. Unsure what to do here.
  9435. switch (fmt)
  9436. {
  9437. case transcoder_texture_format::cTFETC2_RGBA:
  9438. case transcoder_texture_format::cTFBC3_RGBA:
  9439. case transcoder_texture_format::cTFASTC_4x4_RGBA:
  9440. case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
  9441. case transcoder_texture_format::cTFBC7_RGBA:
  9442. case transcoder_texture_format::cTFBC7_ALT:
  9443. case transcoder_texture_format::cTFPVRTC1_4_RGBA:
  9444. case transcoder_texture_format::cTFPVRTC2_4_RGBA:
  9445. case transcoder_texture_format::cTFATC_RGBA:
  9446. case transcoder_texture_format::cTFRGBA32:
  9447. case transcoder_texture_format::cTFRGBA4444:
  9448. case transcoder_texture_format::cTFRGBA_HALF:
  9449. return true;
  9450. default:
  9451. break;
  9452. }
  9453. return false;
  9454. }
  9455. bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt)
  9456. {
  9457. switch (fmt)
  9458. {
  9459. case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
  9460. case transcoder_texture_format::cTFBC6H:
  9461. case transcoder_texture_format::cTFRGBA_HALF:
  9462. case transcoder_texture_format::cTFRGB_HALF:
  9463. case transcoder_texture_format::cTFRGB_9E5:
  9464. return true;
  9465. default:
  9466. break;
  9467. }
  9468. return false;
  9469. }
  9470. basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt)
  9471. {
  9472. switch (fmt)
  9473. {
  9474. case transcoder_texture_format::cTFETC1_RGB: return basisu::texture_format::cETC1;
  9475. case transcoder_texture_format::cTFBC1_RGB: return basisu::texture_format::cBC1;
  9476. case transcoder_texture_format::cTFBC4_R: return basisu::texture_format::cBC4;
  9477. case transcoder_texture_format::cTFPVRTC1_4_RGB: return basisu::texture_format::cPVRTC1_4_RGB;
  9478. case transcoder_texture_format::cTFPVRTC1_4_RGBA: return basisu::texture_format::cPVRTC1_4_RGBA;
  9479. case transcoder_texture_format::cTFBC7_RGBA: return basisu::texture_format::cBC7;
  9480. case transcoder_texture_format::cTFBC7_ALT: return basisu::texture_format::cBC7;
  9481. case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA;
  9482. case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3;
  9483. case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5;
  9484. case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4;
  9485. case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return basisu::texture_format::cASTC_HDR_4x4;
  9486. case transcoder_texture_format::cTFBC6H: return basisu::texture_format::cBC6HUnsigned;
  9487. case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB;
  9488. case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA;
  9489. case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32;
  9490. case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565;
  9491. case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565;
  9492. case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444;
  9493. case transcoder_texture_format::cTFRGBA_HALF: return basisu::texture_format::cRGBA_HALF;
  9494. case transcoder_texture_format::cTFRGB_9E5: return basisu::texture_format::cRGB_9E5;
  9495. case transcoder_texture_format::cTFRGB_HALF: return basisu::texture_format::cRGB_HALF;
  9496. case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB;
  9497. case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA;
  9498. case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA;
  9499. case transcoder_texture_format::cTFETC2_EAC_R11: return basisu::texture_format::cETC2_R11_EAC;
  9500. case transcoder_texture_format::cTFETC2_EAC_RG11: return basisu::texture_format::cETC2_RG11_EAC;
  9501. default:
  9502. assert(0);
  9503. BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
  9504. break;
  9505. }
  9506. return basisu::texture_format::cInvalidTextureFormat;
  9507. }
  9508. bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type)
  9509. {
  9510. switch (tex_type)
  9511. {
  9512. case transcoder_texture_format::cTFRGBA32:
  9513. case transcoder_texture_format::cTFRGB565:
  9514. case transcoder_texture_format::cTFBGR565:
  9515. case transcoder_texture_format::cTFRGBA4444:
  9516. case transcoder_texture_format::cTFRGB_HALF:
  9517. case transcoder_texture_format::cTFRGBA_HALF:
  9518. case transcoder_texture_format::cTFRGB_9E5:
  9519. return true;
  9520. default:
  9521. break;
  9522. }
  9523. return false;
  9524. }
  9525. bool basis_block_format_is_uncompressed(block_format blk_fmt)
  9526. {
  9527. switch (blk_fmt)
  9528. {
  9529. case block_format::cRGB32:
  9530. case block_format::cRGBA32:
  9531. case block_format::cA32:
  9532. case block_format::cRGB565:
  9533. case block_format::cBGR565:
  9534. case block_format::cRGBA4444:
  9535. case block_format::cRGBA4444_COLOR:
  9536. case block_format::cRGBA4444_ALPHA:
  9537. case block_format::cRGBA4444_COLOR_OPAQUE:
  9538. case block_format::cRGBA_HALF:
  9539. case block_format::cRGB_HALF:
  9540. case block_format::cRGB_9E5:
  9541. return true;
  9542. default:
  9543. break;
  9544. }
  9545. return false;
  9546. }
  9547. uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt)
  9548. {
  9549. switch (fmt)
  9550. {
  9551. case transcoder_texture_format::cTFRGBA32:
  9552. case transcoder_texture_format::cTFRGB_9E5:
  9553. return sizeof(uint32_t);
  9554. case transcoder_texture_format::cTFRGB565:
  9555. case transcoder_texture_format::cTFBGR565:
  9556. case transcoder_texture_format::cTFRGBA4444:
  9557. return sizeof(uint16_t);
  9558. case transcoder_texture_format::cTFRGB_HALF:
  9559. return sizeof(half_float) * 3;
  9560. case transcoder_texture_format::cTFRGBA_HALF:
  9561. return sizeof(half_float) * 4;
  9562. default:
  9563. break;
  9564. }
  9565. return 0;
  9566. }
  9567. uint32_t basis_get_block_width(transcoder_texture_format tex_type)
  9568. {
  9569. switch (tex_type)
  9570. {
  9571. case transcoder_texture_format::cTFFXT1_RGB:
  9572. return 8;
  9573. default:
  9574. break;
  9575. }
  9576. return 4;
  9577. }
  9578. uint32_t basis_get_block_height(transcoder_texture_format tex_type)
  9579. {
  9580. BASISU_NOTE_UNUSED(tex_type);
  9581. return 4;
  9582. }
  9583. bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
  9584. {
  9585. if (fmt == basis_tex_format::cUASTC_HDR_4x4)
  9586. {
  9587. // UASTC HDR
  9588. #if BASISD_SUPPORT_UASTC_HDR
  9589. switch (tex_type)
  9590. {
  9591. case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
  9592. case transcoder_texture_format::cTFBC6H:
  9593. case transcoder_texture_format::cTFRGBA_HALF:
  9594. case transcoder_texture_format::cTFRGB_HALF:
  9595. case transcoder_texture_format::cTFRGB_9E5:
  9596. return true;
  9597. default:
  9598. break;
  9599. }
  9600. #endif
  9601. }
  9602. else if (fmt == basis_tex_format::cUASTC4x4)
  9603. {
  9604. // UASTC LDR
  9605. #if BASISD_SUPPORT_UASTC
  9606. switch (tex_type)
  9607. {
  9608. // These niche formats aren't currently supported for UASTC - everything else is.
  9609. case transcoder_texture_format::cTFPVRTC2_4_RGB:
  9610. case transcoder_texture_format::cTFPVRTC2_4_RGBA:
  9611. case transcoder_texture_format::cTFATC_RGB:
  9612. case transcoder_texture_format::cTFATC_RGBA:
  9613. case transcoder_texture_format::cTFFXT1_RGB:
  9614. // UASTC LDR doesn't support transcoding to HDR formats
  9615. case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
  9616. case transcoder_texture_format::cTFBC6H:
  9617. case transcoder_texture_format::cTFRGBA_HALF:
  9618. case transcoder_texture_format::cTFRGB_HALF:
  9619. case transcoder_texture_format::cTFRGB_9E5:
  9620. return false;
  9621. default:
  9622. return true;
  9623. }
  9624. #endif
  9625. }
  9626. else
  9627. {
  9628. // ETC1S
  9629. switch (tex_type)
  9630. {
  9631. // ETC1 and uncompressed are always supported.
  9632. case transcoder_texture_format::cTFETC1_RGB:
  9633. case transcoder_texture_format::cTFRGBA32:
  9634. case transcoder_texture_format::cTFRGB565:
  9635. case transcoder_texture_format::cTFBGR565:
  9636. case transcoder_texture_format::cTFRGBA4444:
  9637. return true;
  9638. #if BASISD_SUPPORT_DXT1
  9639. case transcoder_texture_format::cTFBC1_RGB:
  9640. return true;
  9641. #endif
  9642. #if BASISD_SUPPORT_DXT5A
  9643. case transcoder_texture_format::cTFBC4_R:
  9644. case transcoder_texture_format::cTFBC5_RG:
  9645. return true;
  9646. #endif
  9647. #if BASISD_SUPPORT_DXT1 && BASISD_SUPPORT_DXT5A
  9648. case transcoder_texture_format::cTFBC3_RGBA:
  9649. return true;
  9650. #endif
  9651. #if BASISD_SUPPORT_PVRTC1
  9652. case transcoder_texture_format::cTFPVRTC1_4_RGB:
  9653. case transcoder_texture_format::cTFPVRTC1_4_RGBA:
  9654. return true;
  9655. #endif
  9656. #if BASISD_SUPPORT_BC7_MODE5
  9657. case transcoder_texture_format::cTFBC7_RGBA:
  9658. case transcoder_texture_format::cTFBC7_ALT:
  9659. return true;
  9660. #endif
  9661. #if BASISD_SUPPORT_ETC2_EAC_A8
  9662. case transcoder_texture_format::cTFETC2_RGBA:
  9663. return true;
  9664. #endif
  9665. #if BASISD_SUPPORT_ASTC
  9666. case transcoder_texture_format::cTFASTC_4x4_RGBA:
  9667. return true;
  9668. #endif
  9669. #if BASISD_SUPPORT_ATC
  9670. case transcoder_texture_format::cTFATC_RGB:
  9671. case transcoder_texture_format::cTFATC_RGBA:
  9672. return true;
  9673. #endif
  9674. #if BASISD_SUPPORT_FXT1
  9675. case transcoder_texture_format::cTFFXT1_RGB:
  9676. return true;
  9677. #endif
  9678. #if BASISD_SUPPORT_PVRTC2
  9679. case transcoder_texture_format::cTFPVRTC2_4_RGB:
  9680. case transcoder_texture_format::cTFPVRTC2_4_RGBA:
  9681. return true;
  9682. #endif
  9683. #if BASISD_SUPPORT_ETC2_EAC_RG11
  9684. case transcoder_texture_format::cTFETC2_EAC_R11:
  9685. case transcoder_texture_format::cTFETC2_EAC_RG11:
  9686. return true;
  9687. #endif
  9688. default:
  9689. break;
  9690. }
  9691. }
  9692. return false;
  9693. }
  9694. // ------------------------------------------------------------------------------------------------------
  9695. // UASTC
  9696. // ------------------------------------------------------------------------------------------------------
  9697. #if BASISD_SUPPORT_UASTC
  9698. const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] =
  9699. {
  9700. { 0, 28, false }, { 1, 20, false }, { 2, 16, true }, { 3, 29, false },
  9701. { 4, 91, true }, { 5, 9, false }, { 6, 107, true }, { 7, 72, true },
  9702. { 8, 149, false }, { 9, 204, true }, { 10, 50, false }, { 11, 114, true },
  9703. { 12, 496, true }, { 13, 17, true }, { 14, 78, false }, { 15, 39, true },
  9704. { 17, 252, true }, { 18, 828, true }, { 19, 43, false }, { 20, 156, false },
  9705. { 21, 116, false }, { 22, 210, true }, { 23, 476, true }, { 24, 273, false },
  9706. { 25, 684, true }, { 26, 359, false }, { 29, 246, true }, { 32, 195, true },
  9707. { 33, 694, true }, { 52, 524, true }
  9708. };
  9709. const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS] =
  9710. {
  9711. { 10, 36, 4 }, { 11, 48, 4 }, { 0, 61, 3 }, { 2, 137, 4 },
  9712. { 8, 161, 5 }, { 13, 183, 4 }, { 1, 226, 2 }, { 33, 281, 2 },
  9713. { 40, 302, 3 }, { 20, 307, 4 }, { 21, 479, 0 }, { 58, 495, 3 },
  9714. { 3, 593, 0 }, { 32, 594, 2 }, { 59, 605, 1 }, { 34, 799, 3 },
  9715. { 20, 812, 1 }, { 14, 988, 4 }, { 31, 993, 3 }
  9716. };
  9717. const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3] =
  9718. {
  9719. { 4, 260, 0 }, { 8, 74, 5 }, { 9, 32, 5 }, { 10, 156, 2 },
  9720. { 11, 183, 2 }, { 12, 15, 0 }, { 13, 745, 4 }, { 20, 0, 1 },
  9721. { 35, 335, 1 }, { 36, 902, 5 }, { 57, 254, 0 }
  9722. };
  9723. const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 1, 2, 0 }, { 2, 0, 1 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } };
  9724. const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 2, 0, 1 }, { 1, 2, 0 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } };
  // Collapses a 3-subset label p to a 2-subset label.
  // k (0..5) selects the mapping: (k >> 1) chooses which labels map to 0, and an odd k flips the result (1 - p).
  //   k >> 1 == 0: labels 0 and 1 -> 0, anything else -> 1
  //   k >> 1 == 1: label 0 -> 0, anything else -> 1
  //   k >> 1 == 2: labels 0 and 2 -> 0, anything else -> 1
  uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k)
  {
    assert(k < 6);

    const uint32_t merge_mode = k >> 1;
    if (merge_mode == 0)
      p = (p <= 1) ? 0 : 1;
    else if (merge_mode == 1)
      p = (p == 0) ? 0 : 1;
    else if (merge_mode == 2)
      p = ((p == 0) || (p == 2)) ? 0 : 1;

    return (k & 1) ? (1 - p) : p;
  }
  9753. static const uint8_t g_zero_pattern[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
  9754. const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16] =
  9755. {
  9756. { 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, { 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1 }, { 1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0 }, { 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1 },
  9757. { 1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0 }, { 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1 }, { 1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0 },
  9758. { 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1 }, { 1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0 },
  9759. { 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0 },
  9760. { 1,0,0,0,1,1,1,0,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1 }, { 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0 }, { 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0 },
  9761. { 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1 }, { 1,0,0,0,1,1,0,0,1,1,0,0,1,1,1,0 }, { 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0 },
  9762. { 1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1 }, { 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0 }, { 1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1 }, { 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0 },
  9763. { 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0 }, { 1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0 }
  9764. };
  9765. const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16] =
  9766. {
  9767. { 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2 }, { 1,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2 }, { 1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2 }, { 1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0 },
  9768. { 1,1,2,0,1,1,2,0,1,1,2,0,1,1,2,0 }, { 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2 }, { 0,2,1,1,0,2,1,1,0,2,1,1,0,2,1,1 }, { 2,0,0,0,2,0,0,0,2,1,1,1,2,1,1,1 },
  9769. { 2,0,1,2,2,0,1,2,2,0,1,2,2,0,1,2 }, { 1,1,1,1,0,0,0,0,2,2,2,2,1,1,1,1 }, { 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2 }
  9770. };
  9771. const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16] =
  9772. {
  9773. { 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0 }, { 1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1 },
  9774. { 1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1 }, { 0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0 }, { 0,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1 }, { 0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1 },
  9775. { 1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0 }, { 0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0 }, { 1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0 },
  9776. { 0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0 }, { 1,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0 },
  9777. { 1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0 }, { 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0 }, { 1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }
  9778. };
  9779. const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3] =
  9780. {
  9781. { 0, 2 }, { 0, 3 }, { 1, 0 }, { 0, 3 }, { 7, 0 }, { 0, 2 }, { 3, 0 }, { 7, 0 },
  9782. { 0, 11 }, { 2, 0 }, { 0, 7 }, { 11, 0 }, { 3, 0 }, { 8, 0 }, { 0, 4 }, { 12, 0 },
  9783. { 1, 0 }, { 8, 0 }, { 0, 1 }, { 0, 2 }, { 0, 4 }, { 8, 0 }, { 1, 0 }, { 0, 2 },
  9784. { 4, 0 }, { 0, 1 }, { 4, 0 }, { 1, 0 }, { 4, 0 }, { 1, 0 }
  9785. };
  9786. const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3] =
  9787. {
  9788. { 0, 8, 10 }, { 8, 0, 12 }, { 4, 0, 12 }, { 8, 0, 4 }, { 3, 0, 2 }, { 0, 1, 3 }, { 0, 2, 1 }, { 1, 9, 0 }, { 1, 2, 0 }, { 4, 0, 8 }, { 0, 6, 2 }
  9789. };
  9790. const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3] =
  9791. {
  9792. { 0, 4 }, { 0, 2 }, { 2, 0 }, { 0, 7 }, { 8, 0 }, { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 0 }, { 0, 1 }, { 0, 8 }, { 2, 0 }, { 0, 1 }, { 0, 7 }, { 12, 0 }, { 2, 0 }, { 9, 0 }, { 0, 2 }, { 4, 0 }
  9793. };
  9794. const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2] =
  9795. {
  9796. { 0x1, 4 },
  9797. { 0x35, 6 },
  9798. { 0x1D, 5 },
  9799. { 0x3, 5 },
  9800. { 0x13, 5 },
  9801. { 0xB, 5 },
  9802. { 0x1B, 5 },
  9803. { 0x7, 5 },
  9804. { 0x17, 5 },
  9805. { 0xF, 5 },
  9806. { 0x2, 3 },
  9807. { 0x0, 2 },
  9808. { 0x6, 3 },
  9809. { 0x1F, 5 },
  9810. { 0xD, 5 },
  9811. { 0x5, 7 },
  9812. { 0x15, 6 },
  9813. { 0x25, 6 },
  9814. { 0x9, 4 },
  9815. { 0x45, 7 } // future expansion
  9816. };
  // Decode table for the mode prefix code: indexed by 7 bits of the stream, yields the mode number.
  // If g_uastc_mode_huff_codes[] changes this table must be updated!
  static const uint8_t g_uastc_huff_modes[128] =
  {
    11, 0, 10, 3, 11, 15, 12, 7, 11, 18, 10, 5, 11, 14, 12, 9,  // 0-15
    11, 0, 10, 4, 11, 16, 12, 8, 11, 18, 10, 6, 11, 2, 12, 13,  // 16-31
    11, 0, 10, 3, 11, 17, 12, 7, 11, 18, 10, 5, 11, 14, 12, 9,  // 32-47
    11, 0, 10, 4, 11, 1, 12, 8, 11, 18, 10, 6, 11, 2, 12, 13,   // 48-63
    11, 0, 10, 3, 11, 19, 12, 7, 11, 18, 10, 5, 11, 14, 12, 9,  // 64-79
    11, 0, 10, 4, 11, 16, 12, 8, 11, 18, 10, 6, 11, 2, 12, 13,  // 80-95
    11, 0, 10, 3, 11, 17, 12, 7, 11, 18, 10, 5, 11, 14, 12, 9,  // 96-111
    11, 0, 10, 4, 11, 1, 12, 8, 11, 18, 10, 6, 11, 2, 12, 13    // 112-127
  };
  9823. const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES] = { 4, 2, 3, 2, 2, 3, 2, 2, 0, 2, 4, 2, 3, 1, 2, 4, 2, 2, 5 };
  9824. const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES] = { 8, 2, 5, 2, 2, 5, 2, 2, 0, 2, 8, 2, 5, 0, 2, 8, 2, 2, 11 };
  9825. const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES] = { 19, 20, 8, 7, 12, 20, 18, 12, 0, 8, 13, 13, 19, 20, 20, 20, 20, 20, 11 };
  9826. const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES] = { 1, 1, 2, 3, 2, 1, 1, 2, 0, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1 };
  9827. const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1 };
  9828. const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES] = { 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 3 };
  9829. const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
  9830. const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  9831. const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
  9832. const uint8_t g_uastc_mode_cem[TOTAL_UASTC_MODES] = { 8, 8, 8, 8, 8, 8, 8, 8, 0, 12, 12, 12, 12, 12, 12, 4, 4, 4, 8 };
  9833. const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 };
  9834. const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0 };
  9835. const uint8_t g_uastc_mode_total_hint_bits[TOTAL_UASTC_MODES] = { 15, 15, 15, 15, 15, 15, 15, 15, 0, 23, 17, 17, 17, 23, 23, 23, 23, 23, 15 };
  9836. // bits, trits, quints
  9837. const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3] =
  9838. {
  9839. { 1, 0, 0 }, // 0-1 0
  9840. { 0, 1, 0 }, // 0-2 1
  9841. { 2, 0, 0 }, // 0-3 2
  9842. { 0, 0, 1 }, // 0-4 3
  9843. { 1, 1, 0 }, // 0-5 4
  9844. { 3, 0, 0 }, // 0-7 5
  9845. { 1, 0, 1 }, // 0-9 6
  9846. { 2, 1, 0 }, // 0-11 7
  9847. { 4, 0, 0 }, // 0-15 8
  9848. { 2, 0, 1 }, // 0-19 9
  9849. { 3, 1, 0 }, // 0-23 10
  9850. { 5, 0, 0 }, // 0-31 11
  9851. { 3, 0, 1 }, // 0-39 12
  9852. { 4, 1, 0 }, // 0-47 13
  9853. { 6, 0, 0 }, // 0-63 14
  9854. { 4, 0, 1 }, // 0-79 15
  9855. { 5, 1, 0 }, // 0-95 16
  9856. { 7, 0, 0 }, // 0-127 17
  9857. { 5, 0, 1 }, // 0-159 18
  9858. { 6, 1, 0 }, // 0-191 19
  9859. { 8, 0, 0 }, // 0-255 20
  9860. };
  9861. int astc_get_levels(int range)
  9862. {
  9863. assert(range < (int)BC7ENC_TOTAL_ASTC_RANGES);
  9864. return (1 + 2 * g_astc_bise_range_table[range][1] + 4 * g_astc_bise_range_table[range][2]) << g_astc_bise_range_table[range][0];
  9865. }
  9866. // g_astc_unquant[] is the inverse of g_astc_sorted_order_unquant[]
  9867. astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index]
  9868. // Taken right from the ASTC spec.
  9869. static struct
  9870. {
  9871. const char* m_pB_str;
  9872. uint32_t m_c;
  9873. } g_astc_endpoint_unquant_params[BC7ENC_TOTAL_ASTC_RANGES] =
  9874. {
  9875. { "", 0 },
  9876. { "", 0 },
  9877. { "", 0 },
  9878. { "", 0 },
  9879. { "000000000", 204, }, // 0-5
  9880. { "", 0 },
  9881. { "000000000", 113, }, // 0-9
  9882. { "b000b0bb0", 93 }, // 0-11
  9883. { "", 0 },
  9884. { "b0000bb00", 54 }, // 0-19
  9885. { "cb000cbcb", 44 }, // 0-23
  9886. { "", 0 },
  9887. { "cb0000cbc", 26 }, // 0-39
  9888. { "dcb000dcb", 22 }, // 0-47
  9889. { "", 0 },
  9890. { "dcb0000dc", 13 }, // 0-79
  9891. { "edcb000ed", 11 }, // 0-95
  9892. { "", 0 },
  9893. { "edcb0000e", 6 }, // 0-159
  9894. { "fedcb000f", 5 }, // 0-191
  9895. { "", 0 },
  9896. };
  9897. bool astc_is_valid_endpoint_range(uint32_t range)
  9898. {
  9899. if ((g_astc_bise_range_table[range][1] == 0) && (g_astc_bise_range_table[range][2] == 0))
  9900. return true;
  9901. return g_astc_endpoint_unquant_params[range].m_c != 0;
  9902. }
  9903. uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range)
  9904. {
  9905. assert(range < BC7ENC_TOTAL_ASTC_RANGES);
  9906. const uint32_t bits = g_astc_bise_range_table[range][0];
  9907. const uint32_t trits = g_astc_bise_range_table[range][1];
  9908. const uint32_t quints = g_astc_bise_range_table[range][2];
  9909. uint32_t val = 0;
  9910. if ((!trits) && (!quints))
  9911. {
  9912. assert(!packed_trits && !packed_quints);
  9913. int bits_left = 8;
  9914. while (bits_left > 0)
  9915. {
  9916. uint32_t v = packed_bits;
  9917. int n = basisu::minimumi(bits_left, bits);
  9918. if (n < (int)bits)
  9919. v >>= (bits - n);
  9920. assert(v < (1U << n));
  9921. val |= (v << (bits_left - n));
  9922. bits_left -= n;
  9923. }
  9924. }
  9925. else
  9926. {
  9927. const uint32_t A = (packed_bits & 1) ? 511 : 0;
  9928. const uint32_t C = g_astc_endpoint_unquant_params[range].m_c;
  9929. const uint32_t D = trits ? packed_trits : packed_quints;
  9930. assert(C);
  9931. uint32_t B = 0;
  9932. for (uint32_t i = 0; i < 9; i++)
  9933. {
  9934. B <<= 1;
  9935. char c = g_astc_endpoint_unquant_params[range].m_pB_str[i];
  9936. if (c != '0')
  9937. {
  9938. c -= 'a';
  9939. B |= ((packed_bits >> c) & 1);
  9940. }
  9941. }
  9942. val = D * C + B;
  9943. val = val ^ A;
  9944. val = (A & 0x80) | (val >> 2);
  9945. }
  9946. return val;
  9947. }
  9948. uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range)
  9949. {
  9950. assert(range < BC7ENC_TOTAL_ASTC_RANGES);
  9951. assert(packed_val < (uint32_t)astc_get_levels(range));
  9952. const uint32_t bits = g_astc_bise_range_table[range][0];
  9953. const uint32_t trits = g_astc_bise_range_table[range][1];
  9954. const uint32_t quints = g_astc_bise_range_table[range][2];
  9955. if ((!trits) && (!quints))
  9956. return unquant_astc_endpoint(packed_val, 0, 0, range);
  9957. else if (trits)
  9958. return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), packed_val >> bits, 0, range);
  9959. else
  9960. return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), 0, packed_val >> bits, range);
  9961. }
  // BC7 - Various BC7 tables/helpers

  // BC7 interpolation weights (0..64) for 1, 2, 3 and 4 bit indices.
  const uint32_t g_bc7_weights1[2] = { 0, 64 };
  const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 };
  const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
  const uint32_t g_bc7_weights4[16] =
  {
    0, 4, 9, 13, 17, 21, 26, 30,
    34, 38, 43, 47, 51, 55, 60, 64
  };

  // ASTC interpolation weights (0..64) for 4 and 5 bit indices, and for the 3-level case.
  const uint32_t g_astc_weights4[16] =
  {
    0, 4, 8, 12, 17, 21, 25, 29,
    35, 39, 43, 47, 52, 56, 60, 64
  };
  const uint32_t g_astc_weights5[32] =
  {
    0, 2, 4, 6, 8, 10, 12, 14,
    16, 18, 20, 22, 24, 26, 28, 30,
    34, 36, 38, 40, 42, 44, 46, 48,
    50, 52, 54, 56, 58, 60, 62, 64
  };
  const uint32_t g_astc_weights_3levels[3] = { 0, 32, 64 };

  // Single-subset "partition": every texel belongs to subset 0.
  const uint8_t g_bc7_partition1[16] = { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
  // BC7 2-subset partition table: 64 partitions x 16 texels, each value is the texel's subset (0 or 1).
  // Partition p occupies entries [p * 16, p * 16 + 15].
  const uint8_t g_bc7_partition2[64 * 16] =
  {
    0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, // 0
    0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, // 1
    0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, // 2
    0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, // 3
    0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, // 4
    0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, // 5
    0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, // 6
    0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1, // 7
    0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, // 8
    0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, // 9
    0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, // 10
    0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, // 11
    0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, // 12
    0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, // 13
    0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, // 14
    0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, // 15
    0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, // 16
    0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, // 17
    0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, // 18
    0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, // 19
    0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, // 20
    0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, // 21
    0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, // 22
    0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1, // 23
    0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, // 24
    0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, // 25
    0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, // 26
    0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, // 27
    0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, // 28
    0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, // 29
    0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, // 30
    0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0, // 31
    0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, // 32
    0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, // 33
    0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, // 34
    0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, // 35
    0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, // 36
    0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, // 37
    0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, // 38
    0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1, // 39
    0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, // 40
    0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, // 41
    0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, // 42
    0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, // 43
    0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, // 44
    0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, // 45
    0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, // 46
    0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0, // 47
    0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, // 48
    0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, // 49
    0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, // 50
    0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, // 51
    0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, // 52
    0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, // 53
    0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, // 54
    0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0, // 55
    0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, // 56
    0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, // 57
    0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, // 58
    0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, // 59
    0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, // 60
    0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, // 61
    0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, // 62
    0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1  // 63
  };
  // BC7 3-subset partition table: 64 partitions x 16 texels, each value is the texel's subset (0, 1 or 2).
  // Partition p occupies entries [p * 16, p * 16 + 15].
  const uint8_t g_bc7_partition3[64 * 16] =
  {
    0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, // 0
    0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, // 1
    0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, // 2
    0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, // 3
    0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, // 4
    0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, // 5
    0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, // 6
    0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1, // 7
    0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, // 8
    0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, // 9
    0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, // 10
    0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, // 11
    0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, // 12
    0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, // 13
    0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, // 14
    0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0, // 15
    0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, // 16
    0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, // 17
    0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, // 18
    0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, // 19
    0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, // 20
    0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, // 21
    0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, // 22
    0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0, // 23
    0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, // 24
    0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, // 25
    0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, // 26
    0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, // 27
    0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, // 28
    0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, // 29
    0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, // 30
    0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1, // 31
    0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, // 32
    0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, // 33
    0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, // 34
    0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, // 35
    0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, // 36
    0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, // 37
    0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, // 38
    0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1, // 39
    0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, // 40
    0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, // 41
    0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, // 42
    0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, // 43
    0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, // 44
    0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, // 45
    0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, // 46
    0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1, // 47
    0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, // 48
    0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, // 49
    0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, // 50
    0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, // 51
    0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, // 52
    0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, // 53
    0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, // 54
    0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2, // 55
    0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, // 56
    0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, // 57
    0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, // 58
    0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, // 59
    0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, // 60
    0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, // 61
    0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, // 62
    0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0  // 63
  };
  // BC7 anchor texel indices, one entry per partition number (subset 0's anchor is always texel 0).

  // Anchor of the second subset in 2-subset partitions.
  const uint8_t g_bc7_table_anchor_index_second_subset[64] =
  {
    15,15,15,15,15,15,15,15, // 0-7
    15,15,15,15,15,15,15,15, // 8-15
    15, 2, 8, 2, 2, 8, 8,15, // 16-23
     2, 8, 2, 2, 8, 8, 2, 2, // 24-31
    15,15, 6, 8, 2, 8,15,15, // 32-39
     2, 8, 2, 2, 2,15,15, 6, // 40-47
     6, 2, 6, 8,15,15, 2, 2, // 48-55
    15,15,15,15,15, 2, 2,15  // 56-63
  };

  // Anchor of the second subset in 3-subset partitions.
  const uint8_t g_bc7_table_anchor_index_third_subset_1[64] =
  {
     3, 3,15,15, 8, 3,15,15, // 0-7
     8, 8, 6, 6, 6, 5, 3, 3, // 8-15
     3, 3, 8,15, 3, 3, 6,10, // 16-23
     5, 8, 8, 6, 8, 5,15,15, // 24-31
     8,15, 3, 5, 6,10, 8,15, // 32-39
    15, 3,15, 5,15,15,15,15, // 40-47
     3,15, 5, 5, 5, 8, 5,10, // 48-55
     5,10, 8,13,15,12, 3, 3  // 56-63
  };

  // Anchor of the third subset in 3-subset partitions.
  const uint8_t g_bc7_table_anchor_index_third_subset_2[64] =
  {
    15, 8, 8, 3,15,15, 3, 8, // 0-7
    15,15,15,15,15,15,15, 8, // 8-15
    15, 8,15, 3,15, 8,15, 8, // 16-23
     3,15, 6,10,15,15,10, 8, // 24-31
    15, 3,15,10,10, 8, 9,10, // 32-39
     6,15, 8,15, 3, 6, 6, 8, // 40-47
    15, 3,15,15,15,15,15,15, // 48-55
    15,15,15,15, 3,15,15, 8  // 56-63
  };
  // Per-mode BC7 layout tables, indexed by BC7 mode (0..7).
  //                                                   mode:  0  1  2  3  4  5  6  7
  const uint8_t g_bc7_num_subsets[8]                     = { 3, 2, 3, 2, 1, 1, 1, 2 }; // subsets per block
  const uint8_t g_bc7_partition_bits[8]                  = { 4, 6, 6, 6, 0, 0, 0, 6 }; // bits in the partition field
  const uint8_t g_bc7_color_index_bitcount[8]            = { 3, 3, 2, 2, 2, 2, 4, 2 }; // bits per color index
  const uint8_t g_bc7_mode_has_p_bits[8]                 = { 1, 1, 0, 1, 0, 0, 1, 1 }; // 1 = mode stores p-bits
  const uint8_t g_bc7_mode_has_shared_p_bits[8]          = { 0, 1, 0, 0, 0, 0, 0, 0 }; // 1 = one p-bit per subset
  const uint8_t g_bc7_color_precision_table[8]           = { 4, 6, 5, 7, 5, 7, 7, 5 }; // bits per color endpoint component
  const int8_t g_bc7_alpha_precision_table[8]            = { 0, 0, 0, 0, 6, 8, 7, 5 }; // bits per alpha endpoint
  const uint8_t g_bc7_alpha_index_bitcount[8]            = { 0, 0, 0, 0, 3, 2, 4, 2 }; // bits per alpha index
  10010. endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit]
  10011. endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c]
  10012. static inline void bc7_set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t* pCur_ofs)
  10013. {
  10014. assert((num_bits <= 32) && (val < (1ULL << num_bits)));
  10015. while (num_bits)
  10016. {
  10017. const uint32_t n = basisu::minimumu(8 - (*pCur_ofs & 7), num_bits);
  10018. pBytes[*pCur_ofs >> 3] |= (uint8_t)(val << (*pCur_ofs & 7));
  10019. val >>= n;
  10020. num_bits -= n;
  10021. *pCur_ofs += n;
  10022. }
  10023. assert(*pCur_ofs <= 128);
  10024. }
  10025. // TODO: Optimize this.
  10026. void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults)
  10027. {
  10028. const uint32_t best_mode = pResults->m_mode;
  10029. const uint32_t total_subsets = g_bc7_num_subsets[best_mode];
  10030. const uint32_t total_partitions = 1 << g_bc7_partition_bits[best_mode];
  10031. //const uint32_t num_rotations = 1 << g_bc7_rotation_bits[best_mode];
  10032. //const uint32_t num_index_selectors = (best_mode == 4) ? 2 : 1;
  10033. const uint8_t* pPartition;
  10034. if (total_subsets == 1)
  10035. pPartition = &g_bc7_partition1[0];
  10036. else if (total_subsets == 2)
  10037. pPartition = &g_bc7_partition2[pResults->m_partition * 16];
  10038. else
  10039. pPartition = &g_bc7_partition3[pResults->m_partition * 16];
  10040. uint8_t color_selectors[16];
  10041. memcpy(color_selectors, pResults->m_selectors, 16);
  10042. uint8_t alpha_selectors[16];
  10043. memcpy(alpha_selectors, pResults->m_alpha_selectors, 16);
  10044. color_quad_u8 low[3], high[3];
  10045. memcpy(low, pResults->m_low, sizeof(low));
  10046. memcpy(high, pResults->m_high, sizeof(high));
  10047. uint32_t pbits[3][2];
  10048. memcpy(pbits, pResults->m_pbits, sizeof(pbits));
  10049. int anchor[3] = { -1, -1, -1 };
  10050. for (uint32_t k = 0; k < total_subsets; k++)
  10051. {
  10052. uint32_t anchor_index = 0;
  10053. if (k)
  10054. {
  10055. if ((total_subsets == 3) && (k == 1))
  10056. anchor_index = g_bc7_table_anchor_index_third_subset_1[pResults->m_partition];
  10057. else if ((total_subsets == 3) && (k == 2))
  10058. anchor_index = g_bc7_table_anchor_index_third_subset_2[pResults->m_partition];
  10059. else
  10060. anchor_index = g_bc7_table_anchor_index_second_subset[pResults->m_partition];
  10061. }
  10062. anchor[k] = anchor_index;
  10063. const uint32_t color_index_bits = get_bc7_color_index_size(best_mode, pResults->m_index_selector);
  10064. const uint32_t num_color_indices = 1 << color_index_bits;
  10065. if (color_selectors[anchor_index] & (num_color_indices >> 1))
  10066. {
  10067. for (uint32_t i = 0; i < 16; i++)
  10068. if (pPartition[i] == k)
  10069. color_selectors[i] = (uint8_t)((num_color_indices - 1) - color_selectors[i]);
  10070. if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
  10071. {
  10072. for (uint32_t q = 0; q < 3; q++)
  10073. {
  10074. uint8_t t = low[k].m_c[q];
  10075. low[k].m_c[q] = high[k].m_c[q];
  10076. high[k].m_c[q] = t;
  10077. }
  10078. }
  10079. else
  10080. {
  10081. color_quad_u8 tmp = low[k];
  10082. low[k] = high[k];
  10083. high[k] = tmp;
  10084. }
  10085. if (!g_bc7_mode_has_shared_p_bits[best_mode])
  10086. {
  10087. uint32_t t = pbits[k][0];
  10088. pbits[k][0] = pbits[k][1];
  10089. pbits[k][1] = t;
  10090. }
  10091. }
  10092. if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
  10093. {
  10094. const uint32_t alpha_index_bits = get_bc7_alpha_index_size(best_mode, pResults->m_index_selector);
  10095. const uint32_t num_alpha_indices = 1 << alpha_index_bits;
  10096. if (alpha_selectors[anchor_index] & (num_alpha_indices >> 1))
  10097. {
  10098. for (uint32_t i = 0; i < 16; i++)
  10099. if (pPartition[i] == k)
  10100. alpha_selectors[i] = (uint8_t)((num_alpha_indices - 1) - alpha_selectors[i]);
  10101. uint8_t t = low[k].m_c[3];
  10102. low[k].m_c[3] = high[k].m_c[3];
  10103. high[k].m_c[3] = t;
  10104. }
  10105. }
  10106. }
  10107. uint8_t* pBlock_bytes = (uint8_t*)(pBlock);
  10108. memset(pBlock_bytes, 0, BC7ENC_BLOCK_SIZE);
  10109. uint32_t cur_bit_ofs = 0;
  10110. bc7_set_block_bits(pBlock_bytes, 1 << best_mode, best_mode + 1, &cur_bit_ofs);
  10111. if ((best_mode == 4) || (best_mode == 5))
  10112. bc7_set_block_bits(pBlock_bytes, pResults->m_rotation, 2, &cur_bit_ofs);
  10113. if (best_mode == 4)
  10114. bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector, 1, &cur_bit_ofs);
  10115. if (total_partitions > 1)
  10116. bc7_set_block_bits(pBlock_bytes, pResults->m_partition, (total_partitions == 64) ? 6 : 4, &cur_bit_ofs);
  10117. const uint32_t total_comps = (best_mode >= 4) ? 4 : 3;
  10118. for (uint32_t comp = 0; comp < total_comps; comp++)
  10119. {
  10120. for (uint32_t subset = 0; subset < total_subsets; subset++)
  10121. {
  10122. bc7_set_block_bits(pBlock_bytes, low[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs);
  10123. bc7_set_block_bits(pBlock_bytes, high[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs);
  10124. }
  10125. }
  10126. if (g_bc7_mode_has_p_bits[best_mode])
  10127. {
  10128. for (uint32_t subset = 0; subset < total_subsets; subset++)
  10129. {
  10130. bc7_set_block_bits(pBlock_bytes, pbits[subset][0], 1, &cur_bit_ofs);
  10131. if (!g_bc7_mode_has_shared_p_bits[best_mode])
  10132. bc7_set_block_bits(pBlock_bytes, pbits[subset][1], 1, &cur_bit_ofs);
  10133. }
  10134. }
  10135. for (uint32_t y = 0; y < 4; y++)
  10136. {
  10137. for (uint32_t x = 0; x < 4; x++)
  10138. {
  10139. int idx = x + y * 4;
  10140. uint32_t n = pResults->m_index_selector ? get_bc7_alpha_index_size(best_mode, pResults->m_index_selector) : get_bc7_color_index_size(best_mode, pResults->m_index_selector);
  10141. if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2]))
  10142. n--;
  10143. bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? alpha_selectors[idx] : color_selectors[idx], n, &cur_bit_ofs);
  10144. }
  10145. }
  10146. if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
  10147. {
  10148. for (uint32_t y = 0; y < 4; y++)
  10149. {
  10150. for (uint32_t x = 0; x < 4; x++)
  10151. {
  10152. int idx = x + y * 4;
  10153. uint32_t n = pResults->m_index_selector ? get_bc7_color_index_size(best_mode, pResults->m_index_selector) : get_bc7_alpha_index_size(best_mode, pResults->m_index_selector);
  10154. if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2]))
  10155. n--;
  10156. bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? color_selectors[idx] : alpha_selectors[idx], n, &cur_bit_ofs);
  10157. }
  10158. }
  10159. }
  10160. assert(cur_bit_ofs == 128);
  10161. }
  // ASTC

  // ORs a code of at most 9 bits into the buffer at bit position bit_offset (LSB first) and
  // advances bit_offset by codesize. At most two bytes are touched. The destination bits must
  // already be zero, and `code` is expected to fit in codesize bits (it is not masked here).
  static inline void astc_set_bits_1_to_9(uint32_t* pDst, int& bit_offset, uint32_t code, uint32_t codesize)
  {
    assert(codesize <= 9);

    if (!codesize)
      return;

    uint8_t* const pByte = reinterpret_cast<uint8_t*>(pDst) + (bit_offset >> 3);
    const uint32_t shift = bit_offset & 7;
    const uint32_t shifted = code << shift;

    pByte[0] |= (uint8_t)shifted;

    // Spill into the next byte only when the code crosses the byte boundary.
    if (codesize > (8 - shift))
      pByte[1] |= (uint8_t)(shifted >> 8);

    bit_offset += codesize;
  }
  10178. void pack_astc_solid_block(void* pDst_block, const color32& color)
  10179. {
  10180. uint32_t r = color[0], g = color[1], b = color[2];
  10181. uint32_t a = color[3];
  10182. uint32_t* pOutput = static_cast<uint32_t*>(pDst_block);
  10183. uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst_block);
  10184. pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff;
  10185. pOutput[1] = 0xffffffff;
  10186. pOutput[2] = 0;
  10187. pOutput[3] = 0;
  10188. int bit_pos = 64;
  10189. astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, r | (r << 8), 16);
  10190. astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, g | (g << 8), 16);
  10191. astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, b | (b << 8), 16);
  10192. astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, a | (a << 8), 16);
  10193. }
  10194. // See 23.21 https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_partition_pattern_generation
  10195. #ifdef _DEBUG
  // 32-bit integer mixing function used to derive the partition seeds
  // (see the partition pattern generation section of the ASTC spec referenced above).
  // All arithmetic wraps modulo 2^32.
  static inline uint32_t astc_hash52(uint32_t v)
  {
    v ^= v >> 15;
    v -= v << 17;
    v += v << 7;
    v += v << 4;
    v ^= v >> 5;
    v += v << 16;
    v ^= v >> 7;
    v ^= v >> 3;
    v ^= v << 6;
    v ^= v >> 17;
    return v;
  }
  10204. int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block)
  10205. {
  10206. if (small_block)
  10207. {
  10208. x <<= 1; y <<= 1; z <<= 1;
  10209. }
  10210. seed += (partitioncount - 1) * 1024;
  10211. uint32_t rnum = astc_hash52(seed);
  10212. uint8_t seed1 = rnum & 0xF;
  10213. uint8_t seed2 = (rnum >> 4) & 0xF;
  10214. uint8_t seed3 = (rnum >> 8) & 0xF;
  10215. uint8_t seed4 = (rnum >> 12) & 0xF;
  10216. uint8_t seed5 = (rnum >> 16) & 0xF;
  10217. uint8_t seed6 = (rnum >> 20) & 0xF;
  10218. uint8_t seed7 = (rnum >> 24) & 0xF;
  10219. uint8_t seed8 = (rnum >> 28) & 0xF;
  10220. uint8_t seed9 = (rnum >> 18) & 0xF;
  10221. uint8_t seed10 = (rnum >> 22) & 0xF;
  10222. uint8_t seed11 = (rnum >> 26) & 0xF;
  10223. uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
  10224. seed1 *= seed1; seed2 *= seed2;
  10225. seed3 *= seed3; seed4 *= seed4;
  10226. seed5 *= seed5; seed6 *= seed6;
  10227. seed7 *= seed7; seed8 *= seed8;
  10228. seed9 *= seed9; seed10 *= seed10;
  10229. seed11 *= seed11; seed12 *= seed12;
  10230. int sh1, sh2, sh3;
  10231. if (seed & 1)
  10232. {
  10233. sh1 = (seed & 2 ? 4 : 5); sh2 = (partitioncount == 3 ? 6 : 5);
  10234. }
  10235. else
  10236. {
  10237. sh1 = (partitioncount == 3 ? 6 : 5); sh2 = (seed & 2 ? 4 : 5);
  10238. }
  10239. sh3 = (seed & 0x10) ? sh1 : sh2;
  10240. seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
  10241. seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
  10242. seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
  10243. int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
  10244. int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
  10245. int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
  10246. int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
  10247. a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F;
  10248. if (partitioncount < 4) d = 0;
  10249. if (partitioncount < 3) c = 0;
  10250. if (a >= b && a >= c && a >= d)
  10251. return 0;
  10252. else if (b >= c && b >= d)
  10253. return 1;
  10254. else if (c >= d)
  10255. return 2;
  10256. else
  10257. return 3;
  10258. }
  10259. #endif
// Lookup table used by astc_encode_quints(): maps a packed group of three quints
// (index = q0 + 5 * q1 + 25 * q2, so 0..124) to the 7-bit code T that is interleaved into the output.
static const uint8_t g_astc_quint_encode[125] =
{
	0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
	58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
	105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
	126, 127, 94, 95, 62, 39, 47, 55, 63, 31
};
  10267. // Encodes 3 values to output, usable for any range that uses quints and bits
  10268. static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
  10269. {
  10270. // First extract the quints and the bits from the 3 input values
  10271. int quints = 0, bits[3];
  10272. const uint32_t bit_mask = (1 << n) - 1;
  10273. for (int i = 0; i < 3; i++)
  10274. {
  10275. static const int s_muls[3] = { 1, 5, 25 };
  10276. const int t = pValues[i] >> n;
  10277. quints += t * s_muls[i];
  10278. bits[i] = pValues[i] & bit_mask;
  10279. }
  10280. // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
  10281. // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
  10282. assert(quints < 125);
  10283. const int T = g_astc_quint_encode[quints];
  10284. // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
  10285. astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
  10286. (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
  10287. }
  10288. // Packs values using ASTC's BISE to output buffer.
  10289. static void astc_pack_bise(uint32_t* pDst, const uint8_t* pSrc_vals, int bit_pos, int num_vals, int range)
  10290. {
  10291. uint32_t temp[5] = { 0, 0, 0, 0, 0 };
  10292. const int num_bits = g_astc_bise_range_table[range][0];
  10293. int group_size = 0;
  10294. if (g_astc_bise_range_table[range][1])
  10295. group_size = 5;
  10296. else if (g_astc_bise_range_table[range][2])
  10297. group_size = 3;
  10298. if (group_size)
  10299. {
  10300. // Range has trits or quints - pack each group of 5 or 3 values
  10301. const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
  10302. for (int group_index = 0; group_index < total_groups; group_index++)
  10303. {
  10304. uint8_t vals[5] = { 0, 0, 0, 0, 0 };
  10305. const int limit = basisu::minimum(group_size, num_vals - group_index * group_size);
  10306. for (int i = 0; i < limit; i++)
  10307. vals[i] = pSrc_vals[group_index * group_size + i];
  10308. if (group_size == 5)
  10309. astc_encode_trits(temp, vals, bit_pos, num_bits);
  10310. else
  10311. astc_encode_quints(temp, vals, bit_pos, num_bits);
  10312. }
  10313. }
  10314. else
  10315. {
  10316. for (int i = 0; i < num_vals; i++)
  10317. astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
  10318. }
  10319. pDst[0] |= temp[0]; pDst[1] |= temp[1];
  10320. pDst[2] |= temp[2]; pDst[3] |= temp[3];
  10321. }
// Field widths (in bits) of the ASTC block header, as written by pack_astc_block().
// Block mode field; pack_astc_block() starts writing the remaining fields at this bit position.
const uint32_t ASTC_BLOCK_MODE_BITS = 11;
// Partition count field (stores subsets - 1).
const uint32_t ASTC_PART_BITS = 2;
// Color endpoint mode field (widened by 2 bits when there is more than one subset).
const uint32_t ASTC_CEM_BITS = 4;
// Partition seed field, only written when there is more than one subset.
const uint32_t ASTC_PARTITION_INDEX_BITS = 10;
// Color component selector field, only written for dual plane blocks.
const uint32_t ASTC_CCS_BITS = 2;
// ASTC block mode value for each UASTC mode; pack_astc_block() stores it in the first two bytes of the block.
// NOTE(review): entry 8 is 0 - presumably the solid color mode, which is packed by pack_astc_solid_block() instead; confirm.
const uint32_t g_uastc_mode_astc_block_mode[TOTAL_UASTC_MODES] = { 0x242, 0x42, 0x53, 0x42, 0x42, 0x53, 0x442, 0x42, 0, 0x42, 0x242, 0x442, 0x53, 0x441, 0x42, 0x242, 0x42, 0x442, 0x253 };
  10328. bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t uastc_mode)
  10329. {
  10330. assert(uastc_mode < TOTAL_UASTC_MODES);
  10331. uint8_t* pDst_bytes = reinterpret_cast<uint8_t*>(pDst);
  10332. const int total_weights = pBlock->m_dual_plane ? 32 : 16;
  10333. // Set mode bits - see Table 146-147
  10334. uint32_t mode = g_uastc_mode_astc_block_mode[uastc_mode];
  10335. pDst_bytes[0] = (uint8_t)mode;
  10336. pDst_bytes[1] = (uint8_t)(mode >> 8);
  10337. memset(pDst_bytes + 2, 0, 16 - 2);
  10338. int bit_pos = ASTC_BLOCK_MODE_BITS;
  10339. // We only support 1-5 bit weight indices
  10340. assert(!g_astc_bise_range_table[pBlock->m_weight_range][1] && !g_astc_bise_range_table[pBlock->m_weight_range][2]);
  10341. const int bits_per_weight = g_astc_bise_range_table[pBlock->m_weight_range][0];
  10342. // See table 143 - PART
  10343. astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_subsets - 1, ASTC_PART_BITS);
  10344. if (pBlock->m_subsets == 1)
  10345. astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_cem, ASTC_CEM_BITS);
  10346. else
  10347. {
  10348. // See table 145
  10349. astc_set_bits(pDst, bit_pos, pBlock->m_partition_seed, ASTC_PARTITION_INDEX_BITS);
  10350. // Table 150 - we assume all CEM's are equal, so write 2 0's along with the CEM
  10351. astc_set_bits_1_to_9(pDst, bit_pos, (pBlock->m_cem << 2) & 63, ASTC_CEM_BITS + 2);
  10352. }
  10353. if (pBlock->m_dual_plane)
  10354. {
  10355. const int total_weight_bits = total_weights * bits_per_weight;
  10356. // See Illegal Encodings 23.24
  10357. // https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_illegal_encodings
  10358. assert((total_weight_bits >= 24) && (total_weight_bits <= 96));
  10359. int ccs_bit_pos = 128 - total_weight_bits - ASTC_CCS_BITS;
  10360. astc_set_bits_1_to_9(pDst, ccs_bit_pos, pBlock->m_ccs, ASTC_CCS_BITS);
  10361. }
  10362. const int num_cem_pairs = (1 + (pBlock->m_cem >> 2)) * pBlock->m_subsets;
  10363. assert(num_cem_pairs <= 9);
  10364. astc_pack_bise(pDst, pBlock->m_endpoints, bit_pos, num_cem_pairs * 2, g_uastc_mode_endpoint_ranges[uastc_mode]);
  10365. // Write the weight bits in reverse bit order.
  10366. switch (bits_per_weight)
  10367. {
  10368. case 1:
  10369. {
  10370. const uint32_t N = 1;
  10371. for (int i = 0; i < total_weights; i++)
  10372. {
  10373. const uint32_t ofs = 128 - N - i;
  10374. assert((ofs >> 3) < 16);
  10375. pDst_bytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7));
  10376. }
  10377. break;
  10378. }
  10379. case 2:
  10380. {
  10381. const uint32_t N = 2;
  10382. for (int i = 0; i < total_weights; i++)
  10383. {
  10384. static const uint8_t s_reverse_bits2[4] = { 0, 2, 1, 3 };
  10385. const uint32_t ofs = 128 - N - (i * N);
  10386. assert((ofs >> 3) < 16);
  10387. pDst_bytes[ofs >> 3] |= (s_reverse_bits2[pBlock->m_weights[i]] << (ofs & 7));
  10388. }
  10389. break;
  10390. }
  10391. case 3:
  10392. {
  10393. const uint32_t N = 3;
  10394. for (int i = 0; i < total_weights; i++)
  10395. {
  10396. static const uint8_t s_reverse_bits3[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
  10397. const uint32_t ofs = 128 - N - (i * N);
  10398. const uint32_t rev = s_reverse_bits3[pBlock->m_weights[i]] << (ofs & 7);
  10399. uint32_t index = ofs >> 3;
  10400. assert(index < 16);
  10401. pDst_bytes[index++] |= rev & 0xFF;
  10402. if (index < 16)
  10403. pDst_bytes[index++] |= (rev >> 8);
  10404. }
  10405. break;
  10406. }
  10407. case 4:
  10408. {
  10409. const uint32_t N = 4;
  10410. for (int i = 0; i < total_weights; i++)
  10411. {
  10412. static const uint8_t s_reverse_bits4[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
  10413. const int ofs = 128 - N - (i * N);
  10414. assert(ofs >= 0 && (ofs >> 3) < 16);
  10415. pDst_bytes[ofs >> 3] |= (s_reverse_bits4[pBlock->m_weights[i]] << (ofs & 7));
  10416. }
  10417. break;
  10418. }
  10419. case 5:
  10420. {
  10421. const uint32_t N = 5;
  10422. for (int i = 0; i < total_weights; i++)
  10423. {
  10424. static const uint8_t s_reverse_bits5[32] = { 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31 };
  10425. const uint32_t ofs = 128 - N - (i * N);
  10426. const uint32_t rev = s_reverse_bits5[pBlock->m_weights[i]] << (ofs & 7);
  10427. uint32_t index = ofs >> 3;
  10428. assert(index < 16);
  10429. pDst_bytes[index++] |= rev & 0xFF;
  10430. if (index < 16)
  10431. pDst_bytes[index++] |= (rev >> 8);
  10432. }
  10433. break;
  10434. }
  10435. default:
  10436. assert(0);
  10437. break;
  10438. }
  10439. return true;
  10440. }
  10441. const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern)
  10442. {
  10443. const uint8_t* pSubset_anchor_indices = g_zero_pattern;
  10444. pPartition_pattern = g_zero_pattern;
  10445. if (subsets >= 2)
  10446. {
  10447. if (subsets == 3)
  10448. {
  10449. pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0];
  10450. pSubset_anchor_indices = &g_astc_bc7_pattern3_anchors[common_pattern][0];
  10451. }
  10452. else if (mode == 7)
  10453. {
  10454. pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0];
  10455. pSubset_anchor_indices = &g_bc7_3_astc2_patterns2_anchors[common_pattern][0];
  10456. }
  10457. else
  10458. {
  10459. pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0];
  10460. pSubset_anchor_indices = &g_astc_bc7_pattern2_anchors[common_pattern][0];
  10461. }
  10462. }
  10463. return pSubset_anchor_indices;
  10464. }
  10465. static inline uint32_t read_bit(const uint8_t* pBuf, uint32_t& bit_offset)
  10466. {
  10467. uint32_t byte_bits = pBuf[bit_offset >> 3] >> (bit_offset & 7);
  10468. bit_offset += 1;
  10469. return byte_bits & 1;
  10470. }
  10471. static inline uint32_t read_bits1_to_9(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
  10472. {
  10473. assert(codesize <= 9);
  10474. if (!codesize)
  10475. return 0;
  10476. if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS) || (bit_offset >= 112))
  10477. {
  10478. const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
  10479. uint32_t byte_bit_offset = bit_offset & 7U;
  10480. uint32_t bits = pBytes[0] >> byte_bit_offset;
  10481. uint32_t bits_read = basisu::minimum<int>(codesize, 8 - byte_bit_offset);
  10482. uint32_t bits_remaining = codesize - bits_read;
  10483. if (bits_remaining)
  10484. bits |= ((uint32_t)pBytes[1]) << bits_read;
  10485. bit_offset += codesize;
  10486. return bits & ((1U << codesize) - 1U);
  10487. }
  10488. uint32_t byte_bit_offset = bit_offset & 7U;
  10489. const uint16_t w = *(const uint16_t *)(&pBuf[bit_offset >> 3U]);
  10490. bit_offset += codesize;
  10491. return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
  10492. }
  10493. inline uint64_t read_bits64(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
  10494. {
  10495. assert(codesize <= 64U);
  10496. uint64_t bits = 0;
  10497. uint32_t total_bits = 0;
  10498. while (total_bits < codesize)
  10499. {
  10500. uint32_t byte_bit_offset = bit_offset & 7U;
  10501. uint32_t bits_to_read = basisu::minimum<int>(codesize - total_bits, 8U - byte_bit_offset);
  10502. uint32_t byte_bits = pBuf[bit_offset >> 3U] >> byte_bit_offset;
  10503. byte_bits &= ((1U << bits_to_read) - 1U);
  10504. bits |= ((uint64_t)(byte_bits) << total_bits);
  10505. total_bits += bits_to_read;
  10506. bit_offset += bits_to_read;
  10507. }
  10508. return bits;
  10509. }
  10510. static inline uint32_t read_bits1_to_9_fst(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
  10511. {
  10512. assert(codesize <= 9);
  10513. if (!codesize)
  10514. return 0;
  10515. assert(bit_offset < 112);
  10516. if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
  10517. {
  10518. const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
  10519. uint32_t byte_bit_offset = bit_offset & 7U;
  10520. uint32_t bits = pBytes[0] >> byte_bit_offset;
  10521. uint32_t bits_read = basisu::minimum<int>(codesize, 8 - byte_bit_offset);
  10522. uint32_t bits_remaining = codesize - bits_read;
  10523. if (bits_remaining)
  10524. bits |= ((uint32_t)pBytes[1]) << bits_read;
  10525. bit_offset += codesize;
  10526. return bits & ((1U << codesize) - 1U);
  10527. }
  10528. else
  10529. {
  10530. uint32_t byte_bit_offset = bit_offset & 7U;
  10531. const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
  10532. bit_offset += codesize;
  10533. return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
  10534. }
  10535. }
// Decodes a physical UASTC block into its logical form.
//   blk                 - packed 16-byte UASTC block.
//   unpacked            - [out] receives the mode, common pattern index, transcoding hints (when read_hints is set)
//                         and either the solid color or the ASTC description (endpoints, weights, CCS, partition seed).
//   blue_contract_check - when true and the mode has >= 3 components, endpoint pairs are swapped (and the affected
//                         weights inverted) wherever the second endpoint's component sum is lower than the first's.
//   read_hints          - when true, the ETC1/ETC2/BC1 hint fields are decoded; when false they are skipped and the
//                         corresponding members of `unpacked` are left untouched.
// Returns false if the mode code is invalid or the stored pattern index is out of range for the mode, true otherwise.
bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
{
	//memset(&unpacked, 0, sizeof(unpacked));
#if 0
	uint8_t table[128];
	memset(table, 0xFF, sizeof(table));
	{
		for (uint32_t mode = 0; mode <= TOTAL_UASTC_MODES; mode++)
		{
			const uint32_t code = g_uastc_mode_huff_codes[mode][0];
			const uint32_t codesize = g_uastc_mode_huff_codes[mode][1];
			table[code] = mode;
			uint32_t bits_left = 7 - codesize;
			for (uint32_t i = 0; i < (1 << bits_left); i++)
				table[code | (i << codesize)] = mode;
		}
		for (uint32_t i = 0; i < 128; i++)
			printf("%u,", table[i]);
		exit(0);
	}
#endif
	// The mode is identified by a variable length code in the low 7 bits of the first byte.
	const int mode = g_uastc_huff_modes[blk.m_bytes[0] & 127];
	if (mode >= (int)TOTAL_UASTC_MODES)
		return false;
	unpacked.m_mode = mode;
	unpacked.m_common_pattern = 0;
	// Skip past the mode code; everything below is read sequentially from here.
	uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1];
	if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
	{
		// Solid color blocks: four 8-bit components, optionally followed by the ETC1 hints. No ASTC data is decoded.
		unpacked.m_solid_color.r = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
		unpacked.m_solid_color.g = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
		unpacked.m_solid_color.b = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
		unpacked.m_solid_color.a = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
		if (read_hints)
		{
			unpacked.m_etc1_flip = false;
			unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0;
			unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
			unpacked.m_etc1_inten1 = 0;
			unpacked.m_etc1_selector = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2);
			unpacked.m_etc1_r = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
			unpacked.m_etc1_g = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
			unpacked.m_etc1_b = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
			unpacked.m_etc1_bias = 0;
			unpacked.m_etc2_hints = 0;
		}
		return true;
	}
	// Transcoding hints. Which fields are present depends on the mode; absent fields are set to false/0.
	if (read_hints)
	{
		if (g_uastc_mode_has_bc1_hint0[mode])
			unpacked.m_bc1_hint0 = read_bit(blk.m_bytes, bit_ofs) != 0;
		else
			unpacked.m_bc1_hint0 = false;
		if (g_uastc_mode_has_bc1_hint1[mode])
			unpacked.m_bc1_hint1 = read_bit(blk.m_bytes, bit_ofs) != 0;
		else
			unpacked.m_bc1_hint1 = false;
		unpacked.m_etc1_flip = read_bit(blk.m_bytes, bit_ofs) != 0;
		unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0;
		unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
		unpacked.m_etc1_inten1 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
		if (g_uastc_mode_has_etc1_bias[mode])
			unpacked.m_etc1_bias = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
		else
			unpacked.m_etc1_bias = 0;
		if (g_uastc_mode_has_alpha[mode])
		{
			unpacked.m_etc2_hints = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
			//assert(unpacked.m_etc2_hints > 0);
		}
		else
			unpacked.m_etc2_hints = 0;
	}
	else
		// Hints not wanted: just step over them.
		bit_ofs += g_uastc_mode_total_hint_bits[mode];
	// Multi-subset modes store a common pattern index: 5 bits for the 2-subset modes, 4 bits for the 3-subset mode.
	uint32_t subsets = 1;
	switch (mode)
	{
	case 2:
	case 4:
	case 7:
	case 9:
	case 16:
		unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
		subsets = 2;
		break;
	case 3:
		unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 4);
		subsets = 3;
		break;
	default:
		break;
	}
	// Validate the pattern index against the table used by this mode and fetch the matching ASTC partition seed.
	uint32_t part_seed = 0;
	switch (mode)
	{
	case 2:
	case 4:
	case 9:
	case 16:
		if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS2)
			return false;
		part_seed = g_astc_bc7_common_partitions2[unpacked.m_common_pattern].m_astc;
		break;
	case 3:
		if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS3)
			return false;
		part_seed = g_astc_bc7_common_partitions3[unpacked.m_common_pattern].m_astc;
		break;
	case 7:
		if (unpacked.m_common_pattern >= TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS)
			return false;
		part_seed = g_bc7_3_astc2_common_partitions[unpacked.m_common_pattern].m_astc2;
		break;
	default:
		break;
	}
	// Dual plane modes: modes 6, 11 and 13 store a 2-bit CCS, mode 17 always uses CCS 3.
	// m_ccs is only written for these modes.
	uint32_t total_planes = 1;
	switch (mode)
	{
	case 6:
	case 11:
	case 13:
		unpacked.m_astc.m_ccs = (int)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2);
		total_planes = 2;
		break;
	case 17:
		unpacked.m_astc.m_ccs = 3;
		total_planes = 2;
		break;
	default:
		break;
	}
	unpacked.m_astc.m_dual_plane = (total_planes == 2);
	unpacked.m_astc.m_subsets = subsets;
	unpacked.m_astc.m_partition_seed = part_seed;
	// Per-mode configuration.
	const uint32_t total_comps = g_uastc_mode_comps[mode];
	const uint32_t weight_bits = g_uastc_mode_weight_bits[mode];
	unpacked.m_astc.m_weight_range = g_uastc_mode_weight_ranges[mode];
	// Two endpoint values (low/high) per component per subset.
	const uint32_t total_values = total_comps * 2 * subsets;
	const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
	const uint32_t cem = g_uastc_mode_cem[mode];
	unpacked.m_astc.m_cem = cem;
	// How the endpoint range is built: plain bits per value, plus optionally a trit or a quint.
	const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0];
	const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1];
	const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2];
	// Trits are bundled 5 per code (base 3), quints 3 per code (base 5). total_tqs stays 0 for plain bit ranges.
	uint32_t total_tqs = 0;
	uint32_t bundle_size = 0, mul = 0;
	if (ep_trits)
	{
		total_tqs = (total_values + 4) / 5;
		bundle_size = 5;
		mul = 3;
	}
	else if (ep_quints)
	{
		total_tqs = (total_values + 2) / 3;
		bundle_size = 3;
		mul = 5;
	}
	// All trit/quint bundle codes are stored first, ahead of the plain endpoint bits.
	uint32_t tq_values[8];
	for (uint32_t i = 0; i < total_tqs; i++)
	{
		// A full bundle takes 8 bits (trits) or 7 bits (quints).
		uint32_t num_bits = ep_trits ? 8 : 7;
		if (i == (total_tqs - 1))
		{
			// The final bundle may hold fewer values, in which case it is stored with fewer bits.
			uint32_t num_remaining = total_values - (total_tqs - 1) * bundle_size;
			if (ep_trits)
			{
				switch (num_remaining)
				{
				case 1: num_bits = 2; break;
				case 2: num_bits = 4; break;
				case 3: num_bits = 5; break;
				case 4: num_bits = 7; break;
				default: break;
				}
			}
			else if (ep_quints)
			{
				switch (num_remaining)
				{
				case 1: num_bits = 3; break;
				case 2: num_bits = 5; break;
				default: break;
				}
			}
		}
		tq_values[i] = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, num_bits);
	} // i
	// Rebuild each endpoint value: plain low bits from the stream, trit/quint digit (if any) placed above them.
	uint32_t accum = 0;
	uint32_t accum_remaining = 0;
	uint32_t next_tq_index = 0;
	for (uint32_t i = 0; i < total_values; i++)
	{
		uint32_t value = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, ep_bits);
		if (total_tqs)
		{
			// Start on the next bundle code once the current one has been fully consumed.
			if (!accum_remaining)
			{
				assert(next_tq_index < total_tqs);
				accum = tq_values[next_tq_index++];
				accum_remaining = bundle_size;
			}
			// TODO: Optimize with tables
			// Peel the next base-3/base-5 digit off the bundle code.
			uint32_t v = accum % mul;
			accum /= mul;
			accum_remaining--;
			value |= (v << ep_bits);
		}
		unpacked.m_astc.m_endpoints[i] = (uint8_t)value;
	}
	const uint8_t* pPartition_pattern;
	const uint8_t* pSubset_anchor_indices = get_anchor_indices(subsets, mode, unpacked.m_common_pattern, pPartition_pattern);
#ifdef _DEBUG
	// Debug-only sanity checks: the table pattern must match what the ASTC partition function generates for
	// part_seed, and each subset's anchor must be the first texel belonging to that subset.
	for (uint32_t i = 0; i < 16; i++)
		assert(pPartition_pattern[i] == astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true));
	for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
	{
		uint32_t anchor_index = 0;
		for (uint32_t i = 0; i < 16; i++)
		{
			if (pPartition_pattern[i] == subset_index)
			{
				anchor_index = i;
				break;
			}
		}
		assert(pSubset_anchor_indices[subset_index] == anchor_index);
	}
#endif
#if 0
	const uint32_t total_planes_shift = total_planes - 1;
	for (uint32_t i = 0; i < 16 * total_planes; i++)
	{
		uint32_t num_bits = weight_bits;
		for (uint32_t s = 0; s < subsets; s++)
		{
			if (pSubset_anchor_indices[s] == (i >> total_planes_shift))
			{
				num_bits--;
				break;
			}
		}
		unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, num_bits);
	}
#endif
	// Weights. Anchor weights are stored with one bit fewer than the others (their top bit is read as 0).
	if (mode == 18)
	{
		// Mode 18 is the only mode with more than 64 weight bits.
		// Read each weight individually; the first weight is the anchor.
		for (uint32_t i = 0; i < 16; i++)
			unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, i ? weight_bits : (weight_bits - 1));
	}
	else
	{
		// All other modes have <= 64 weight bits.
		// Load them all into a single 64-bit word so each weight becomes a shift and a mask.
		uint64_t bits;
		// Read the weight bits
		if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
			bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum<int>(64, 128 - (int)bit_ofs));
		else
		{
			// Start from the upper 64 bits of the block (bits 64..127).
			bits = blk.m_dwords[2];
			bits |= (((uint64_t)blk.m_dwords[3]) << 32U);
			if (bit_ofs >= 64U)
				bits >>= (bit_ofs - 64U);
			else
			{
				// The weights begin within byte 7: shift up and pull the missing low bits from the top of that byte.
				assert(bit_ofs >= 56U);
				uint32_t bits_needed = 64U - bit_ofs;
				bits <<= bits_needed;
				bits |= (blk.m_bytes[7] >> (8U - bits_needed));
			}
		}
		// From here on bit_ofs indexes into `bits`, not into the block.
		bit_ofs = 0;
		const uint32_t mask = (1U << weight_bits) - 1U;
		const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U;
		if (total_planes == 2)
		{
			// Dual plane modes always have a single subset, and the first 2 weights are anchors.
			unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
			bit_ofs += (weight_bits - 1);
			unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
			bit_ofs += (weight_bits - 1);
			for (uint32_t i = 2; i < 32; i++)
			{
				unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
				bit_ofs += weight_bits;
			}
		}
		else
		{
			if (subsets == 1)
			{
				// Specialize the single subset case.
				if (weight_bits == 4)
				{
					assert(bit_ofs == 0);
					// Specialize the most common case: 4-bit weights.
					// Weight 0 is the 3-bit anchor; weight k (k >= 1) starts at bit 3 + 4 * (k - 1).
					unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7);
					unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15);
					unpacked.m_astc.m_weights[2] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 1)) & 15);
					unpacked.m_astc.m_weights[3] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 2)) & 15);
					unpacked.m_astc.m_weights[4] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 3)) & 15);
					unpacked.m_astc.m_weights[5] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 4)) & 15);
					unpacked.m_astc.m_weights[6] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 5)) & 15);
					unpacked.m_astc.m_weights[7] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 6)) & 15);
					unpacked.m_astc.m_weights[8] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 7)) & 15);
					unpacked.m_astc.m_weights[9] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 8)) & 15);
					unpacked.m_astc.m_weights[10] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 9)) & 15);
					unpacked.m_astc.m_weights[11] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 10)) & 15);
					unpacked.m_astc.m_weights[12] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 11)) & 15);
					unpacked.m_astc.m_weights[13] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 12)) & 15);
					unpacked.m_astc.m_weights[14] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 13)) & 15);
					unpacked.m_astc.m_weights[15] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 14)) & 15);
				}
				else
				{
					// First weight is always an anchor.
					unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
					bit_ofs += (weight_bits - 1);
					for (uint32_t i = 1; i < 16; i++)
					{
						unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
						bit_ofs += weight_bits;
					}
				}
			}
			else
			{
				// Multiple subsets: the texels listed in the anchor table are the short (anchor) weights.
				// All three anchor entries are read and compared, including for 2-subset modes.
				const uint32_t a0 = pSubset_anchor_indices[0], a1 = pSubset_anchor_indices[1], a2 = pSubset_anchor_indices[2];
				for (uint32_t i = 0; i < 16; i++)
				{
					if ((i == a0) || (i == a1) || (i == a2))
					{
						unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
						bit_ofs += (weight_bits - 1);
					}
					else
					{
						unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
						bit_ofs += weight_bits;
					}
				}
			}
		}
	}
	if ((blue_contract_check) && (total_comps >= 3))
	{
		// We only need to disable ASTC Blue Contraction when we'll be packing to ASTC. The other transcoders don't care.
		bool invert_subset[3] = { false, false, false };
		bool any_flag = false;
		for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
		{
			// s0/s1: sums of the unquantized first/second endpoint values of the first three components.
			const int s0 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 0]].m_unquant +
				g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 2]].m_unquant +
				g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 4]].m_unquant;
			const int s1 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 1]].m_unquant +
				g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 3]].m_unquant +
				g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 5]].m_unquant;
			if (s1 < s0)
			{
				// Swap the two endpoints of every component in this subset and remember to invert its weights.
				for (uint32_t c = 0; c < total_comps; c++)
					std::swap(unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 0], unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 1]);
				invert_subset[subset_index] = true;
				any_flag = true;
			}
		}
		if (any_flag)
		{
			// Swapping endpoints reverses the interpolation direction, so replace each affected weight w with (max - w).
			const uint32_t weight_mask = (1 << weight_bits) - 1;
			for (uint32_t i = 0; i < 16; i++)
			{
				uint32_t subset = pPartition_pattern[i];
				if (invert_subset[subset])
				{
					unpacked.m_astc.m_weights[i * total_planes] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes]);
					if (total_planes == 2)
						unpacked.m_astc.m_weights[i * total_planes + 1] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes + 1]);
				}
			}
		}
	}
	return true;
}
// Weight interpolation tables indexed by the number of weight bits (1-5). Entry 0 is unused (nullptr):
// there is no 0-bit weight mode, so it must never be dereferenced.
static const uint32_t* g_astc_weight_tables[6] = { nullptr, g_bc7_weights1, g_bc7_weights2, g_bc7_weights3, g_astc_weights4, g_astc_weights5 };
  10923. bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb)
  10924. {
  10925. if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
  10926. {
  10927. for (uint32_t i = 0; i < 16; i++)
  10928. pPixels[i] = solid_color;
  10929. return true;
  10930. }
  10931. color32 endpoints[3][2];
  10932. const uint32_t total_subsets = g_uastc_mode_subsets[mode];
  10933. const uint32_t total_comps = basisu::minimum<uint32_t>(4U, g_uastc_mode_comps[mode]);
  10934. const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
  10935. const uint32_t total_planes = g_uastc_mode_planes[mode];
  10936. const uint32_t weight_bits = g_uastc_mode_weight_bits[mode];
  10937. const uint32_t weight_levels = 1 << weight_bits;
  10938. for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++)
  10939. {
  10940. if (total_comps == 2)
  10941. {
  10942. const uint32_t ll = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 0]].m_unquant;
  10943. const uint32_t lh = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 1]].m_unquant;
  10944. const uint32_t al = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 0]].m_unquant;
  10945. const uint32_t ah = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 1]].m_unquant;
  10946. endpoints[subset_index][0].set_noclamp_rgba(ll, ll, ll, al);
  10947. endpoints[subset_index][1].set_noclamp_rgba(lh, lh, lh, ah);
  10948. }
  10949. else
  10950. {
  10951. for (uint32_t comp_index = 0; comp_index < total_comps; comp_index++)
  10952. {
  10953. endpoints[subset_index][0][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 0]].m_unquant;
  10954. endpoints[subset_index][1][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 1]].m_unquant;
  10955. }
  10956. for (uint32_t comp_index = total_comps; comp_index < 4; comp_index++)
  10957. {
  10958. endpoints[subset_index][0][comp_index] = 255;
  10959. endpoints[subset_index][1][comp_index] = 255;
  10960. }
  10961. }
  10962. }
  10963. color32 block_colors[3][32];
  10964. const uint32_t* pWeights = g_astc_weight_tables[weight_bits];
  10965. for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++)
  10966. {
  10967. for (uint32_t l = 0; l < weight_levels; l++)
  10968. {
  10969. if (total_comps == 2)
  10970. {
  10971. const uint8_t lc = (uint8_t)astc_interpolate(endpoints[subset_index][0][0], endpoints[subset_index][1][0], pWeights[l], srgb);
  10972. const uint8_t ac = (uint8_t)astc_interpolate(endpoints[subset_index][0][3], endpoints[subset_index][1][3], pWeights[l], srgb);
  10973. block_colors[subset_index][l].set(lc, lc, lc, ac);
  10974. }
  10975. else
  10976. {
  10977. uint32_t comp_index;
  10978. for (comp_index = 0; comp_index < total_comps; comp_index++)
  10979. block_colors[subset_index][l][comp_index] = (uint8_t)astc_interpolate(endpoints[subset_index][0][comp_index], endpoints[subset_index][1][comp_index], pWeights[l], srgb);
  10980. for (; comp_index < 4; comp_index++)
  10981. block_colors[subset_index][l][comp_index] = 255;
  10982. }
  10983. }
  10984. }
  10985. const uint8_t* pPartition_pattern = g_zero_pattern;
  10986. if (total_subsets >= 2)
  10987. {
  10988. if (total_subsets == 3)
  10989. pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0];
  10990. else if (mode == 7)
  10991. pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0];
  10992. else
  10993. pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0];
  10994. #ifdef _DEBUG
  10995. for (uint32_t i = 0; i < 16; i++)
  10996. {
  10997. assert(pPartition_pattern[i] == (uint8_t)astc_compute_texel_partition(astc.m_partition_seed, i & 3, i >> 2, 0, total_subsets, true));
  10998. }
  10999. #endif
  11000. }
  11001. if (total_planes == 1)
  11002. {
  11003. if (total_subsets == 1)
  11004. {
  11005. for (uint32_t i = 0; i < 16; i++)
  11006. {
  11007. assert(astc.m_weights[i] < weight_levels);
  11008. pPixels[i] = block_colors[0][astc.m_weights[i]];
  11009. }
  11010. }
  11011. else
  11012. {
  11013. for (uint32_t i = 0; i < 16; i++)
  11014. {
  11015. assert(astc.m_weights[i] < weight_levels);
  11016. pPixels[i] = block_colors[pPartition_pattern[i]][astc.m_weights[i]];
  11017. }
  11018. }
  11019. }
  11020. else
  11021. {
  11022. assert(total_subsets == 1);
  11023. for (uint32_t i = 0; i < 16; i++)
  11024. {
  11025. const uint32_t subset_index = 0; // pPartition_pattern[i];
  11026. const uint32_t weight_index0 = astc.m_weights[i * 2];
  11027. const uint32_t weight_index1 = astc.m_weights[i * 2 + 1];
  11028. assert(weight_index0 < weight_levels && weight_index1 < weight_levels);
  11029. color32& c = pPixels[i];
  11030. for (uint32_t comp = 0; comp < 4; comp++)
  11031. {
  11032. if ((int)comp == astc.m_ccs)
  11033. c[comp] = block_colors[subset_index][weight_index1][comp];
  11034. else
  11035. c[comp] = block_colors[subset_index][weight_index0][comp];
  11036. }
  11037. }
  11038. }
  11039. return true;
  11040. }
  11041. bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb)
  11042. {
  11043. return unpack_uastc(unpacked_blk.m_mode, unpacked_blk.m_common_pattern, unpacked_blk.m_solid_color, unpacked_blk.m_astc, pPixels, srgb);
  11044. }
  11045. bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb)
  11046. {
  11047. unpacked_uastc_block unpacked_blk;
  11048. if (!unpack_uastc(blk, unpacked_blk, false, false))
  11049. return false;
  11050. return unpack_uastc(unpacked_blk, pPixels, srgb);
  11051. }
  11052. // Determines the best shared pbits to use to encode xl/xh
  11053. static void determine_shared_pbits(
  11054. uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
  11055. color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
  11056. {
  11057. const uint32_t total_bits = comp_bits + 1;
  11058. assert(total_bits >= 4 && total_bits <= 8);
  11059. const int iscalep = (1 << total_bits) - 1;
  11060. const float scalep = (float)iscalep;
  11061. float best_err = 1e+9f;
  11062. for (int p = 0; p < 2; p++)
  11063. {
  11064. color_quad_u8 xMinColor, xMaxColor;
  11065. for (uint32_t c = 0; c < 4; c++)
  11066. {
  11067. xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
  11068. xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
  11069. }
  11070. color_quad_u8 scaledLow, scaledHigh;
  11071. for (uint32_t i = 0; i < 4; i++)
  11072. {
  11073. scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits));
  11074. scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits);
  11075. assert(scaledLow.m_c[i] <= 255);
  11076. scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits));
  11077. scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits);
  11078. assert(scaledHigh.m_c[i] <= 255);
  11079. }
  11080. float err = 0;
  11081. for (uint32_t i = 0; i < total_comps; i++)
  11082. err += basisu::squaref((scaledLow.m_c[i] / 255.0f) - xl[i]) + basisu::squaref((scaledHigh.m_c[i] / 255.0f) - xh[i]);
  11083. if (err < best_err)
  11084. {
  11085. best_err = err;
  11086. best_pbits[0] = p;
  11087. best_pbits[1] = p;
  11088. for (uint32_t j = 0; j < 4; j++)
  11089. {
  11090. bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1;
  11091. bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1;
  11092. }
  11093. }
  11094. }
  11095. }
  11096. // Determines the best unique pbits to use to encode xl/xh
  11097. static void determine_unique_pbits(
  11098. uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
  11099. color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
  11100. {
  11101. const uint32_t total_bits = comp_bits + 1;
  11102. const int iscalep = (1 << total_bits) - 1;
  11103. const float scalep = (float)iscalep;
  11104. float best_err0 = 1e+9f;
  11105. float best_err1 = 1e+9f;
  11106. for (int p = 0; p < 2; p++)
  11107. {
  11108. color_quad_u8 xMinColor, xMaxColor;
  11109. for (uint32_t c = 0; c < 4; c++)
  11110. {
  11111. xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
  11112. xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
  11113. }
  11114. color_quad_u8 scaledLow, scaledHigh;
  11115. for (uint32_t i = 0; i < 4; i++)
  11116. {
  11117. scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits));
  11118. scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits);
  11119. assert(scaledLow.m_c[i] <= 255);
  11120. scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits));
  11121. scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits);
  11122. assert(scaledHigh.m_c[i] <= 255);
  11123. }
  11124. float err0 = 0, err1 = 0;
  11125. for (uint32_t i = 0; i < total_comps; i++)
  11126. {
  11127. err0 += basisu::squaref(scaledLow.m_c[i] - xl[i] * 255.0f);
  11128. err1 += basisu::squaref(scaledHigh.m_c[i] - xh[i] * 255.0f);
  11129. }
  11130. if (err0 < best_err0)
  11131. {
  11132. best_err0 = err0;
  11133. best_pbits[0] = p;
  11134. bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1;
  11135. bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1;
  11136. bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1;
  11137. bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1;
  11138. }
  11139. if (err1 < best_err1)
  11140. {
  11141. best_err1 = err1;
  11142. best_pbits[1] = p;
  11143. bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1;
  11144. bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1;
  11145. bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1;
  11146. bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1;
  11147. }
  11148. }
  11149. }
  11150. bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst)
  11151. {
  11152. unpacked_uastc_block unpacked_src_blk;
  11153. if (!unpack_uastc(src_blk, unpacked_src_blk, true, false))
  11154. return false;
  11155. bool success = false;
  11156. if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
  11157. {
  11158. pack_astc_solid_block(pDst, unpacked_src_blk.m_solid_color);
  11159. success = true;
  11160. }
  11161. else
  11162. {
  11163. success = pack_astc_block(static_cast<uint32_t*>(pDst), &unpacked_src_blk.m_astc, unpacked_src_blk.m_mode);
  11164. }
  11165. return success;
  11166. }
  11167. bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk)
  11168. {
  11169. memset(&dst_blk, 0, sizeof(dst_blk));
  11170. const uint32_t mode = unpacked_src_blk.m_mode;
  11171. const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
  11172. const uint32_t total_comps = g_uastc_mode_comps[mode];
  11173. switch (mode)
  11174. {
  11175. case 0:
  11176. case 5:
  11177. case 10:
  11178. case 12:
  11179. case 14:
  11180. case 15:
  11181. case 18:
  11182. {
  11183. // MODE 0: DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 19 (192) - BC7 MODE6 RGB
  11184. // MODE 5: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 RGB
  11185. // MODE 10 DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE6
  11186. // MODE 12: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 19 (192) - BC7 MODE6
  11187. // MODE 14: DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6
  11188. // MODE 18: DualPlane: 0, WeightRange : 11 (32), Subsets : 1, CEM : 8, EndpointRange : 11 (32) - BC7 MODE6
  11189. // MODE 15: DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE6
  11190. dst_blk.m_mode = 6;
  11191. float xl[4], xh[4];
  11192. if (total_comps == 2)
  11193. {
  11194. xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f;
  11195. xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f;
  11196. xl[1] = xl[0];
  11197. xh[1] = xh[0];
  11198. xl[2] = xl[0];
  11199. xh[2] = xh[0];
  11200. xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f;
  11201. xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f;
  11202. }
  11203. else
  11204. {
  11205. xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f;
  11206. xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f;
  11207. xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4]].m_unquant / 255.0f;
  11208. xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f;
  11209. xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f;
  11210. xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5]].m_unquant / 255.0f;
  11211. if (total_comps == 4)
  11212. {
  11213. xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6]].m_unquant / 255.0f;
  11214. xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7]].m_unquant / 255.0f;
  11215. }
  11216. else
  11217. {
  11218. xl[3] = 1.0f;
  11219. xh[3] = 1.0f;
  11220. }
  11221. }
  11222. uint32_t best_pbits[2];
  11223. color_quad_u8 bestMinColor, bestMaxColor;
  11224. determine_unique_pbits((total_comps == 2) ? 4 : total_comps, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);
  11225. dst_blk.m_low[0] = bestMinColor;
  11226. dst_blk.m_high[0] = bestMaxColor;
  11227. if (total_comps == 3)
  11228. {
  11229. dst_blk.m_low[0].m_c[3] = 127;
  11230. dst_blk.m_high[0].m_c[3] = 127;
  11231. }
  11232. dst_blk.m_pbits[0][0] = best_pbits[0];
  11233. dst_blk.m_pbits[0][1] = best_pbits[1];
  11234. if (mode == 18)
  11235. {
  11236. const uint8_t s_bc7_5_to_4[32] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 };
  11237. for (uint32_t i = 0; i < 16; i++)
  11238. dst_blk.m_selectors[i] = s_bc7_5_to_4[unpacked_src_blk.m_astc.m_weights[i]];
  11239. }
  11240. else if (mode == 14)
  11241. {
  11242. const uint8_t s_bc7_2_to_4[4] = { 0, 5, 10, 15 };
  11243. for (uint32_t i = 0; i < 16; i++)
  11244. dst_blk.m_selectors[i] = s_bc7_2_to_4[unpacked_src_blk.m_astc.m_weights[i]];
  11245. }
  11246. else if ((mode == 5) || (mode == 12))
  11247. {
  11248. const uint8_t s_bc7_3_to_4[8] = { 0, 2, 4, 6, 9, 11, 13, 15 };
  11249. for (uint32_t i = 0; i < 16; i++)
  11250. dst_blk.m_selectors[i] = s_bc7_3_to_4[unpacked_src_blk.m_astc.m_weights[i]];
  11251. }
  11252. else
  11253. {
  11254. for (uint32_t i = 0; i < 16; i++)
  11255. dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
  11256. }
  11257. break;
  11258. }
  11259. case 1:
  11260. {
  11261. // DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE3
  11262. // Mode 1 uses endpoint range 20 - no need to use ASTC dequant tables.
  11263. dst_blk.m_mode = 3;
  11264. float xl[4], xh[4];
  11265. xl[0] = unpacked_src_blk.m_astc.m_endpoints[0] / 255.0f;
  11266. xl[1] = unpacked_src_blk.m_astc.m_endpoints[2] / 255.0f;
  11267. xl[2] = unpacked_src_blk.m_astc.m_endpoints[4] / 255.0f;
  11268. xl[3] = 1.0f;
  11269. xh[0] = unpacked_src_blk.m_astc.m_endpoints[1] / 255.0f;
  11270. xh[1] = unpacked_src_blk.m_astc.m_endpoints[3] / 255.0f;
  11271. xh[2] = unpacked_src_blk.m_astc.m_endpoints[5] / 255.0f;
  11272. xh[3] = 1.0f;
  11273. uint32_t best_pbits[2];
  11274. color_quad_u8 bestMinColor, bestMaxColor;
  11275. memset(&bestMinColor, 0, sizeof(bestMinColor));
  11276. memset(&bestMaxColor, 0, sizeof(bestMaxColor));
  11277. determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);
  11278. for (uint32_t i = 0; i < 3; i++)
  11279. {
  11280. dst_blk.m_low[0].m_c[i] = bestMinColor.m_c[i];
  11281. dst_blk.m_high[0].m_c[i] = bestMaxColor.m_c[i];
  11282. dst_blk.m_low[1].m_c[i] = bestMinColor.m_c[i];
  11283. dst_blk.m_high[1].m_c[i] = bestMaxColor.m_c[i];
  11284. }
  11285. dst_blk.m_pbits[0][0] = best_pbits[0];
  11286. dst_blk.m_pbits[0][1] = best_pbits[1];
  11287. dst_blk.m_pbits[1][0] = best_pbits[0];
  11288. dst_blk.m_pbits[1][1] = best_pbits[1];
  11289. for (uint32_t i = 0; i < 16; i++)
  11290. dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
  11291. break;
  11292. }
  11293. case 2:
  11294. {
  11295. // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1
  11296. dst_blk.m_mode = 1;
  11297. dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;
  11298. const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;
  11299. float xl[4], xh[4];
  11300. xl[3] = 1.0f;
  11301. xh[3] = 1.0f;
  11302. for (uint32_t subset = 0; subset < 2; subset++)
  11303. {
  11304. for (uint32_t i = 0; i < 3; i++)
  11305. {
  11306. uint32_t v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6];
  11307. v = (v << 4) | v;
  11308. xl[i] = v / 255.0f;
  11309. v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1];
  11310. v = (v << 4) | v;
  11311. xh[i] = v / 255.0f;
  11312. }
  11313. uint32_t best_pbits[2] = { 0, 0 };
  11314. color_quad_u8 bestMinColor, bestMaxColor;
  11315. memset(&bestMinColor, 0, sizeof(bestMinColor));
  11316. memset(&bestMaxColor, 0, sizeof(bestMaxColor));
  11317. determine_shared_pbits(3, 6, xl, xh, bestMinColor, bestMaxColor, best_pbits);
  11318. const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset;
  11319. for (uint32_t i = 0; i < 3; i++)
  11320. {
  11321. dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i];
  11322. dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i];
  11323. }
  11324. dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
  11325. } // subset
  11326. for (uint32_t i = 0; i < 16; i++)
  11327. dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
  11328. break;
  11329. }
  11330. case 3:
  11331. {
  11332. // DualPlane: 0, WeightRange : 2 (4), Subsets : 3, EndpointRange : 7 (12) - BC7 MODE2
  11333. dst_blk.m_mode = 2;
  11334. dst_blk.m_partition = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_bc7;
  11335. const uint32_t perm = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_astc_to_bc7_perm;
  11336. for (uint32_t subset = 0; subset < 3; subset++)
  11337. {
  11338. for (uint32_t comp = 0; comp < 3; comp++)
  11339. {
  11340. uint32_t lo = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 0 + subset * 6]].m_unquant;
  11341. uint32_t hi = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 1 + subset * 6]].m_unquant;
  11342. // TODO: I think this can be improved by using tables like Basis Universal does with ETC1S conversion.
  11343. lo = (lo * 31 + 127) / 255;
  11344. hi = (hi * 31 + 127) / 255;
  11345. const uint32_t bc7_subset_index = g_astc_to_bc7_partition_index_perm_tables[perm][subset];
  11346. dst_blk.m_low[bc7_subset_index].m_c[comp] = (uint8_t)lo;
  11347. dst_blk.m_high[bc7_subset_index].m_c[comp] = (uint8_t)hi;
  11348. }
  11349. }
  11350. for (uint32_t i = 0; i < 16; i++)
  11351. dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
  11352. break;
  11353. }
  11354. case 4:
  11355. {
  11356. // 4. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, EndpointRange: 12 (40) - BC7 MODE3
  11357. dst_blk.m_mode = 3;
  11358. dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;
  11359. const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;
  11360. float xl[4], xh[4];
  11361. xl[3] = 1.0f;
  11362. xh[3] = 1.0f;
  11363. for (uint32_t subset = 0; subset < 2; subset++)
  11364. {
  11365. for (uint32_t i = 0; i < 3; i++)
  11366. {
  11367. xl[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]].m_unquant / 255.0f;
  11368. xh[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]].m_unquant / 255.0f;
  11369. }
  11370. uint32_t best_pbits[2] = { 0, 0 };
  11371. color_quad_u8 bestMinColor, bestMaxColor;
  11372. memset(&bestMinColor, 0, sizeof(bestMinColor));
  11373. memset(&bestMaxColor, 0, sizeof(bestMaxColor));
  11374. determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);
  11375. const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset;
  11376. for (uint32_t i = 0; i < 3; i++)
  11377. {
  11378. dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i];
  11379. dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i];
  11380. }
  11381. dst_blk.m_low[bc7_subset_index].m_c[3] = 127;
  11382. dst_blk.m_high[bc7_subset_index].m_c[3] = 127;
  11383. dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
  11384. dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1];
  11385. } // subset
  11386. for (uint32_t i = 0; i < 16; i++)
  11387. dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
  11388. break;
  11389. }
  11390. case 6:
  11391. case 11:
  11392. case 13:
  11393. case 17:
  11394. {
  11395. // MODE 6: DualPlane: 1, WeightRange : 2 (4), Subsets : 1, EndpointRange : 18 (160) - BC7 MODE5 RGB
  11396. // MODE 11: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE5
  11397. // MODE 13: DualPlane: 1, WeightRange: 0 (2), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE5
  11398. // MODE 17: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE5
  11399. dst_blk.m_mode = 5;
  11400. dst_blk.m_rotation = (unpacked_src_blk.m_astc.m_ccs + 1) & 3;
  11401. if (total_comps == 2)
  11402. {
  11403. assert(unpacked_src_blk.m_astc.m_ccs == 3);
  11404. dst_blk.m_low->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant * 127 + 127) / 255);
  11405. dst_blk.m_high->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant * 127 + 127) / 255);
  11406. dst_blk.m_low->m_c[1] = dst_blk.m_low->m_c[0];
  11407. dst_blk.m_high->m_c[1] = dst_blk.m_high->m_c[0];
  11408. dst_blk.m_low->m_c[2] = dst_blk.m_low->m_c[0];
  11409. dst_blk.m_high->m_c[2] = dst_blk.m_high->m_c[0];
  11410. dst_blk.m_low->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant);
  11411. dst_blk.m_high->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant);
  11412. }
  11413. else
  11414. {
  11415. for (uint32_t astc_comp = 0; astc_comp < 4; astc_comp++)
  11416. {
  11417. uint32_t bc7_comp = astc_comp;
  11418. // ASTC and BC7 handle dual plane component rotations differently:
  11419. // ASTC: 2nd plane separately interpolates the CCS channel.
  11420. // BC7: 2nd plane channel is swapped with alpha, 2nd plane controls alpha interpolation, then we swap alpha with the desired channel.
  11421. if (astc_comp == (uint32_t)unpacked_src_blk.m_astc.m_ccs)
  11422. bc7_comp = 3;
  11423. else if (astc_comp == 3)
  11424. bc7_comp = unpacked_src_blk.m_astc.m_ccs;
  11425. uint32_t l = 255, h = 255;
  11426. if (astc_comp < total_comps)
  11427. {
  11428. l = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 0]].m_unquant;
  11429. h = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 1]].m_unquant;
  11430. }
  11431. if (bc7_comp < 3)
  11432. {
  11433. l = (l * 127 + 127) / 255;
  11434. h = (h * 127 + 127) / 255;
  11435. }
  11436. dst_blk.m_low->m_c[bc7_comp] = (uint8_t)l;
  11437. dst_blk.m_high->m_c[bc7_comp] = (uint8_t)h;
  11438. }
  11439. }
  11440. if (mode == 13)
  11441. {
  11442. for (uint32_t i = 0; i < 16; i++)
  11443. {
  11444. dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2] ? 3 : 0;
  11445. dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1] ? 3 : 0;
  11446. }
  11447. }
  11448. else
  11449. {
  11450. for (uint32_t i = 0; i < 16; i++)
  11451. {
  11452. dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2];
  11453. dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1];
  11454. }
  11455. }
  11456. break;
  11457. }
  11458. case 7:
  11459. {
  11460. // DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 12 (40) - BC7 MODE2
  11461. dst_blk.m_mode = 2;
  11462. dst_blk.m_partition = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].m_bc73;
  11463. const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].k;
  11464. for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++)
  11465. {
  11466. const uint32_t astc_part = bc7_convert_partition_index_3_to_2(bc7_part, common_pattern_k);
  11467. for (uint32_t c = 0; c < 3; c++)
  11468. {
  11469. dst_blk.m_low[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 0 + astc_part * 6]].m_unquant * 31 + 127) / 255;
  11470. dst_blk.m_high[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 1 + astc_part * 6]].m_unquant * 31 + 127) / 255;
  11471. }
  11472. }
  11473. for (uint32_t i = 0; i < 16; i++)
  11474. dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
  11475. break;
  11476. }
  11477. case UASTC_MODE_INDEX_SOLID_COLOR:
  11478. {
  11479. // Void-Extent: Solid Color RGBA (BC7 MODE5 or MODE6)
  11480. const color32& solid_color = unpacked_src_blk.m_solid_color;
  11481. uint32_t best_err0 = g_bc7_mode_6_optimal_endpoints[solid_color.r][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][0].m_error +
  11482. g_bc7_mode_6_optimal_endpoints[solid_color.b][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][0].m_error;
  11483. uint32_t best_err1 = g_bc7_mode_6_optimal_endpoints[solid_color.r][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][1].m_error +
  11484. g_bc7_mode_6_optimal_endpoints[solid_color.b][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][1].m_error;
  11485. if (best_err0 > 0 && best_err1 > 0)
  11486. {
  11487. dst_blk.m_mode = 5;
  11488. for (uint32_t c = 0; c < 3; c++)
  11489. {
  11490. dst_blk.m_low[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_lo;
  11491. dst_blk.m_high[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_hi;
  11492. }
  11493. memset(dst_blk.m_selectors, BC7ENC_MODE_5_OPTIMAL_INDEX, 16);
  11494. dst_blk.m_low[0].m_c[3] = solid_color.c[3];
  11495. dst_blk.m_high[0].m_c[3] = solid_color.c[3];
  11496. //memset(dst_blk.m_alpha_selectors, 0, 16);
  11497. }
  11498. else
  11499. {
  11500. dst_blk.m_mode = 6;
  11501. uint32_t best_p = 0;
  11502. if (best_err1 < best_err0)
  11503. best_p = 1;
  11504. for (uint32_t c = 0; c < 4; c++)
  11505. {
  11506. dst_blk.m_low[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_lo;
  11507. dst_blk.m_high[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_hi;
  11508. }
  11509. dst_blk.m_pbits[0][0] = best_p;
  11510. dst_blk.m_pbits[0][1] = best_p;
  11511. memset(dst_blk.m_selectors, BC7ENC_MODE_6_OPTIMAL_INDEX, 16);
  11512. }
  11513. break;
  11514. }
  11515. case 9:
  11516. case 16:
  11517. {
  11518. // 9. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE7
  11519. // 16. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE7
  11520. dst_blk.m_mode = 7;
  11521. dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;
  11522. const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;
  11523. for (uint32_t astc_subset = 0; astc_subset < 2; astc_subset++)
  11524. {
  11525. float xl[4], xh[4];
  11526. if (total_comps == 2)
  11527. {
  11528. xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 4]].m_unquant / 255.0f;
  11529. xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 4]].m_unquant / 255.0f;
  11530. xl[1] = xl[0];
  11531. xh[1] = xh[0];
  11532. xl[2] = xl[0];
  11533. xh[2] = xh[0];
  11534. xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 4]].m_unquant / 255.0f;
  11535. xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 4]].m_unquant / 255.0f;
  11536. }
  11537. else
  11538. {
  11539. xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 8]].m_unquant / 255.0f;
  11540. xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 8]].m_unquant / 255.0f;
  11541. xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4 + astc_subset * 8]].m_unquant / 255.0f;
  11542. xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6 + astc_subset * 8]].m_unquant / 255.0f;
  11543. xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 8]].m_unquant / 255.0f;
  11544. xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 8]].m_unquant / 255.0f;
  11545. xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5 + astc_subset * 8]].m_unquant / 255.0f;
  11546. xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7 + astc_subset * 8]].m_unquant / 255.0f;
  11547. }
  11548. uint32_t best_pbits[2] = { 0, 0 };
  11549. color_quad_u8 bestMinColor, bestMaxColor;
  11550. memset(&bestMinColor, 0, sizeof(bestMinColor));
  11551. memset(&bestMaxColor, 0, sizeof(bestMaxColor));
  11552. determine_unique_pbits(4, 5, xl, xh, bestMinColor, bestMaxColor, best_pbits);
  11553. const uint32_t bc7_subset_index = invert_partition ? (1 - astc_subset) : astc_subset;
  11554. dst_blk.m_low[bc7_subset_index] = bestMinColor;
  11555. dst_blk.m_high[bc7_subset_index] = bestMaxColor;
  11556. dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
  11557. dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1];
  11558. } // astc_subset
  11559. for (uint32_t i = 0; i < 16; i++)
  11560. dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
  11561. break;
  11562. }
  11563. default:
  11564. return false;
  11565. }
  11566. return true;
  11567. }
  11568. bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk)
  11569. {
  11570. unpacked_uastc_block unpacked_src_blk;
  11571. if (!unpack_uastc(src_blk, unpacked_src_blk, false, false))
  11572. return false;
  11573. return transcode_uastc_to_bc7(unpacked_src_blk, dst_blk);
  11574. }
  11575. bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst)
  11576. {
  11577. bc7_optimization_results temp;
  11578. if (!transcode_uastc_to_bc7(src_blk, temp))
  11579. return false;
  11580. encode_bc7_block(pDst, &temp);
  11581. return true;
  11582. }
  11583. color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock)
  11584. {
  11585. color32 result;
  11586. for (uint32_t c = 0; c < 3; c++)
  11587. {
  11588. static const int s_divs[3] = { 1, 3, 9 };
  11589. int delta = 0;
  11590. switch (bias)
  11591. {
  11592. case 2: delta = subblock ? 0 : ((c == 0) ? -1 : 0); break;
  11593. case 5: delta = subblock ? 0 : ((c == 1) ? -1 : 0); break;
  11594. case 6: delta = subblock ? 0 : ((c == 2) ? -1 : 0); break;
  11595. case 7: delta = subblock ? 0 : ((c == 0) ? 1 : 0); break;
  11596. case 11: delta = subblock ? 0 : ((c == 1) ? 1 : 0); break;
  11597. case 15: delta = subblock ? 0 : ((c == 2) ? 1 : 0); break;
  11598. case 18: delta = subblock ? ((c == 0) ? -1 : 0) : 0; break;
  11599. case 19: delta = subblock ? ((c == 1) ? -1 : 0) : 0; break;
  11600. case 20: delta = subblock ? ((c == 2) ? -1 : 0) : 0; break;
  11601. case 21: delta = subblock ? ((c == 0) ? 1 : 0) : 0; break;
  11602. case 24: delta = subblock ? ((c == 1) ? 1 : 0) : 0; break;
  11603. case 8: delta = subblock ? ((c == 2) ? 1 : 0) : 0; break;
  11604. case 10: delta = -2; break;
  11605. case 27: delta = subblock ? 0 : -1; break;
  11606. case 28: delta = subblock ? -1 : 1; break;
  11607. case 29: delta = subblock ? 1 : 0; break;
  11608. case 30: delta = subblock ? -1 : 0; break;
  11609. case 31: delta = subblock ? 0 : 1; break;
  11610. default:
  11611. delta = ((bias / s_divs[c]) % 3) - 1;
  11612. break;
  11613. }
  11614. int v = block_color[c];
  11615. if (v == 0)
  11616. {
  11617. if (delta == -2)
  11618. v += 3;
  11619. else
  11620. v += delta + 1;
  11621. }
  11622. else if (v == (int)limit)
  11623. {
  11624. v += (delta - 1);
  11625. }
  11626. else
  11627. {
  11628. v += delta;
  11629. if ((v < 0) || (v > (int)limit))
  11630. v = (v - delta) - delta;
  11631. }
  11632. assert(v >= 0);
  11633. assert(v <= (int)limit);
  11634. result[c] = (uint8_t)v;
  11635. }
  11636. return result;
  11637. }
  11638. static void etc1_determine_selectors(decoder_etc_block& dst_blk, const color32* pSource_pixels, uint32_t first_subblock, uint32_t last_subblock)
  11639. {
  11640. static const uint8_t s_tran[4] = { 1, 0, 2, 3 };
  11641. uint16_t l_bitmask = 0;
  11642. uint16_t h_bitmask = 0;
  11643. for (uint32_t subblock = first_subblock; subblock < last_subblock; subblock++)
  11644. {
  11645. color32 block_colors[4];
  11646. dst_blk.get_block_colors(block_colors, subblock);
  11647. uint32_t block_y[4];
  11648. for (uint32_t i = 0; i < 4; i++)
  11649. block_y[i] = block_colors[i][0] * 54 + block_colors[i][1] * 183 + block_colors[i][2] * 19;
  11650. const uint32_t block_y01 = block_y[0] + block_y[1];
  11651. const uint32_t block_y12 = block_y[1] + block_y[2];
  11652. const uint32_t block_y23 = block_y[2] + block_y[3];
  11653. // X0 X0 X0 X0 X1 X1 X1 X1 X2 X2 X2 X2 X3 X3 X3 X3
  11654. // Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3
  11655. if (dst_blk.get_flip_bit())
  11656. {
  11657. uint32_t ofs = subblock * 2;
  11658. for (uint32_t y = 0; y < 2; y++)
  11659. {
  11660. for (uint32_t x = 0; x < 4; x++)
  11661. {
  11662. const color32& c = pSource_pixels[x + (subblock * 2 + y) * 4];
  11663. const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38;
  11664. uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)];
  11665. assert(ofs < 16);
  11666. l_bitmask |= ((t & 1) << ofs);
  11667. h_bitmask |= ((t >> 1) << ofs);
  11668. ofs += 4;
  11669. }
  11670. ofs = (int)ofs + 1 - 4 * 4;
  11671. }
  11672. }
  11673. else
  11674. {
  11675. uint32_t ofs = (subblock * 2) * 4;
  11676. for (uint32_t x = 0; x < 2; x++)
  11677. {
  11678. for (uint32_t y = 0; y < 4; y++)
  11679. {
  11680. const color32& c = pSource_pixels[subblock * 2 + x + y * 4];
  11681. const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38;
  11682. uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)];
  11683. assert(ofs < 16);
  11684. l_bitmask |= ((t & 1) << ofs);
  11685. h_bitmask |= ((t >> 1) << ofs);
  11686. ++ofs;
  11687. }
  11688. }
  11689. }
  11690. }
  11691. dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
  11692. dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
  11693. dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
  11694. dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
  11695. }
  // Selector bytes for a block in which every pixel uses the same selector. Row [sel] is copied
  // verbatim into m_bytes[4..7], i.e. { high plane hi, high plane lo, low plane hi, low plane lo }.
  static const uint8_t s_etc1_solid_selectors[4][4] =
  {
    { 0xFF, 0xFF, 0xFF, 0xFF },
    { 0xFF, 0xFF, 0x00, 0x00 },
    { 0x00, 0x00, 0x00, 0x00 },
    { 0x00, 0x00, 0xFF, 0xFF }
  };
  // An (x, y) pixel position; used by the coordinate table for 4x4 blocks.
  struct etc_coord2
  {
    uint8_t m_x;
    uint8_t m_y;
  };
  11701. // [flip][subblock][pixel_index]
  11702. const etc_coord2 g_etc1_pixel_coords[2][2][8] =
  11703. {
  11704. {
  11705. {
  11706. { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
  11707. { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
  11708. },
  11709. {
  11710. { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  11711. { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
  11712. }
  11713. },
  11714. {
  11715. {
  11716. { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
  11717. { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }
  11718. },
  11719. {
  11720. { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
  11721. { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
  11722. },
  11723. }
  11724. };
  11725. void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
  11726. {
  11727. decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst);
  11728. if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
  11729. {
  11730. dst_blk.m_bytes[3] = (uint8_t)((unpacked_src_blk.m_etc1_diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten0 << 2));
  11731. if (unpacked_src_blk.m_etc1_diff)
  11732. {
  11733. dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r << 3);
  11734. dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g << 3);
  11735. dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b << 3);
  11736. }
  11737. else
  11738. {
  11739. dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r | (unpacked_src_blk.m_etc1_r << 4));
  11740. dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g | (unpacked_src_blk.m_etc1_g << 4));
  11741. dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b | (unpacked_src_blk.m_etc1_b << 4));
  11742. }
  11743. memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[unpacked_src_blk.m_etc1_selector][0], 4);
  11744. return;
  11745. }
  11746. const bool flip = unpacked_src_blk.m_etc1_flip != 0;
  11747. const bool diff = unpacked_src_blk.m_etc1_diff != 0;
  11748. dst_blk.m_bytes[3] = (uint8_t)((int)flip | (diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten1 << 2));
  11749. const uint32_t limit = diff ? 31 : 15;
  11750. color32 block_colors[2];
  11751. for (uint32_t subset = 0; subset < 2; subset++)
  11752. {
  11753. uint32_t avg_color[3];
  11754. memset(avg_color, 0, sizeof(avg_color));
  11755. for (uint32_t j = 0; j < 8; j++)
  11756. {
  11757. const etc_coord2& c = g_etc1_pixel_coords[flip][subset][j];
  11758. avg_color[0] += block_pixels[c.m_y][c.m_x].r;
  11759. avg_color[1] += block_pixels[c.m_y][c.m_x].g;
  11760. avg_color[2] += block_pixels[c.m_y][c.m_x].b;
  11761. } // j
  11762. block_colors[subset][0] = (uint8_t)((avg_color[0] * limit + 1020) / (8 * 255));
  11763. block_colors[subset][1] = (uint8_t)((avg_color[1] * limit + 1020) / (8 * 255));
  11764. block_colors[subset][2] = (uint8_t)((avg_color[2] * limit + 1020) / (8 * 255));
  11765. block_colors[subset][3] = 0;
  11766. if (g_uastc_mode_has_etc1_bias[unpacked_src_blk.m_mode])
  11767. {
  11768. block_colors[subset] = apply_etc1_bias(block_colors[subset], unpacked_src_blk.m_etc1_bias, limit, subset);
  11769. }
  11770. } // subset
  11771. if (diff)
  11772. {
  11773. int dr = block_colors[1].r - block_colors[0].r;
  11774. int dg = block_colors[1].g - block_colors[0].g;
  11775. int db = block_colors[1].b - block_colors[0].b;
  11776. dr = basisu::clamp<int>(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
  11777. dg = basisu::clamp<int>(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
  11778. db = basisu::clamp<int>(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
  11779. if (dr < 0) dr += 8;
  11780. if (dg < 0) dg += 8;
  11781. if (db < 0) db += 8;
  11782. dst_blk.m_bytes[0] = (uint8_t)((block_colors[0].r << 3) | dr);
  11783. dst_blk.m_bytes[1] = (uint8_t)((block_colors[0].g << 3) | dg);
  11784. dst_blk.m_bytes[2] = (uint8_t)((block_colors[0].b << 3) | db);
  11785. }
  11786. else
  11787. {
  11788. dst_blk.m_bytes[0] = (uint8_t)(block_colors[1].r | (block_colors[0].r << 4));
  11789. dst_blk.m_bytes[1] = (uint8_t)(block_colors[1].g | (block_colors[0].g << 4));
  11790. dst_blk.m_bytes[2] = (uint8_t)(block_colors[1].b | (block_colors[0].b << 4));
  11791. }
  11792. etc1_determine_selectors(dst_blk, &block_pixels[0][0], 0, 2);
  11793. }
  11794. bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst)
  11795. {
  11796. unpacked_uastc_block unpacked_src_blk;
  11797. if (!unpack_uastc(src_blk, unpacked_src_blk, false))
  11798. return false;
  11799. color32 block_pixels[4][4];
  11800. if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR)
  11801. {
  11802. const bool unpack_srgb = false;
  11803. if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
  11804. return false;
  11805. }
  11806. transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, pDst);
  11807. return true;
  11808. }
  // Squared difference between an 8-bit sample c and the reference level y.
  static inline int gray_distance2(const uint8_t c, int y)
  {
    const int diff = static_cast<int>(c) - y;
    return diff * diff;
  }
  11814. static bool pack_etc1_y_estimate_flipped(const uint8_t* pSrc_pixels,
  11815. int& upper_avg, int& lower_avg, int& left_avg, int& right_avg)
  11816. {
  11817. int sums[2][2];
  11818. #define GET_XY(x, y) pSrc_pixels[(x) + ((y) * 4)]
  11819. sums[0][0] = GET_XY(0, 0) + GET_XY(0, 1) + GET_XY(1, 0) + GET_XY(1, 1);
  11820. sums[1][0] = GET_XY(2, 0) + GET_XY(2, 1) + GET_XY(3, 0) + GET_XY(3, 1);
  11821. sums[0][1] = GET_XY(0, 2) + GET_XY(0, 3) + GET_XY(1, 2) + GET_XY(1, 3);
  11822. sums[1][1] = GET_XY(2, 2) + GET_XY(2, 3) + GET_XY(3, 2) + GET_XY(3, 3);
  11823. upper_avg = (sums[0][0] + sums[1][0] + 4) / 8;
  11824. lower_avg = (sums[0][1] + sums[1][1] + 4) / 8;
  11825. left_avg = (sums[0][0] + sums[0][1] + 4) / 8;
  11826. right_avg = (sums[1][0] + sums[1][1] + 4) / 8;
  11827. #undef GET_XY
  11828. #define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a)
  11829. int upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0;
  11830. for (uint32_t i = 0; i < 4; i++)
  11831. {
  11832. for (uint32_t j = 0; j < 2; j++)
  11833. {
  11834. upper_gray_dist += GET_XY(i, j, upper_avg);
  11835. lower_gray_dist += GET_XY(i, 2 + j, lower_avg);
  11836. left_gray_dist += GET_XY(j, i, left_avg);
  11837. right_gray_dist += GET_XY(2 + j, i, right_avg);
  11838. }
  11839. }
  11840. #undef GET_XY
  11841. int upper_lower_sum = upper_gray_dist + lower_gray_dist;
  11842. int left_right_sum = left_gray_dist + right_gray_dist;
  11843. return upper_lower_sum < left_right_sum;
  11844. }
  // ETC1 configuration for a solid block of a single 8-bit value, indexed by that value.
  // Each entry packs, from the least significant bit upwards:
  //   bits 0..4 : 5-bit base color component (differential mode)
  //   bits 5..6 : selector, used as the row index into s_etc1_solid_selectors
  //   bits 7..9 : intensity table index
  static const uint16_t g_etc1_y_solid_block_configs[256] =
  {
    0, 781, 64, 161, 260, 192, 33, 131, 96, 320, 65, 162, 261, 193, 34, 291,
    97, 224, 66, 163, 262, 194, 35, 549, 98, 4, 67, 653, 164, 195, 523, 36,
    99, 5, 578, 68, 165, 353, 196, 37, 135, 100, 324, 69, 166, 354, 197, 38,
    295, 101, 228, 70, 167, 355, 198, 39, 553, 102, 8, 71, 608, 168, 199, 527,
    40, 103, 9, 582, 72, 169, 357, 200, 41, 139, 104, 328, 73, 170, 358, 201,
    42, 299, 105, 232, 74, 171, 359, 202, 43, 557, 106, 12, 75, 612, 172, 203,
    531, 44, 107, 13, 586, 76, 173, 361, 204, 45, 143, 108, 332, 77, 174, 362,
    205, 46, 303, 109, 236, 78, 175, 363, 206, 47, 561, 110, 16, 79, 616, 176,
    207, 535, 48, 111, 17, 590, 80, 177, 365, 208, 49, 147, 112, 336, 81, 178,
    366, 209, 50, 307, 113, 240, 82, 179, 367, 210, 51, 565, 114, 20, 83, 620,
    180, 211, 539, 52, 115, 21, 594, 84, 181, 369, 212, 53, 151, 116, 340, 85,
    182, 370, 213, 54, 311, 117, 244, 86, 183, 371, 214, 55, 569, 118, 24, 87,
    624, 184, 215, 543, 56, 119, 25, 598, 88, 185, 373, 216, 57, 155, 120, 344,
    89, 186, 374, 217, 58, 315, 121, 248, 90, 187, 375, 218, 59, 573, 122, 28,
    91, 628, 188, 219, 754, 60, 123, 29, 602, 92, 189, 377, 220, 61, 159, 124,
    348, 93, 190, 378, 221, 62, 319, 125, 252, 94, 191, 379, 222, 63, 882, 126
  };
  // Individual-mode (4-bit base) ETC1 subblock configurations for subblocks whose values span a
  // range of 2 or 3, indexed by the subblock's lowest value. Each entry packs:
  //   bits 0..2   : intensity table index
  //   bits 3..7   : base color component (expected to be <= 15)
  //   bits 8..15  : four 2-bit selectors, sel0 in bits 8..9 up to sel3 in bits 14..15;
  //                 selN is used for a pixel whose value is (lowest value + N)
  // The values look like the output of the disabled (#if 0) search loop kept in the single-channel
  // transcode_uastc_to_etc1 overload. Rows are wrapped every 17 entries so the repeating pattern
  // lines up; the wrapping itself has no meaning.
  static const uint16_t g_etc1_y_solid_block_4i_configs[256] =
  {
    0xA000, 0xA800, 0x540B, 0xAA01, 0xAA01, 0xFE00, 0xFF00, 0xFF00,
    0x8, 0x5515, 0x5509, 0x5509, 0xAA03, 0x5508, 0x5508, 0x9508, 0xA508, 0xA908, 0xAA08, 0x5513, 0xAA09, 0xAA09, 0xAA05, 0xFF08, 0xFF08,
    0x10, 0x551D, 0x5511, 0x5511, 0xAA0B, 0x5510, 0x5510, 0x9510, 0xA510, 0xA910, 0xAA10, 0x551B, 0xAA11, 0xAA11, 0xAA0D, 0xFF10, 0xFF10,
    0x18, 0x5525, 0x5519, 0x5519, 0xAA13, 0x5518, 0x5518, 0x9518, 0xA518, 0xA918, 0xAA18, 0x5523, 0xAA19, 0xAA19, 0xAA15, 0xFF18, 0xFF18,
    0x20, 0x552D, 0x5521, 0x5521, 0xAA1B, 0x5520, 0x5520, 0x9520, 0xA520, 0xA920, 0xAA20, 0x552B, 0xAA21, 0xAA21, 0xAA1D, 0xFF20, 0xFF20,
    0x28, 0x5535, 0x5529, 0x5529, 0xAA23, 0x5528, 0x5528, 0x9528, 0xA528, 0xA928, 0xAA28, 0x5533, 0xAA29, 0xAA29, 0xAA25, 0xFF28, 0xFF28,
    0x30, 0x553D, 0x5531, 0x5531, 0xAA2B, 0x5530, 0x5530, 0x9530, 0xA530, 0xA930, 0xAA30, 0x553B, 0xAA31, 0xAA31, 0xAA2D, 0xFF30, 0xFF30,
    0x38, 0x5545, 0x5539, 0x5539, 0xAA33, 0x5538, 0x5538, 0x9538, 0xA538, 0xA938, 0xAA38, 0x5543, 0xAA39, 0xAA39, 0xAA35, 0xFF38, 0xFF38,
    0x40, 0x554D, 0x5541, 0x5541, 0xAA3B, 0x5540, 0x5540, 0x9540, 0xA540, 0xA940, 0xAA40, 0x554B, 0xAA41, 0xAA41, 0xAA3D, 0xFF40, 0xFF40,
    0x48, 0x5555, 0x5549, 0x5549, 0xAA43, 0x5548, 0x5548, 0x9548, 0xA548, 0xA948, 0xAA48, 0x5553, 0xAA49, 0xAA49, 0xAA45, 0xFF48, 0xFF48,
    0x50, 0x555D, 0x5551, 0x5551, 0xAA4B, 0x5550, 0x5550, 0x9550, 0xA550, 0xA950, 0xAA50, 0x555B, 0xAA51, 0xAA51, 0xAA4D, 0xFF50, 0xFF50,
    0x58, 0x5565, 0x5559, 0x5559, 0xAA53, 0x5558, 0x5558, 0x9558, 0xA558, 0xA958, 0xAA58, 0x5563, 0xAA59, 0xAA59, 0xAA55, 0xFF58, 0xFF58,
    0x60, 0x556D, 0x5561, 0x5561, 0xAA5B, 0x5560, 0x5560, 0x9560, 0xA560, 0xA960, 0xAA60, 0x556B, 0xAA61, 0xAA61, 0xAA5D, 0xFF60, 0xFF60,
    0x68, 0x5575, 0x5569, 0x5569, 0xAA63, 0x5568, 0x5568, 0x9568, 0xA568, 0xA968, 0xAA68, 0x5573, 0xAA69, 0xAA69, 0xAA65, 0xFF68, 0xFF68,
    0x70, 0x557D, 0x5571, 0x5571, 0xAA6B, 0x5570, 0x5570, 0x9570, 0xA570, 0xA970, 0xAA70, 0x557B, 0xAA71, 0xAA71, 0xAA6D, 0xFF70, 0xFF70,
    0x78, 0x78, 0x5579, 0x5579, 0xAA73, 0x5578, 0x9578, 0x2578, 0xE6E, 0x278
  };
  // Individual-mode (4-bit base) ETC1 subblock configurations for subblocks whose values span a
  // range of exactly 1, indexed by the subblock's lowest value. Each entry packs:
  //   bits 0..2   : intensity table index
  //   bits 3..7   : base color component (expected to be <= 15)
  //   bits 8..11  : two 2-bit selectors, sel0 in bits 8..9 and sel1 in bits 10..11;
  //                 selN is used for a pixel whose value is (lowest value + N)
  // Rows are wrapped every 17 entries so the repeating pattern lines up; the wrapping itself has
  // no meaning.
  static const uint16_t g_etc1_y_solid_block_2i_configs[256] =
  {
    0x416, 0x800, 0xA00, 0x50B, 0xA01, 0xA01, 0xF00, 0xF00, 0xF00,
    0x8, 0x515, 0x509, 0x509, 0xA03, 0x508, 0x508, 0xF01, 0xF01, 0xA08, 0xA08, 0x513, 0xA09, 0xA09, 0xA05, 0xF08, 0xF08,
    0x10, 0x51D, 0x511, 0x511, 0xA0B, 0x510, 0x510, 0xF09, 0xF09, 0xA10, 0xA10, 0x51B, 0xA11, 0xA11, 0xA0D, 0xF10, 0xF10,
    0x18, 0x525, 0x519, 0x519, 0xA13, 0x518, 0x518, 0xF11, 0xF11, 0xA18, 0xA18, 0x523, 0xA19, 0xA19, 0xA15, 0xF18, 0xF18,
    0x20, 0x52D, 0x521, 0x521, 0xA1B, 0x520, 0x520, 0xF19, 0xF19, 0xA20, 0xA20, 0x52B, 0xA21, 0xA21, 0xA1D, 0xF20, 0xF20,
    0x28, 0x535, 0x529, 0x529, 0xA23, 0x528, 0x528, 0xF21, 0xF21, 0xA28, 0xA28, 0x533, 0xA29, 0xA29, 0xA25, 0xF28, 0xF28,
    0x30, 0x53D, 0x531, 0x531, 0xA2B, 0x530, 0x530, 0xF29, 0xF29, 0xA30, 0xA30, 0x53B, 0xA31, 0xA31, 0xA2D, 0xF30, 0xF30,
    0x38, 0x545, 0x539, 0x539, 0xA33, 0x538, 0x538, 0xF31, 0xF31, 0xA38, 0xA38, 0x543, 0xA39, 0xA39, 0xA35, 0xF38, 0xF38,
    0x40, 0x54D, 0x541, 0x541, 0xA3B, 0x540, 0x540, 0xF39, 0xF39, 0xA40, 0xA40, 0x54B, 0xA41, 0xA41, 0xA3D, 0xF40, 0xF40,
    0x48, 0x555, 0x549, 0x549, 0xA43, 0x548, 0x548, 0xF41, 0xF41, 0xA48, 0xA48, 0x553, 0xA49, 0xA49, 0xA45, 0xF48, 0xF48,
    0x50, 0x55D, 0x551, 0x551, 0xA4B, 0x550, 0x550, 0xF49, 0xF49, 0xA50, 0xA50, 0x55B, 0xA51, 0xA51, 0xA4D, 0xF50, 0xF50,
    0x58, 0x565, 0x559, 0x559, 0xA53, 0x558, 0x558, 0xF51, 0xF51, 0xA58, 0xA58, 0x563, 0xA59, 0xA59, 0xA55, 0xF58, 0xF58,
    0x60, 0x56D, 0x561, 0x561, 0xA5B, 0x560, 0x560, 0xF59, 0xF59, 0xA60, 0xA60, 0x56B, 0xA61, 0xA61, 0xA5D, 0xF60, 0xF60,
    0x68, 0x575, 0x569, 0x569, 0xA63, 0x568, 0x568, 0xF61, 0xF61, 0xA68, 0xA68, 0x573, 0xA69, 0xA69, 0xA65, 0xF68, 0xF68,
    0x70, 0x57D, 0x571, 0x571, 0xA6B, 0x570, 0x570, 0xF69, 0xF69, 0xA70, 0xA70, 0x57B, 0xA71, 0xA71, 0xA6D, 0xF70, 0xF70,
    0x78, 0x78, 0x579, 0x579, 0xA73, 0x578, 0x578, 0xE6E, 0x278
  };
  // Individual-mode (4-bit base) ETC1 subblock configurations for subblocks in which every value
  // is identical, indexed by that value. Each entry packs:
  //   bits 0..2  : intensity table index
  //   bits 3..7  : base color component (expected to be <= 15)
  //   bits 8..9  : the 2-bit selector used for every pixel
  // The values look like the output of the disabled (#if 0, N = 1) search loop kept in the
  // single-channel transcode_uastc_to_etc1 overload. Rows are wrapped every 17 entries so the
  // repeating pattern lines up; the wrapping itself has no meaning.
  static const uint16_t g_etc1_y_solid_block_1i_configs[256] =
  {
    0x0, 0x116, 0x200, 0x200, 0x10B, 0x201, 0x201, 0x300, 0x300,
    0x8, 0x115, 0x109, 0x109, 0x203, 0x108, 0x108, 0x114, 0x301, 0x204, 0x208, 0x208, 0x113, 0x209, 0x209, 0x205, 0x308,
    0x10, 0x11D, 0x111, 0x111, 0x20B, 0x110, 0x110, 0x11C, 0x309, 0x20C, 0x210, 0x210, 0x11B, 0x211, 0x211, 0x20D, 0x310,
    0x18, 0x125, 0x119, 0x119, 0x213, 0x118, 0x118, 0x124, 0x311, 0x214, 0x218, 0x218, 0x123, 0x219, 0x219, 0x215, 0x318,
    0x20, 0x12D, 0x121, 0x121, 0x21B, 0x120, 0x120, 0x12C, 0x319, 0x21C, 0x220, 0x220, 0x12B, 0x221, 0x221, 0x21D, 0x320,
    0x28, 0x135, 0x129, 0x129, 0x223, 0x128, 0x128, 0x134, 0x321, 0x224, 0x228, 0x228, 0x133, 0x229, 0x229, 0x225, 0x328,
    0x30, 0x13D, 0x131, 0x131, 0x22B, 0x130, 0x130, 0x13C, 0x329, 0x22C, 0x230, 0x230, 0x13B, 0x231, 0x231, 0x22D, 0x330,
    0x38, 0x145, 0x139, 0x139, 0x233, 0x138, 0x138, 0x144, 0x331, 0x234, 0x238, 0x238, 0x143, 0x239, 0x239, 0x235, 0x338,
    0x40, 0x14D, 0x141, 0x141, 0x23B, 0x140, 0x140, 0x14C, 0x339, 0x23C, 0x240, 0x240, 0x14B, 0x241, 0x241, 0x23D, 0x340,
    0x48, 0x155, 0x149, 0x149, 0x243, 0x148, 0x148, 0x154, 0x341, 0x244, 0x248, 0x248, 0x153, 0x249, 0x249, 0x245, 0x348,
    0x50, 0x15D, 0x151, 0x151, 0x24B, 0x150, 0x150, 0x15C, 0x349, 0x24C, 0x250, 0x250, 0x15B, 0x251, 0x251, 0x24D, 0x350,
    0x58, 0x165, 0x159, 0x159, 0x253, 0x158, 0x158, 0x164, 0x351, 0x254, 0x258, 0x258, 0x163, 0x259, 0x259, 0x255, 0x358,
    0x60, 0x16D, 0x161, 0x161, 0x25B, 0x160, 0x160, 0x16C, 0x359, 0x25C, 0x260, 0x260, 0x16B, 0x261, 0x261, 0x25D, 0x360,
    0x68, 0x175, 0x169, 0x169, 0x263, 0x168, 0x168, 0x174, 0x361, 0x264, 0x268, 0x268, 0x173, 0x269, 0x269, 0x265, 0x368,
    0x70, 0x17D, 0x171, 0x171, 0x26B, 0x170, 0x170, 0x17C, 0x369, 0x26C, 0x270, 0x270, 0x17B, 0x271, 0x271, 0x26D, 0x370,
    0x78, 0x78, 0x179, 0x179, 0x273, 0x178, 0x178, 0x26E, 0x278
  };
  11891. // We don't have any useful hints to accelerate single channel ETC1, so we need to real-time encode from scratch.
  11892. bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel)
  11893. {
  11894. unpacked_uastc_block unpacked_src_blk;
  11895. if (!unpack_uastc(src_blk, unpacked_src_blk, false))
  11896. return false;
  11897. #if 0
  11898. for (uint32_t individ = 0; individ < 2; individ++)
  11899. {
  11900. uint32_t overall_error = 0;
  11901. for (uint32_t c = 0; c < 256; c++)
  11902. {
  11903. uint32_t best_err = UINT32_MAX;
  11904. uint32_t best_individ = 0;
  11905. uint32_t best_base = 0;
  11906. uint32_t best_sels[4] = { 0,0,0,0 };
  11907. uint32_t best_table = 0;
  11908. const uint32_t limit = individ ? 16 : 32;
  11909. for (uint32_t table = 0; table < 8; table++)
  11910. {
  11911. for (uint32_t base = 0; base < limit; base++)
  11912. {
  11913. uint32_t total_e = 0;
  11914. uint32_t sels[4] = { 0,0,0,0 };
  11915. const uint32_t N = 4;
  11916. for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++)
  11917. {
  11918. uint32_t best_sel_e = UINT32_MAX;
  11919. uint32_t best_sel = 0;
  11920. for (uint32_t sel = 0; sel < 4; sel++)
  11921. {
  11922. int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2));
  11923. val = clamp255(val + g_etc1_inten_tables[table][sel]);
  11924. int e = iabs(val - clamp255(c + i));
  11925. if (e < best_sel_e)
  11926. {
  11927. best_sel_e = e;
  11928. best_sel = sel;
  11929. }
  11930. } // sel
  11931. sels[i] = best_sel;
  11932. total_e += best_sel_e * best_sel_e;
  11933. } // i
  11934. if (total_e < best_err)
  11935. {
  11936. best_err = total_e;
  11937. best_individ = individ;
  11938. best_base = base;
  11939. memcpy(best_sels, sels, sizeof(best_sels));
  11940. best_table = table;
  11941. }
  11942. } // base
  11943. } // table
  11944. //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]);
  11945. uint32_t encoded = best_table | (best_base << 3) |
  11946. (best_sels[0] << 8) |
  11947. (best_sels[1] << 10) |
  11948. (best_sels[2] << 12) |
  11949. (best_sels[3] << 14);
  11950. printf("0x%X,", encoded);
  11951. overall_error += best_err;
  11952. } // c
  11953. printf("\n");
  11954. printf("Overall error: %u\n", overall_error);
  11955. } // individ
  11956. exit(0);
  11957. #endif
  11958. #if 0
  11959. for (uint32_t individ = 0; individ < 2; individ++)
  11960. {
  11961. uint32_t overall_error = 0;
  11962. for (uint32_t c = 0; c < 256; c++)
  11963. {
  11964. uint32_t best_err = UINT32_MAX;
  11965. uint32_t best_individ = 0;
  11966. uint32_t best_base = 0;
  11967. uint32_t best_sels[4] = { 0,0,0,0 };
  11968. uint32_t best_table = 0;
  11969. const uint32_t limit = individ ? 16 : 32;
  11970. for (uint32_t table = 0; table < 8; table++)
  11971. {
  11972. for (uint32_t base = 0; base < limit; base++)
  11973. {
  11974. uint32_t total_e = 0;
  11975. uint32_t sels[4] = { 0,0,0,0 };
  11976. const uint32_t N = 1;
  11977. for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++)
  11978. {
  11979. uint32_t best_sel_e = UINT32_MAX;
  11980. uint32_t best_sel = 0;
  11981. for (uint32_t sel = 0; sel < 4; sel++)
  11982. {
  11983. int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2));
  11984. val = clamp255(val + g_etc1_inten_tables[table][sel]);
  11985. int e = iabs(val - clamp255(c + i));
  11986. if (e < best_sel_e)
  11987. {
  11988. best_sel_e = e;
  11989. best_sel = sel;
  11990. }
  11991. } // sel
  11992. sels[i] = best_sel;
  11993. total_e += best_sel_e * best_sel_e;
  11994. } // i
  11995. if (total_e < best_err)
  11996. {
  11997. best_err = total_e;
  11998. best_individ = individ;
  11999. best_base = base;
  12000. memcpy(best_sels, sels, sizeof(best_sels));
  12001. best_table = table;
  12002. }
  12003. } // base
  12004. } // table
  12005. //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]);
  12006. uint32_t encoded = best_table | (best_base << 3) |
  12007. (best_sels[0] << 8) |
  12008. (best_sels[1] << 10) |
  12009. (best_sels[2] << 12) |
  12010. (best_sels[3] << 14);
  12011. printf("0x%X,", encoded);
  12012. overall_error += best_err;
  12013. } // c
  12014. printf("\n");
  12015. printf("Overall error: %u\n", overall_error);
  12016. } // individ
  12017. exit(0);
  12018. #endif
  12019. decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst);
  12020. if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
  12021. {
  12022. const uint32_t y = unpacked_src_blk.m_solid_color[channel];
  12023. const uint32_t encoded_config = g_etc1_y_solid_block_configs[y];
  12024. const uint32_t base = encoded_config & 31;
  12025. const uint32_t sel = (encoded_config >> 5) & 3;
  12026. const uint32_t table = encoded_config >> 7;
  12027. dst_blk.m_bytes[3] = (uint8_t)(2 | (table << 5) | (table << 2));
  12028. dst_blk.m_bytes[0] = (uint8_t)(base << 3);
  12029. dst_blk.m_bytes[1] = (uint8_t)(base << 3);
  12030. dst_blk.m_bytes[2] = (uint8_t)(base << 3);
  12031. memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[sel][0], 4);
  12032. return true;
  12033. }
  12034. color32 block_pixels[4][4];
  12035. const bool unpack_srgb = false;
  12036. if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
  12037. return false;
  12038. uint8_t block_y[4][4];
  12039. for (uint32_t i = 0; i < 16; i++)
  12040. ((uint8_t*)block_y)[i] = ((color32*)block_pixels)[i][channel];
  12041. int upper_avg, lower_avg, left_avg, right_avg;
  12042. bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg);
  12043. // non-flipped: | |
  12044. // vs.
  12045. // flipped: --
  12046. // --
  12047. uint32_t low[2] = { 255, 255 }, high[2] = { 0, 0 };
  12048. if (flip)
  12049. {
  12050. for (uint32_t y = 0; y < 2; y++)
  12051. {
  12052. for (uint32_t x = 0; x < 4; x++)
  12053. {
  12054. const uint32_t v = block_y[y][x];
  12055. low[0] = basisu::minimum(low[0], v);
  12056. high[0] = basisu::maximum(high[0], v);
  12057. }
  12058. }
  12059. for (uint32_t y = 2; y < 4; y++)
  12060. {
  12061. for (uint32_t x = 0; x < 4; x++)
  12062. {
  12063. const uint32_t v = block_y[y][x];
  12064. low[1] = basisu::minimum(low[1], v);
  12065. high[1] = basisu::maximum(high[1], v);
  12066. }
  12067. }
  12068. }
  12069. else
  12070. {
  12071. for (uint32_t y = 0; y < 4; y++)
  12072. {
  12073. for (uint32_t x = 0; x < 2; x++)
  12074. {
  12075. const uint32_t v = block_y[y][x];
  12076. low[0] = basisu::minimum(low[0], v);
  12077. high[0] = basisu::maximum(high[0], v);
  12078. }
  12079. }
  12080. for (uint32_t y = 0; y < 4; y++)
  12081. {
  12082. for (uint32_t x = 2; x < 4; x++)
  12083. {
  12084. const uint32_t v = block_y[y][x];
  12085. low[1] = basisu::minimum(low[1], v);
  12086. high[1] = basisu::maximum(high[1], v);
  12087. }
  12088. }
  12089. }
  12090. const uint32_t range[2] = { high[0] - low[0], high[1] - low[1] };
  12091. dst_blk.m_bytes[3] = (uint8_t)((int)flip);
  12092. if ((range[0] <= 3) && (range[1] <= 3))
  12093. {
  12094. // This is primarily for better gradients.
  12095. dst_blk.m_bytes[0] = 0;
  12096. dst_blk.m_bytes[1] = 0;
  12097. dst_blk.m_bytes[2] = 0;
  12098. uint16_t l_bitmask = 0, h_bitmask = 0;
  12099. for (uint32_t subblock = 0; subblock < 2; subblock++)
  12100. {
  12101. const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]);
  12102. const uint32_t table = encoded & 7;
  12103. const uint32_t base = (encoded >> 3) & 31;
  12104. assert(base <= 15);
  12105. const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 };
  12106. dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5));
  12107. const uint32_t sv = base << (subblock ? 0 : 4);
  12108. dst_blk.m_bytes[0] |= (uint8_t)(sv);
  12109. dst_blk.m_bytes[1] |= (uint8_t)(sv);
  12110. dst_blk.m_bytes[2] |= (uint8_t)(sv);
  12111. if (flip)
  12112. {
  12113. uint32_t ofs = subblock * 2;
  12114. for (uint32_t y = 0; y < 2; y++)
  12115. {
  12116. for (uint32_t x = 0; x < 4; x++)
  12117. {
  12118. uint32_t t = block_y[y + subblock * 2][x];
  12119. assert(t >= low[subblock] && t <= high[subblock]);
  12120. t -= low[subblock];
  12121. assert(t <= 3);
  12122. t = g_selector_index_to_etc1[sels[t]];
  12123. assert(ofs < 16);
  12124. l_bitmask |= ((t & 1) << ofs);
  12125. h_bitmask |= ((t >> 1) << ofs);
  12126. ofs += 4;
  12127. }
  12128. ofs = (int)ofs + 1 - 4 * 4;
  12129. }
  12130. }
  12131. else
  12132. {
  12133. uint32_t ofs = (subblock * 2) * 4;
  12134. for (uint32_t x = 0; x < 2; x++)
  12135. {
  12136. for (uint32_t y = 0; y < 4; y++)
  12137. {
  12138. uint32_t t = block_y[y][x + subblock * 2];
  12139. assert(t >= low[subblock] && t <= high[subblock]);
  12140. t -= low[subblock];
  12141. assert(t <= 3);
  12142. t = g_selector_index_to_etc1[sels[t]];
  12143. assert(ofs < 16);
  12144. l_bitmask |= ((t & 1) << ofs);
  12145. h_bitmask |= ((t >> 1) << ofs);
  12146. ++ofs;
  12147. }
  12148. }
  12149. }
  12150. } // subblock
  12151. dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
  12152. dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
  12153. dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
  12154. dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
  12155. return true;
  12156. }
  12157. uint32_t y0 = ((flip ? upper_avg : left_avg) * 31 + 127) / 255;
  12158. uint32_t y1 = ((flip ? lower_avg : right_avg) * 31 + 127) / 255;
  12159. bool diff = true;
  12160. int dy = y1 - y0;
  12161. if ((dy < cETC1ColorDeltaMin) || (dy > cETC1ColorDeltaMax))
  12162. {
  12163. diff = false;
  12164. y0 = ((flip ? upper_avg : left_avg) * 15 + 127) / 255;
  12165. y1 = ((flip ? lower_avg : right_avg) * 15 + 127) / 255;
  12166. dst_blk.m_bytes[0] = (uint8_t)(y1 | (y0 << 4));
  12167. dst_blk.m_bytes[1] = (uint8_t)(y1 | (y0 << 4));
  12168. dst_blk.m_bytes[2] = (uint8_t)(y1 | (y0 << 4));
  12169. }
  12170. else
  12171. {
  12172. dy = basisu::clamp<int>(dy, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
  12173. y1 = y0 + dy;
  12174. if (dy < 0) dy += 8;
  12175. dst_blk.m_bytes[0] = (uint8_t)((y0 << 3) | dy);
  12176. dst_blk.m_bytes[1] = (uint8_t)((y0 << 3) | dy);
  12177. dst_blk.m_bytes[2] = (uint8_t)((y0 << 3) | dy);
  12178. dst_blk.m_bytes[3] |= 2;
  12179. }
  12180. const uint32_t base_y[2] = { diff ? ((y0 << 3) | (y0 >> 2)) : ((y0 << 4) | y0), diff ? ((y1 << 3) | (y1 >> 2)) : ((y1 << 4) | y1) };
  12181. uint32_t enc_range[2];
  12182. for (uint32_t subset = 0; subset < 2; subset++)
  12183. {
  12184. const int pos = basisu::iabs((int)high[subset] - (int)base_y[subset]);
  12185. const int neg = basisu::iabs((int)base_y[subset] - (int)low[subset]);
  12186. enc_range[subset] = basisu::maximum(pos, neg);
  12187. }
  12188. uint16_t l_bitmask = 0, h_bitmask = 0;
  12189. for (uint32_t subblock = 0; subblock < 2; subblock++)
  12190. {
  12191. if ((!diff) && (range[subblock] <= 3))
  12192. {
  12193. const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]);
  12194. const uint32_t table = encoded & 7;
  12195. const uint32_t base = (encoded >> 3) & 31;
  12196. assert(base <= 15);
  12197. const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 };
  12198. dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5));
  12199. const uint32_t mask = ~(0xF << (subblock ? 0 : 4));
  12200. dst_blk.m_bytes[0] &= mask;
  12201. dst_blk.m_bytes[1] &= mask;
  12202. dst_blk.m_bytes[2] &= mask;
  12203. const uint32_t sv = base << (subblock ? 0 : 4);
  12204. dst_blk.m_bytes[0] |= (uint8_t)(sv);
  12205. dst_blk.m_bytes[1] |= (uint8_t)(sv);
  12206. dst_blk.m_bytes[2] |= (uint8_t)(sv);
  12207. if (flip)
  12208. {
  12209. uint32_t ofs = subblock * 2;
  12210. for (uint32_t y = 0; y < 2; y++)
  12211. {
  12212. for (uint32_t x = 0; x < 4; x++)
  12213. {
  12214. uint32_t t = block_y[y + subblock * 2][x];
  12215. assert(t >= low[subblock] && t <= high[subblock]);
  12216. t -= low[subblock];
  12217. assert(t <= 3);
  12218. t = g_selector_index_to_etc1[sels[t]];
  12219. assert(ofs < 16);
  12220. l_bitmask |= ((t & 1) << ofs);
  12221. h_bitmask |= ((t >> 1) << ofs);
  12222. ofs += 4;
  12223. }
  12224. ofs = (int)ofs + 1 - 4 * 4;
  12225. }
  12226. }
  12227. else
  12228. {
  12229. uint32_t ofs = (subblock * 2) * 4;
  12230. for (uint32_t x = 0; x < 2; x++)
  12231. {
  12232. for (uint32_t y = 0; y < 4; y++)
  12233. {
  12234. uint32_t t = block_y[y][x + subblock * 2];
  12235. assert(t >= low[subblock] && t <= high[subblock]);
  12236. t -= low[subblock];
  12237. assert(t <= 3);
  12238. t = g_selector_index_to_etc1[sels[t]];
  12239. assert(ofs < 16);
  12240. l_bitmask |= ((t & 1) << ofs);
  12241. h_bitmask |= ((t >> 1) << ofs);
  12242. ++ofs;
  12243. }
  12244. }
  12245. }
  12246. continue;
  12247. } // if
  12248. uint32_t best_err = UINT32_MAX;
  12249. uint8_t best_sels[8];
  12250. uint32_t best_inten = 0;
  12251. const int base = base_y[subblock];
  12252. const int low_limit = -base;
  12253. const int high_limit = 255 - base;
  12254. assert(low_limit <= 0 && high_limit >= 0);
  12255. uint32_t inten_table_mask = 0xFF;
  12256. const uint32_t er = enc_range[subblock];
  12257. // Each one of these tables is expensive to evaluate, so let's only examine the ones we know may be useful.
  12258. if (er <= 51)
  12259. {
  12260. inten_table_mask = 0xF;
  12261. if (er > 22)
  12262. inten_table_mask &= ~(1 << 0);
  12263. if ((er < 4) || (er > 39))
  12264. inten_table_mask &= ~(1 << 1);
  12265. if (er < 9)
  12266. inten_table_mask &= ~(1 << 2);
  12267. if (er < 12)
  12268. inten_table_mask &= ~(1 << 3);
  12269. }
  12270. else
  12271. {
  12272. inten_table_mask &= ~((1 << 0) | (1 << 1));
  12273. if (er > 60)
  12274. inten_table_mask &= ~(1 << 2);
  12275. if (er > 89)
  12276. inten_table_mask &= ~(1 << 3);
  12277. if (er > 120)
  12278. inten_table_mask &= ~(1 << 4);
  12279. if (er > 136)
  12280. inten_table_mask &= ~(1 << 5);
  12281. if (er > 174)
  12282. inten_table_mask &= ~(1 << 6);
  12283. }
  12284. for (uint32_t inten = 0; inten < 8; inten++)
  12285. {
  12286. if ((inten_table_mask & (1 << inten)) == 0)
  12287. continue;
  12288. const int t0 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][0]);
  12289. const int t1 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][1]);
  12290. const int t2 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][2]);
  12291. const int t3 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][3]);
  12292. assert((t0 <= t1) && (t1 <= t2) && (t2 <= t3));
  12293. const int tv[4] = { t2, t3, t1, t0 };
  12294. const int thresh01 = t0 + t1;
  12295. const int thresh12 = t1 + t2;
  12296. const int thresh23 = t2 + t3;
  12297. assert(thresh01 <= thresh12 && thresh12 <= thresh23);
  12298. static const uint8_t s_table[4] = { 1, 0, 2, 3 };
  12299. uint32_t total_err = 0;
  12300. uint8_t sels[8];
  12301. if (flip)
  12302. {
  12303. if (((int)high[subblock] - base) * 2 < thresh01)
  12304. {
  12305. memset(sels, 3, 8);
  12306. for (uint32_t y = 0; y < 2; y++)
  12307. {
  12308. for (uint32_t x = 0; x < 4; x++)
  12309. {
  12310. const int delta = (int)block_y[y + subblock * 2][x] - base;
  12311. const uint32_t c = 3;
  12312. uint32_t e = basisu::iabs(tv[c] - delta);
  12313. total_err += e * e;
  12314. }
  12315. if (total_err >= best_err)
  12316. break;
  12317. }
  12318. }
  12319. else if (((int)low[subblock] - base) * 2 >= thresh23)
  12320. {
  12321. memset(sels, 1, 8);
  12322. for (uint32_t y = 0; y < 2; y++)
  12323. {
  12324. for (uint32_t x = 0; x < 4; x++)
  12325. {
  12326. const int delta = (int)block_y[y + subblock * 2][x] - base;
  12327. const uint32_t c = 1;
  12328. uint32_t e = basisu::iabs(tv[c] - delta);
  12329. total_err += e * e;
  12330. }
  12331. if (total_err >= best_err)
  12332. break;
  12333. }
  12334. }
  12335. else
  12336. {
  12337. for (uint32_t y = 0; y < 2; y++)
  12338. {
  12339. for (uint32_t x = 0; x < 4; x++)
  12340. {
  12341. const int delta = (int)block_y[y + subblock * 2][x] - base;
  12342. const int delta2 = delta * 2;
  12343. uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)];
  12344. sels[y * 4 + x] = (uint8_t)c;
  12345. uint32_t e = basisu::iabs(tv[c] - delta);
  12346. total_err += e * e;
  12347. }
  12348. if (total_err >= best_err)
  12349. break;
  12350. }
  12351. }
  12352. }
  12353. else
  12354. {
  12355. if (((int)high[subblock] - base) * 2 < thresh01)
  12356. {
  12357. memset(sels, 3, 8);
  12358. for (uint32_t y = 0; y < 4; y++)
  12359. {
  12360. for (uint32_t x = 0; x < 2; x++)
  12361. {
  12362. const int delta = (int)block_y[y][x + subblock * 2] - base;
  12363. const uint32_t c = 3;
  12364. uint32_t e = basisu::iabs(tv[c] - delta);
  12365. total_err += e * e;
  12366. }
  12367. if (total_err >= best_err)
  12368. break;
  12369. }
  12370. }
  12371. else if (((int)low[subblock] - base) * 2 >= thresh23)
  12372. {
  12373. memset(sels, 1, 8);
  12374. for (uint32_t y = 0; y < 4; y++)
  12375. {
  12376. for (uint32_t x = 0; x < 2; x++)
  12377. {
  12378. const int delta = (int)block_y[y][x + subblock * 2] - base;
  12379. const uint32_t c = 1;
  12380. uint32_t e = basisu::iabs(tv[c] - delta);
  12381. total_err += e * e;
  12382. }
  12383. if (total_err >= best_err)
  12384. break;
  12385. }
  12386. }
  12387. else
  12388. {
  12389. for (uint32_t y = 0; y < 4; y++)
  12390. {
  12391. for (uint32_t x = 0; x < 2; x++)
  12392. {
  12393. const int delta = (int)block_y[y][x + subblock * 2] - base;
  12394. const int delta2 = delta * 2;
  12395. uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)];
  12396. sels[y * 2 + x] = (uint8_t)c;
  12397. uint32_t e = basisu::iabs(tv[c] - delta);
  12398. total_err += e * e;
  12399. }
  12400. if (total_err >= best_err)
  12401. break;
  12402. }
  12403. }
  12404. }
  12405. if (total_err < best_err)
  12406. {
  12407. best_err = total_err;
  12408. best_inten = inten;
  12409. memcpy(best_sels, sels, 8);
  12410. }
  12411. } // inten
  12412. //g_inten_hist[best_inten][enc_range[subblock]]++;
  12413. dst_blk.m_bytes[3] |= (uint8_t)(best_inten << (subblock ? 2 : 5));
  12414. if (flip)
  12415. {
  12416. uint32_t ofs = subblock * 2;
  12417. for (uint32_t y = 0; y < 2; y++)
  12418. {
  12419. for (uint32_t x = 0; x < 4; x++)
  12420. {
  12421. uint32_t t = best_sels[y * 4 + x];
  12422. assert(ofs < 16);
  12423. l_bitmask |= ((t & 1) << ofs);
  12424. h_bitmask |= ((t >> 1) << ofs);
  12425. ofs += 4;
  12426. }
  12427. ofs = (int)ofs + 1 - 4 * 4;
  12428. }
  12429. }
  12430. else
  12431. {
  12432. uint32_t ofs = (subblock * 2) * 4;
  12433. for (uint32_t x = 0; x < 2; x++)
  12434. {
  12435. for (uint32_t y = 0; y < 4; y++)
  12436. {
  12437. uint32_t t = best_sels[y * 2 + x];
  12438. assert(ofs < 16);
  12439. l_bitmask |= ((t & 1) << ofs);
  12440. h_bitmask |= ((t >> 1) << ofs);
  12441. ++ofs;
  12442. }
  12443. }
  12444. }
  12445. } // subblock
  12446. dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
  12447. dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
  12448. dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
  12449. dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
  12450. return true;
  12451. }
  12452. const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3, ETC2_EAC_MAX_VALUE_SELECTOR = 7;
  12453. void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
  12454. {
  12455. eac_block& dst = *static_cast<eac_block*>(pDst);
  12456. const color32* pSrc_pixels = &block_pixels[0][0];
  12457. if ((!g_uastc_mode_has_alpha[unpacked_src_blk.m_mode]) || (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR))
  12458. {
  12459. const uint32_t a = (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) ? unpacked_src_blk.m_solid_color[3] : 255;
  12460. dst.m_base = a;
  12461. dst.m_table = 13;
  12462. dst.m_multiplier = 1;
  12463. memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
  12464. return;
  12465. }
  12466. uint32_t min_a = 255, max_a = 0;
  12467. for (uint32_t i = 0; i < 16; i++)
  12468. {
  12469. min_a = basisu::minimum<uint32_t>(min_a, pSrc_pixels[i].a);
  12470. max_a = basisu::maximum<uint32_t>(max_a, pSrc_pixels[i].a);
  12471. }
  12472. if (min_a == max_a)
  12473. {
  12474. dst.m_base = min_a;
  12475. dst.m_table = 13;
  12476. dst.m_multiplier = 1;
  12477. memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
  12478. return;
  12479. }
  12480. const uint32_t table = unpacked_src_blk.m_etc2_hints & 0xF;
  12481. const int multiplier = unpacked_src_blk.m_etc2_hints >> 4;
  12482. assert(multiplier >= 1);
  12483. dst.m_multiplier = multiplier;
  12484. dst.m_table = table;
  12485. const float range = (float)(g_eac_modifier_table[dst.m_table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]);
  12486. const int center = (int)roundf(basisu::lerp((float)min_a, (float)max_a, (float)(0 - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range));
  12487. dst.m_base = center;
  12488. const int8_t* pTable = &g_eac_modifier_table[dst.m_table][0];
  12489. uint32_t vals[8];
  12490. for (uint32_t j = 0; j < 8; j++)
  12491. vals[j] = clamp255(center + (pTable[j] * multiplier));
  12492. uint64_t sels = 0;
  12493. for (uint32_t i = 0; i < 16; i++)
  12494. {
  12495. const uint32_t a = block_pixels[i & 3][i >> 2].a;
  12496. const uint32_t err0 = (basisu::iabs(vals[0] - a) << 3) | 0;
  12497. const uint32_t err1 = (basisu::iabs(vals[1] - a) << 3) | 1;
  12498. const uint32_t err2 = (basisu::iabs(vals[2] - a) << 3) | 2;
  12499. const uint32_t err3 = (basisu::iabs(vals[3] - a) << 3) | 3;
  12500. const uint32_t err4 = (basisu::iabs(vals[4] - a) << 3) | 4;
  12501. const uint32_t err5 = (basisu::iabs(vals[5] - a) << 3) | 5;
  12502. const uint32_t err6 = (basisu::iabs(vals[6] - a) << 3) | 6;
  12503. const uint32_t err7 = (basisu::iabs(vals[7] - a) << 3) | 7;
  12504. const uint32_t min_err = basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(err0, err1, err2), err3), err4), err5), err6), err7);
  12505. const uint64_t best_index = min_err & 7;
  12506. sels |= (best_index << (45 - i * 3));
  12507. }
  12508. dst.set_selector_bits(sels);
  12509. }
  12510. bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst)
  12511. {
  12512. eac_block& dst_etc2_eac_a8_blk = *static_cast<eac_block*>(pDst);
  12513. decoder_etc_block& dst_etc1_blk = static_cast<decoder_etc_block*>(pDst)[1];
  12514. unpacked_uastc_block unpacked_src_blk;
  12515. if (!unpack_uastc(src_blk, unpacked_src_blk, false))
  12516. return false;
  12517. color32 block_pixels[4][4];
  12518. if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR)
  12519. {
  12520. const bool unpack_srgb = false;
  12521. if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
  12522. return false;
  12523. }
  12524. transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &dst_etc2_eac_a8_blk);
  12525. transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, &dst_etc1_blk);
  12526. return true;
  12527. }
  12528. static const uint8_t s_uastc5_to_bc1[32] = { 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1 };
  12529. static const uint8_t s_uastc4_to_bc1[16] = { 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 1, 1, 1 };
  12530. static const uint8_t s_uastc3_to_bc1[8] = { 0, 0, 2, 2, 3, 3, 1, 1 };
  12531. static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 };
  12532. static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 };
  12533. const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 };
  12534. void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride)
  12535. {
  12536. uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v;
  12537. {
  12538. min0_v = max0_v = pPixels[0 * stride];
  12539. min1_v = max1_v = pPixels[1 * stride];
  12540. min2_v = max2_v = pPixels[2 * stride];
  12541. min3_v = max3_v = pPixels[3 * stride];
  12542. }
  12543. {
  12544. uint32_t v0 = pPixels[4 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0);
  12545. uint32_t v1 = pPixels[5 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1);
  12546. uint32_t v2 = pPixels[6 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2);
  12547. uint32_t v3 = pPixels[7 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3);
  12548. }
  12549. {
  12550. uint32_t v0 = pPixels[8 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0);
  12551. uint32_t v1 = pPixels[9 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1);
  12552. uint32_t v2 = pPixels[10 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2);
  12553. uint32_t v3 = pPixels[11 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3);
  12554. }
  12555. {
  12556. uint32_t v0 = pPixels[12 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0);
  12557. uint32_t v1 = pPixels[13 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1);
  12558. uint32_t v2 = pPixels[14 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2);
  12559. uint32_t v3 = pPixels[15 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3);
  12560. }
  12561. const uint32_t min_v = basisu::minimum(min0_v, min1_v, min2_v, min3_v);
  12562. const uint32_t max_v = basisu::maximum(max0_v, max1_v, max2_v, max3_v);
  12563. uint8_t* pDst_bytes = static_cast<uint8_t*>(pDst);
  12564. pDst_bytes[0] = (uint8_t)max_v;
  12565. pDst_bytes[1] = (uint8_t)min_v;
  12566. if (max_v == min_v)
  12567. {
  12568. memset(pDst_bytes + 2, 0, 6);
  12569. return;
  12570. }
  12571. const uint32_t delta = max_v - min_v;
  12572. // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors.
  12573. const int t0 = delta * 13;
  12574. const int t1 = delta * 11;
  12575. const int t2 = delta * 9;
  12576. const int t3 = delta * 7;
  12577. const int t4 = delta * 5;
  12578. const int t5 = delta * 3;
  12579. const int t6 = delta * 1;
  12580. // BC4 floors in its divisions, which we compensate for with the 4 bias.
  12581. // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one).
  12582. const int bias = 4 - min_v * 14;
  12583. static const uint32_t s_tran0[8] = { 1U , 7U , 6U , 5U , 4U , 3U , 2U , 0U };
  12584. static const uint32_t s_tran1[8] = { 1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U };
  12585. static const uint32_t s_tran2[8] = { 1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U };
  12586. static const uint32_t s_tran3[8] = { 1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U };
  12587. uint64_t a0, a1, a2, a3;
  12588. {
  12589. const int v0 = pPixels[0 * stride] * 14 + bias;
  12590. const int v1 = pPixels[1 * stride] * 14 + bias;
  12591. const int v2 = pPixels[2 * stride] * 14 + bias;
  12592. const int v3 = pPixels[3 * stride] * 14 + bias;
  12593. a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)];
  12594. a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)];
  12595. a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)];
  12596. a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)];
  12597. }
  12598. {
  12599. const int v0 = pPixels[4 * stride] * 14 + bias;
  12600. const int v1 = pPixels[5 * stride] * 14 + bias;
  12601. const int v2 = pPixels[6 * stride] * 14 + bias;
  12602. const int v3 = pPixels[7 * stride] * 14 + bias;
  12603. a0 |= (s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U);
  12604. a1 |= (s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U);
  12605. a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U);
  12606. a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U);
  12607. }
  12608. {
  12609. const int v0 = pPixels[8 * stride] * 14 + bias;
  12610. const int v1 = pPixels[9 * stride] * 14 + bias;
  12611. const int v2 = pPixels[10 * stride] * 14 + bias;
  12612. const int v3 = pPixels[11 * stride] * 14 + bias;
  12613. a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U);
  12614. a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U);
  12615. a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U);
  12616. a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U);
  12617. }
  12618. {
  12619. const int v0 = pPixels[12 * stride] * 14 + bias;
  12620. const int v1 = pPixels[13 * stride] * 14 + bias;
  12621. const int v2 = pPixels[14 * stride] * 14 + bias;
  12622. const int v3 = pPixels[15 * stride] * 14 + bias;
  12623. a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U);
  12624. a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U);
  12625. a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U);
  12626. a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U);
  12627. }
  12628. const uint64_t f = a0 | a1 | a2 | a3;
  12629. pDst_bytes[2] = (uint8_t)f;
  12630. pDst_bytes[3] = (uint8_t)(f >> 8U);
  12631. pDst_bytes[4] = (uint8_t)(f >> 16U);
  12632. pDst_bytes[5] = (uint8_t)(f >> 24U);
  12633. pDst_bytes[6] = (uint8_t)(f >> 32U);
  12634. pDst_bytes[7] = (uint8_t)(f >> 40U);
  12635. }
  12636. static void bc1_find_sels(const color32 *pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
  12637. {
  12638. uint32_t block_r[4], block_g[4], block_b[4];
  12639. block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2);
  12640. block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2);
  12641. block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3;
  12642. block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3;
  12643. int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0];
  12644. int dots[4];
  12645. for (uint32_t i = 0; i < 4; i++)
  12646. dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab;
  12647. int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
  12648. ar *= 2; ag *= 2; ab *= 2;
  12649. for (uint32_t i = 0; i < 16; i++)
  12650. {
  12651. const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab;
  12652. static const uint8_t s_sels[4] = { 3, 2, 1, 0 };
  12653. // Rounding matters here!
  12654. // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality.
  12655. sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)];
  12656. }
  12657. }
  12658. static inline void bc1_find_sels_2(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
  12659. {
  12660. uint32_t block_r[4], block_g[4], block_b[4];
  12661. block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2);
  12662. block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2);
  12663. block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3;
  12664. block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3;
  12665. int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0];
  12666. int dots[4];
  12667. for (uint32_t i = 0; i < 4; i++)
  12668. dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab;
  12669. int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
  12670. ar *= 2; ag *= 2; ab *= 2;
  12671. static const uint8_t s_sels[4] = { 3, 2, 1, 0 };
  12672. for (uint32_t i = 0; i < 16; i += 4)
  12673. {
  12674. const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab;
  12675. const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab;
  12676. const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab;
  12677. const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab;
  12678. sels[i+0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)];
  12679. sels[i+1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)];
  12680. sels[i+2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)];
  12681. sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)];
  12682. }
  12683. }
  12684. struct vec3F { float c[3]; };
  12685. static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh)
  12686. {
  12687. // Derived from bc7enc16's LS function.
  12688. // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
  12689. // I did this in matrix form first, expanded out all the ops, then optimized it a bit.
  12690. uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0;
  12691. // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w))
  12692. // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier.
  12693. static const uint32_t s_weight_vals[4] = { 0x000009, 0x010204, 0x040201, 0x090000 };
  12694. uint32_t weight_accum = 0;
  12695. for (uint32_t i = 0; i < 16; i++)
  12696. {
  12697. const uint32_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2];
  12698. const uint32_t sel = pSelectors[i];
  12699. ut_r += r;
  12700. ut_g += g;
  12701. ut_b += b;
  12702. weight_accum += s_weight_vals[sel];
  12703. uq00_r += sel * r;
  12704. uq00_g += sel * g;
  12705. uq00_b += sel * b;
  12706. }
  12707. float q00_r = (float)uq00_r, q10_r = (float)uq10_r, t_r = (float)ut_r;
  12708. float q00_g = (float)uq00_g, q10_g = (float)uq10_g, t_g = (float)ut_g;
  12709. float q00_b = (float)uq00_b, q10_b = (float)uq10_b, t_b = (float)ut_b;
  12710. q10_r = t_r * 3.0f - q00_r;
  12711. q10_g = t_g * 3.0f - q00_g;
  12712. q10_b = t_b * 3.0f - q00_b;
  12713. float z00 = (float)((weight_accum >> 16) & 0xFF);
  12714. float z10 = (float)((weight_accum >> 8) & 0xFF);
  12715. float z11 = (float)(weight_accum & 0xFF);
  12716. float z01 = z10;
  12717. float det = z00 * z11 - z01 * z10;
  12718. if (fabs(det) < 1e-8f)
  12719. return false;
  12720. det = 3.0f / det;
  12721. float iz00, iz01, iz10, iz11;
  12722. iz00 = z11 * det;
  12723. iz01 = -z01 * det;
  12724. iz10 = -z10 * det;
  12725. iz11 = z00 * det;
  12726. pXl->c[0] = iz00 * q00_r + iz01 * q10_r; pXh->c[0] = iz10 * q00_r + iz11 * q10_r;
  12727. pXl->c[1] = iz00 * q00_g + iz01 * q10_g; pXh->c[1] = iz10 * q00_g + iz11 * q10_g;
  12728. pXl->c[2] = iz00 * q00_b + iz01 * q10_b; pXh->c[2] = iz10 * q00_b + iz11 * q10_b;
  12729. // Check and fix channel singularities - might not be needed, but is in UASTC's encoder.
  12730. for (uint32_t c = 0; c < 3; c++)
  12731. {
  12732. if ((pXl->c[c] < 0.0f) || (pXh->c[c] > 255.0f))
  12733. {
  12734. uint32_t lo_v = UINT32_MAX, hi_v = 0;
  12735. for (uint32_t i = 0; i < 16; i++)
  12736. {
  12737. lo_v = basisu::minimumu(lo_v, pColors[i].c[c]);
  12738. hi_v = basisu::maximumu(hi_v, pColors[i].c[c]);
  12739. }
  12740. if (lo_v == hi_v)
  12741. {
  12742. pXl->c[c] = (float)lo_v;
  12743. pXh->c[c] = (float)hi_v;
  12744. }
  12745. }
  12746. }
  12747. return true;
  12748. }
  12749. void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb)
  12750. {
  12751. dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
  12752. uint32_t mask = 0xAA;
  12753. uint32_t max16 = (g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi;
  12754. uint32_t min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo;
  12755. if (min16 == max16)
  12756. {
  12757. // Always forbid 3 color blocks
  12758. // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's.
  12759. mask = 0;
  12760. // Make l > h
  12761. if (min16 > 0)
  12762. min16--;
  12763. else
  12764. {
  12765. // l = h = 0
  12766. assert(min16 == max16 && max16 == 0);
  12767. max16 = 1;
  12768. min16 = 0;
  12769. mask = 0x55;
  12770. }
  12771. assert(max16 > min16);
  12772. }
  12773. if (max16 < min16)
  12774. {
  12775. std::swap(max16, min16);
  12776. mask ^= 0x55;
  12777. }
  12778. pDst_block->set_low_color(static_cast<uint16_t>(max16));
  12779. pDst_block->set_high_color(static_cast<uint16_t>(min16));
  12780. pDst_block->m_selectors[0] = static_cast<uint8_t>(mask);
  12781. pDst_block->m_selectors[1] = static_cast<uint8_t>(mask);
  12782. pDst_block->m_selectors[2] = static_cast<uint8_t>(mask);
  12783. pDst_block->m_selectors[3] = static_cast<uint8_t>(mask);
  12784. }
  12785. static inline uint8_t to_5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
  12786. static inline uint8_t to_6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
  12787. // Good references: squish library, stb_dxt.
  12788. void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags)
  12789. {
  12790. const color32* pSrc_pixels = (const color32*)pPixels;
  12791. dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
  12792. int avg_r = -1, avg_g = 0, avg_b = 0;
  12793. int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0;
  12794. uint8_t sels[16];
  12795. const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0;
  12796. if (use_sels)
  12797. {
  12798. // Caller is jamming in their own selectors for us to try.
  12799. const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24);
  12800. static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 };
  12801. for (uint32_t i = 0; i < 16; i++)
  12802. sels[i] = s_sel_tran[(s >> (i * 2)) & 3];
  12803. }
  12804. else
  12805. {
  12806. const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;
  12807. uint32_t j;
  12808. for (j = 1; j < 16; j++)
  12809. if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb))
  12810. break;
  12811. if (j == 16)
  12812. {
  12813. encode_bc1_solid_block(pDst, fr, fg, fb);
  12814. return;
  12815. }
  12816. // Select 2 colors along the principle axis. (There must be a faster/simpler way.)
  12817. int total_r = fr, total_g = fg, total_b = fb;
  12818. int max_r = fr, max_g = fg, max_b = fb;
  12819. int min_r = fr, min_g = fg, min_b = fb;
  12820. for (uint32_t i = 1; i < 16; i++)
  12821. {
  12822. const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
  12823. max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
  12824. min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
  12825. total_r += r; total_g += g; total_b += b;
  12826. }
  12827. avg_r = (total_r + 8) >> 4;
  12828. avg_g = (total_g + 8) >> 4;
  12829. avg_b = (total_b + 8) >> 4;
  12830. int icov[6] = { 0, 0, 0, 0, 0, 0 };
  12831. for (uint32_t i = 0; i < 16; i++)
  12832. {
  12833. int r = (int)pSrc_pixels[i].r - avg_r;
  12834. int g = (int)pSrc_pixels[i].g - avg_g;
  12835. int b = (int)pSrc_pixels[i].b - avg_b;
  12836. icov[0] += r * r;
  12837. icov[1] += r * g;
  12838. icov[2] += r * b;
  12839. icov[3] += g * g;
  12840. icov[4] += g * b;
  12841. icov[5] += b * b;
  12842. }
  12843. float cov[6];
  12844. for (uint32_t i = 0; i < 6; i++)
  12845. cov[i] = static_cast<float>(icov[i])* (1.0f / 255.0f);
  12846. #if 0
  12847. // Seems silly to use full PCA to choose 2 colors. The diff in avg. PSNR between using PCA vs. not is small (~.025 difference).
  12848. // TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta
  12849. int saxis_r = max_r - min_r;
  12850. int saxis_g = max_g - min_g;
  12851. int saxis_b = max_b - min_b;
  12852. #else
  12853. float xr = (float)(max_r - min_r);
  12854. float xg = (float)(max_g - min_g);
  12855. float xb = (float)(max_b - min_b);
  12856. //float xr = (float)(max_r - avg_r); // max-avg is nearly the same, and doesn't require computing min's
  12857. //float xg = (float)(max_g - avg_g);
  12858. //float xb = (float)(max_b - avg_b);
  12859. for (uint32_t power_iter = 0; power_iter < 4; power_iter++)
  12860. {
  12861. float r = xr * cov[0] + xg * cov[1] + xb * cov[2];
  12862. float g = xr * cov[1] + xg * cov[3] + xb * cov[4];
  12863. float b = xr * cov[2] + xg * cov[4] + xb * cov[5];
  12864. xr = r; xg = g; xb = b;
  12865. }
  12866. float k = basisu::maximum(fabsf(xr), fabsf(xg), fabsf(xb));
  12867. int saxis_r = 306, saxis_g = 601, saxis_b = 117;
  12868. if (k >= 2)
  12869. {
  12870. float m = 1024.0f / k;
  12871. saxis_r = (int)(xr * m);
  12872. saxis_g = (int)(xg * m);
  12873. saxis_b = (int)(xb * m);
  12874. }
  12875. #endif
  12876. int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0;
  12877. for (uint32_t i = 0; i < 16; i++)
  12878. {
  12879. int dot = pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b;
  12880. if (dot < low_dot)
  12881. {
  12882. low_dot = dot;
  12883. low_c = i;
  12884. }
  12885. if (dot > high_dot)
  12886. {
  12887. high_dot = dot;
  12888. high_c = i;
  12889. }
  12890. }
  12891. lr = to_5(pSrc_pixels[low_c].r);
  12892. lg = to_6(pSrc_pixels[low_c].g);
  12893. lb = to_5(pSrc_pixels[low_c].b);
  12894. hr = to_5(pSrc_pixels[high_c].r);
  12895. hg = to_6(pSrc_pixels[high_c].g);
  12896. hb = to_5(pSrc_pixels[high_c].b);
  12897. bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
  12898. } // if (use_sels)
  12899. const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1);
  12900. for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++)
  12901. {
  12902. // This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors.
  12903. vec3F xl, xh;
  12904. if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh))
  12905. {
  12906. if (avg_r < 0)
  12907. {
  12908. int total_r = 0, total_g = 0, total_b = 0;
  12909. for (uint32_t i = 0; i < 16; i++)
  12910. {
  12911. total_r += pSrc_pixels[i].r;
  12912. total_g += pSrc_pixels[i].g;
  12913. total_b += pSrc_pixels[i].b;
  12914. }
  12915. avg_r = (total_r + 8) >> 4;
  12916. avg_g = (total_g + 8) >> 4;
  12917. avg_b = (total_b + 8) >> 4;
  12918. }
  12919. // All selectors equal - treat it as a solid block which should always be equal or better.
  12920. lr = g_bc1_match5_equals_1[avg_r].m_hi;
  12921. lg = g_bc1_match6_equals_1[avg_g].m_hi;
  12922. lb = g_bc1_match5_equals_1[avg_b].m_hi;
  12923. hr = g_bc1_match5_equals_1[avg_r].m_lo;
  12924. hg = g_bc1_match6_equals_1[avg_g].m_lo;
  12925. hb = g_bc1_match5_equals_1[avg_b].m_lo;
  12926. // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge.
  12927. }
  12928. else
  12929. {
  12930. lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
  12931. lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
  12932. lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
  12933. hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
  12934. hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
  12935. hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
  12936. }
  12937. bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
  12938. }
  12939. uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb);
  12940. uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb);
  12941. // Always forbid 3 color blocks
  12942. if (lc16 == hc16)
  12943. {
  12944. uint8_t mask = 0;
  12945. // Make l > h
  12946. if (hc16 > 0)
  12947. hc16--;
  12948. else
  12949. {
  12950. // lc16 = hc16 = 0
  12951. assert(lc16 == hc16 && hc16 == 0);
  12952. hc16 = 0;
  12953. lc16 = 1;
  12954. mask = 0x55; // select hc16
  12955. }
  12956. assert(lc16 > hc16);
  12957. pDst_block->set_low_color(static_cast<uint16_t>(lc16));
  12958. pDst_block->set_high_color(static_cast<uint16_t>(hc16));
  12959. pDst_block->m_selectors[0] = mask;
  12960. pDst_block->m_selectors[1] = mask;
  12961. pDst_block->m_selectors[2] = mask;
  12962. pDst_block->m_selectors[3] = mask;
  12963. }
  12964. else
  12965. {
  12966. uint8_t invert_mask = 0;
  12967. if (lc16 < hc16)
  12968. {
  12969. std::swap(lc16, hc16);
  12970. invert_mask = 0x55;
  12971. }
  12972. assert(lc16 > hc16);
  12973. pDst_block->set_low_color((uint16_t)lc16);
  12974. pDst_block->set_high_color((uint16_t)hc16);
  12975. uint32_t packed_sels = 0;
  12976. static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 };
  12977. for (uint32_t i = 0; i < 16; i++)
  12978. packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2));
  12979. pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask;
  12980. pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
  12981. pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
  12982. pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
  12983. }
  12984. }
  12985. void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags)
  12986. {
  12987. const color32* pSrc_pixels = (const color32*)pPixels;
  12988. dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
  12989. int avg_r = -1, avg_g = 0, avg_b = 0;
  12990. int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0;
  12991. uint8_t sels[16];
  12992. const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0;
  12993. if (use_sels)
  12994. {
  12995. // Caller is jamming in their own selectors for us to try.
  12996. const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24);
  12997. static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 };
  12998. for (uint32_t i = 0; i < 16; i++)
  12999. sels[i] = s_sel_tran[(s >> (i * 2)) & 3];
  13000. }
  13001. else
  13002. {
  13003. const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;
  13004. uint32_t j;
  13005. for (j = 1; j < 16; j++)
  13006. if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb))
  13007. break;
  13008. if (j == 16)
  13009. {
  13010. encode_bc1_solid_block(pDst, fr, fg, fb);
  13011. return;
  13012. }
  13013. // Select 2 colors along the principle axis. (There must be a faster/simpler way.)
  13014. int total_r = fr, total_g = fg, total_b = fb;
  13015. int max_r = fr, max_g = fg, max_b = fb;
  13016. int min_r = fr, min_g = fg, min_b = fb;
  13017. uint32_t grayscale_flag = (fr == fg) && (fr == fb);
  13018. for (uint32_t i = 1; i < 16; i++)
  13019. {
  13020. const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
  13021. grayscale_flag &= ((r == g) && (r == b));
  13022. max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
  13023. min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
  13024. total_r += r; total_g += g; total_b += b;
  13025. }
  13026. if (grayscale_flag)
  13027. {
  13028. // Grayscale blocks are a common enough case to specialize.
  13029. if ((max_r - min_r) < 2)
  13030. {
  13031. lr = lb = hr = hb = to_5(fr);
  13032. lg = hg = to_6(fr);
  13033. }
  13034. else
  13035. {
  13036. lr = lb = to_5(min_r);
  13037. lg = to_6(min_r);
  13038. hr = hb = to_5(max_r);
  13039. hg = to_6(max_r);
  13040. }
  13041. }
  13042. else
  13043. {
  13044. avg_r = (total_r + 8) >> 4;
  13045. avg_g = (total_g + 8) >> 4;
  13046. avg_b = (total_b + 8) >> 4;
  13047. // Find the shortest vector from a AABB corner to the block's average color.
  13048. // This is to help avoid outliers.
  13049. uint32_t dist[3][2];
  13050. dist[0][0] = basisu::square(min_r - avg_r) << 3; dist[0][1] = basisu::square(max_r - avg_r) << 3;
  13051. dist[1][0] = basisu::square(min_g - avg_g) << 3; dist[1][1] = basisu::square(max_g - avg_g) << 3;
  13052. dist[2][0] = basisu::square(min_b - avg_b) << 3; dist[2][1] = basisu::square(max_b - avg_b) << 3;
  13053. uint32_t min_d0 = (dist[0][0] + dist[1][0] + dist[2][0]);
  13054. uint32_t d4 = (dist[0][0] + dist[1][0] + dist[2][1]) | 4;
  13055. min_d0 = basisu::minimum(min_d0, d4);
  13056. uint32_t min_d1 = (dist[0][1] + dist[1][0] + dist[2][0]) | 1;
  13057. uint32_t d5 = (dist[0][1] + dist[1][0] + dist[2][1]) | 5;
  13058. min_d1 = basisu::minimum(min_d1, d5);
  13059. uint32_t d2 = (dist[0][0] + dist[1][1] + dist[2][0]) | 2;
  13060. min_d0 = basisu::minimum(min_d0, d2);
  13061. uint32_t d3 = (dist[0][1] + dist[1][1] + dist[2][0]) | 3;
  13062. min_d1 = basisu::minimum(min_d1, d3);
  13063. uint32_t d6 = (dist[0][0] + dist[1][1] + dist[2][1]) | 6;
  13064. min_d0 = basisu::minimum(min_d0, d6);
  13065. uint32_t d7 = (dist[0][1] + dist[1][1] + dist[2][1]) | 7;
  13066. min_d1 = basisu::minimum(min_d1, d7);
  13067. uint32_t min_d = basisu::minimum(min_d0, min_d1);
  13068. uint32_t best_i = min_d & 7;
  13069. int delta_r = (best_i & 1) ? (max_r - avg_r) : (avg_r - min_r);
  13070. int delta_g = (best_i & 2) ? (max_g - avg_g) : (avg_g - min_g);
  13071. int delta_b = (best_i & 4) ? (max_b - avg_b) : (avg_b - min_b);
  13072. // Note: if delta_r/g/b==0, we actually want to choose a single color, so the block average color optimization kicks in.
  13073. uint32_t low_c = 0, high_c = 0;
  13074. if ((delta_r | delta_g | delta_b) != 0)
  13075. {
  13076. // Now we have a smaller AABB going from the block's average color to a cornerpoint of the larger AABB.
  13077. // Project all pixels colors along the 4 vectors going from a smaller AABB cornerpoint to the opposite cornerpoint, find largest projection.
  13078. // One of these vectors will be a decent approximation of the block's PCA.
  13079. const int saxis0_r = delta_r, saxis0_g = delta_g, saxis0_b = delta_b;
  13080. int low_dot0 = INT_MAX, high_dot0 = INT_MIN;
  13081. int low_dot1 = INT_MAX, high_dot1 = INT_MIN;
  13082. int low_dot2 = INT_MAX, high_dot2 = INT_MIN;
  13083. int low_dot3 = INT_MAX, high_dot3 = INT_MIN;
  13084. //int low_c0, low_c1, low_c2, low_c3;
  13085. //int high_c0, high_c1, high_c2, high_c3;
  13086. for (uint32_t i = 0; i < 16; i++)
  13087. {
  13088. const int dotx = pSrc_pixels[i].r * saxis0_r;
  13089. const int doty = pSrc_pixels[i].g * saxis0_g;
  13090. const int dotz = pSrc_pixels[i].b * saxis0_b;
  13091. const int dot0 = ((dotz + dotx + doty) << 4) + i;
  13092. const int dot1 = ((dotz - dotx - doty) << 4) + i;
  13093. const int dot2 = ((dotz - dotx + doty) << 4) + i;
  13094. const int dot3 = ((dotz + dotx - doty) << 4) + i;
  13095. if (dot0 < low_dot0)
  13096. {
  13097. low_dot0 = dot0;
  13098. //low_c0 = i;
  13099. }
  13100. if ((dot0 ^ 15) > high_dot0)
  13101. {
  13102. high_dot0 = dot0 ^ 15;
  13103. //high_c0 = i;
  13104. }
  13105. if (dot1 < low_dot1)
  13106. {
  13107. low_dot1 = dot1;
  13108. //low_c1 = i;
  13109. }
  13110. if ((dot1 ^ 15) > high_dot1)
  13111. {
  13112. high_dot1 = dot1 ^ 15;
  13113. //high_c1 = i;
  13114. }
  13115. if (dot2 < low_dot2)
  13116. {
  13117. low_dot2 = dot2;
  13118. //low_c2 = i;
  13119. }
  13120. if ((dot2 ^ 15) > high_dot2)
  13121. {
  13122. high_dot2 = dot2 ^ 15;
  13123. //high_c2 = i;
  13124. }
  13125. if (dot3 < low_dot3)
  13126. {
  13127. low_dot3 = dot3;
  13128. //low_c3 = i;
  13129. }
  13130. if ((dot3 ^ 15) > high_dot3)
  13131. {
  13132. high_dot3 = dot3 ^ 15;
  13133. //high_c3 = i;
  13134. }
  13135. }
  13136. low_c = low_dot0 & 15;
  13137. high_c = ~high_dot0 & 15;
  13138. uint32_t r = (high_dot0 & ~15) - (low_dot0 & ~15);
  13139. uint32_t tr = (high_dot1 & ~15) - (low_dot1 & ~15);
  13140. if (tr > r) {
  13141. low_c = low_dot1 & 15;
  13142. high_c = ~high_dot1 & 15;
  13143. r = tr;
  13144. }
  13145. tr = (high_dot2 & ~15) - (low_dot2 & ~15);
  13146. if (tr > r) {
  13147. low_c = low_dot2 & 15;
  13148. high_c = ~high_dot2 & 15;
  13149. r = tr;
  13150. }
  13151. tr = (high_dot3 & ~15) - (low_dot3 & ~15);
  13152. if (tr > r) {
  13153. low_c = low_dot3 & 15;
  13154. high_c = ~high_dot3 & 15;
  13155. }
  13156. }
  13157. lr = to_5(pSrc_pixels[low_c].r);
  13158. lg = to_6(pSrc_pixels[low_c].g);
  13159. lb = to_5(pSrc_pixels[low_c].b);
  13160. hr = to_5(pSrc_pixels[high_c].r);
  13161. hg = to_6(pSrc_pixels[high_c].g);
  13162. hb = to_5(pSrc_pixels[high_c].b);
  13163. }
  13164. bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
  13165. } // if (use_sels)
  13166. const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1);
  13167. for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++)
  13168. {
  13169. int prev_lr = lr, prev_lg = lg, prev_lb = lb, prev_hr = hr, prev_hg = hg, prev_hb = hb;
  13170. // This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors.
  13171. vec3F xl, xh;
  13172. if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh))
  13173. {
  13174. if (avg_r < 0)
  13175. {
  13176. int total_r = 0, total_g = 0, total_b = 0;
  13177. for (uint32_t i = 0; i < 16; i++)
  13178. {
  13179. total_r += pSrc_pixels[i].r;
  13180. total_g += pSrc_pixels[i].g;
  13181. total_b += pSrc_pixels[i].b;
  13182. }
  13183. avg_r = (total_r + 8) >> 4;
  13184. avg_g = (total_g + 8) >> 4;
  13185. avg_b = (total_b + 8) >> 4;
  13186. }
  13187. // All selectors equal - treat it as a solid block which should always be equal or better.
  13188. lr = g_bc1_match5_equals_1[avg_r].m_hi;
  13189. lg = g_bc1_match6_equals_1[avg_g].m_hi;
  13190. lb = g_bc1_match5_equals_1[avg_b].m_hi;
  13191. hr = g_bc1_match5_equals_1[avg_r].m_lo;
  13192. hg = g_bc1_match6_equals_1[avg_g].m_lo;
  13193. hb = g_bc1_match5_equals_1[avg_b].m_lo;
  13194. // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge.
  13195. }
  13196. else
  13197. {
  13198. lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
  13199. lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
  13200. lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
  13201. hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
  13202. hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
  13203. hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
  13204. }
  13205. if ((prev_lr == lr) && (prev_lg == lg) && (prev_lb == lb) && (prev_hr == hr) && (prev_hg == hg) && (prev_hb == hb))
  13206. break;
  13207. bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
  13208. }
  13209. uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb);
  13210. uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb);
  13211. // Always forbid 3 color blocks
  13212. if (lc16 == hc16)
  13213. {
  13214. uint8_t mask = 0;
  13215. // Make l > h
  13216. if (hc16 > 0)
  13217. hc16--;
  13218. else
  13219. {
  13220. // lc16 = hc16 = 0
  13221. assert(lc16 == hc16 && hc16 == 0);
  13222. hc16 = 0;
  13223. lc16 = 1;
  13224. mask = 0x55; // select hc16
  13225. }
  13226. assert(lc16 > hc16);
  13227. pDst_block->set_low_color(static_cast<uint16_t>(lc16));
  13228. pDst_block->set_high_color(static_cast<uint16_t>(hc16));
  13229. pDst_block->m_selectors[0] = mask;
  13230. pDst_block->m_selectors[1] = mask;
  13231. pDst_block->m_selectors[2] = mask;
  13232. pDst_block->m_selectors[3] = mask;
  13233. }
  13234. else
  13235. {
  13236. uint8_t invert_mask = 0;
  13237. if (lc16 < hc16)
  13238. {
  13239. std::swap(lc16, hc16);
  13240. invert_mask = 0x55;
  13241. }
  13242. assert(lc16 > hc16);
  13243. pDst_block->set_low_color((uint16_t)lc16);
  13244. pDst_block->set_high_color((uint16_t)hc16);
  13245. uint32_t packed_sels = 0;
  13246. static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 };
  13247. for (uint32_t i = 0; i < 16; i++)
  13248. packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2));
  13249. pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask;
  13250. pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
  13251. pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
  13252. pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
  13253. }
  13254. }
  13255. // Scale the UASTC first subset endpoints and first plane's weight indices directly to BC1's - fastest.
  13256. void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst)
  13257. {
  13258. const uint32_t mode = unpacked_src_blk.m_mode;
  13259. const astc_block_desc& astc_blk = unpacked_src_blk.m_astc;
  13260. dxt1_block& b = *static_cast<dxt1_block*>(pDst);
  13261. const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
  13262. const uint32_t total_comps = g_uastc_mode_comps[mode];
  13263. if (total_comps == 2)
  13264. {
  13265. const uint32_t l = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant;
  13266. const uint32_t h = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant;
  13267. b.set_low_color(dxt1_block::pack_color(color32(l, l, l, 255), true, 127));
  13268. b.set_high_color(dxt1_block::pack_color(color32(h, h, h, 255), true, 127));
  13269. }
  13270. else
  13271. {
  13272. b.set_low_color(dxt1_block::pack_color(
  13273. color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant,
  13274. g_astc_unquant[endpoint_range][astc_blk.m_endpoints[2]].m_unquant,
  13275. g_astc_unquant[endpoint_range][astc_blk.m_endpoints[4]].m_unquant,
  13276. 255), true, 127)
  13277. );
  13278. b.set_high_color(dxt1_block::pack_color(
  13279. color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant,
  13280. g_astc_unquant[endpoint_range][astc_blk.m_endpoints[3]].m_unquant,
  13281. g_astc_unquant[endpoint_range][astc_blk.m_endpoints[5]].m_unquant,
  13282. 255), true, 127)
  13283. );
  13284. }
  13285. if (b.get_low_color() == b.get_high_color())
  13286. {
  13287. // Always forbid 3 color blocks
  13288. uint16_t lc16 = (uint16_t)b.get_low_color();
  13289. uint16_t hc16 = (uint16_t)b.get_high_color();
  13290. uint8_t mask = 0;
  13291. // Make l > h
  13292. if (hc16 > 0)
  13293. hc16--;
  13294. else
  13295. {
  13296. // lc16 = hc16 = 0
  13297. assert(lc16 == hc16 && hc16 == 0);
  13298. hc16 = 0;
  13299. lc16 = 1;
  13300. mask = 0x55; // select hc16
  13301. }
  13302. assert(lc16 > hc16);
  13303. b.set_low_color(static_cast<uint16_t>(lc16));
  13304. b.set_high_color(static_cast<uint16_t>(hc16));
  13305. b.m_selectors[0] = mask;
  13306. b.m_selectors[1] = mask;
  13307. b.m_selectors[2] = mask;
  13308. b.m_selectors[3] = mask;
  13309. }
  13310. else
  13311. {
  13312. bool invert = false;
  13313. if (b.get_low_color() < b.get_high_color())
  13314. {
  13315. std::swap(b.m_low_color[0], b.m_high_color[0]);
  13316. std::swap(b.m_low_color[1], b.m_high_color[1]);
  13317. invert = true;
  13318. }
  13319. const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]];
  13320. const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1;
  13321. uint32_t sels = 0;
  13322. for (int i = 15; i >= 0; --i)
  13323. {
  13324. uint32_t s = pTran[astc_blk.m_weights[i << plane_shift]];
  13325. if (invert)
  13326. s ^= 1;
  13327. sels = (sels << 2) | s;
  13328. }
  13329. b.m_selectors[0] = sels & 0xFF;
  13330. b.m_selectors[1] = (sels >> 8) & 0xFF;
  13331. b.m_selectors[2] = (sels >> 16) & 0xFF;
  13332. b.m_selectors[3] = (sels >> 24) & 0xFF;
  13333. }
  13334. }
  13335. // Scale the UASTC first plane's weight indices to BC1, use 1 or 2 least squares passes to compute endpoints - no PCA needed.
  13336. void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality)
  13337. {
  13338. const uint32_t mode = unpacked_src_blk.m_mode;
  13339. const astc_block_desc& astc_blk = unpacked_src_blk.m_astc;
  13340. dxt1_block& b = *static_cast<dxt1_block*>(pDst);
  13341. b.set_low_color(1);
  13342. b.set_high_color(0);
  13343. const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]];
  13344. const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1;
  13345. uint32_t sels = 0;
  13346. for (int i = 15; i >= 0; --i)
  13347. {
  13348. sels <<= 2;
  13349. sels |= pTran[astc_blk.m_weights[i << plane_shift]];
  13350. }
  13351. b.m_selectors[0] = sels & 0xFF;
  13352. b.m_selectors[1] = (sels >> 8) & 0xFF;
  13353. b.m_selectors[2] = (sels >> 16) & 0xFF;
  13354. b.m_selectors[3] = (sels >> 24) & 0xFF;
  13355. encode_bc1(&b, (const uint8_t*)&block_pixels[0][0].c[0], (high_quality ? cEncodeBC1HighQuality : 0) | cEncodeBC1UseSelectors);
  13356. }
  13357. bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality)
  13358. {
  13359. unpacked_uastc_block unpacked_src_blk;
  13360. if (!unpack_uastc(src_blk, unpacked_src_blk, false))
  13361. return false;
  13362. const uint32_t mode = unpacked_src_blk.m_mode;
  13363. if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
  13364. {
  13365. encode_bc1_solid_block(pDst, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b);
  13366. return true;
  13367. }
  13368. if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0))
  13369. transcode_uastc_to_bc1_hint0(unpacked_src_blk, pDst);
  13370. else
  13371. {
  13372. color32 block_pixels[4][4];
  13373. const bool unpack_srgb = false;
  13374. if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
  13375. return false;
  13376. if (unpacked_src_blk.m_bc1_hint1)
  13377. transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pDst, high_quality);
  13378. else
  13379. encode_bc1(pDst, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0);
  13380. }
  13381. return true;
  13382. }
  // Writes an 8 byte BC4 block at pDst: both endpoint bytes are set to a (truncated to 8 bits) and the 6 selector
  // bytes are zeroed.
  static void write_bc4_solid_block(uint8_t* pDst, uint32_t a)
  {
      const uint8_t endpoint = (uint8_t)a;

      memset(pDst, 0, 8);
      pDst[0] = endpoint;
      pDst[1] = endpoint;
  }
  13389. bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality)
  13390. {
  13391. unpacked_uastc_block unpacked_src_blk;
  13392. if (!unpack_uastc(src_blk, unpacked_src_blk, false))
  13393. return false;
  13394. const uint32_t mode = unpacked_src_blk.m_mode;
  13395. void* pBC4_block = pDst;
  13396. dxt1_block* pBC1_block = &static_cast<dxt1_block*>(pDst)[1];
  13397. if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
  13398. {
  13399. write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.a);
  13400. encode_bc1_solid_block(pBC1_block, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b);
  13401. return true;
  13402. }
  13403. color32 block_pixels[4][4];
  13404. const bool unpack_srgb = false;
  13405. if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
  13406. return false;
  13407. basist::encode_bc4(pBC4_block, &block_pixels[0][0].a, sizeof(color32));
  13408. if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0))
  13409. transcode_uastc_to_bc1_hint0(unpacked_src_blk, pBC1_block);
  13410. else
  13411. {
  13412. if (unpacked_src_blk.m_bc1_hint1)
  13413. transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pBC1_block, high_quality);
  13414. else
  13415. encode_bc1(pBC1_block, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0);
  13416. }
  13417. return true;
  13418. }
  13419. bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
  13420. {
  13421. BASISU_NOTE_UNUSED(high_quality);
  13422. unpacked_uastc_block unpacked_src_blk;
  13423. if (!unpack_uastc(src_blk, unpacked_src_blk, false))
  13424. return false;
  13425. const uint32_t mode = unpacked_src_blk.m_mode;
  13426. void* pBC4_block = pDst;
  13427. if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
  13428. {
  13429. write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.c[chan0]);
  13430. return true;
  13431. }
  13432. color32 block_pixels[4][4];
  13433. const bool unpack_srgb = false;
  13434. if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
  13435. return false;
  13436. basist::encode_bc4(pBC4_block, &block_pixels[0][0].c[chan0], sizeof(color32));
  13437. return true;
  13438. }
  13439. bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
  13440. {
  13441. BASISU_NOTE_UNUSED(high_quality);
  13442. unpacked_uastc_block unpacked_src_blk;
  13443. if (!unpack_uastc(src_blk, unpacked_src_blk, false))
  13444. return false;
  13445. const uint32_t mode = unpacked_src_blk.m_mode;
  13446. void* pBC4_block0 = pDst;
  13447. void* pBC4_block1 = (uint8_t*)pDst + 8;
  13448. if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
  13449. {
  13450. write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block0), unpacked_src_blk.m_solid_color.c[chan0]);
  13451. write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block1), unpacked_src_blk.m_solid_color.c[chan1]);
  13452. return true;
  13453. }
  13454. color32 block_pixels[4][4];
  13455. const bool unpack_srgb = false;
  13456. if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
  13457. return false;
  13458. basist::encode_bc4(pBC4_block0, &block_pixels[0][0].c[chan0], sizeof(color32));
  13459. basist::encode_bc4(pBC4_block1, &block_pixels[0][0].c[chan1], sizeof(color32));
  13460. return true;
  13461. }
  // Bit offset of each pixel's 3-bit selector within the packed selector bits handed to eac_block::set_selector_bits().
  // Indexed by pixel index i, in the order the pixels are visited by pack_eac(). Entry i is 45 - 12 * (i & 3) - 3 * (i >> 2).
  static const uint8_t s_etc2_eac_bit_ofs[16] =
  {
      45, 33, 21,  9,
      42, 30, 18,  6,
      39, 27, 15,  3,
      36, 24, 12,  0
  };
  13463. static void pack_eac_solid_block(eac_block& blk, uint32_t a)
  13464. {
  13465. blk.m_base = static_cast<uint8_t>(a);
  13466. blk.m_table = 13;
  13467. blk.m_multiplier = 0;
  13468. memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
  13469. return;
  13470. }
  13471. // Only checks 4 tables.
  13472. static void pack_eac(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
  13473. {
  13474. uint32_t min_alpha = 255, max_alpha = 0;
  13475. for (uint32_t i = 0; i < 16; i++)
  13476. {
  13477. const uint32_t a = pPixels[i * stride];
  13478. if (a < min_alpha) min_alpha = a;
  13479. if (a > max_alpha) max_alpha = a;
  13480. }
  13481. if (min_alpha == max_alpha)
  13482. {
  13483. pack_eac_solid_block(blk, min_alpha);
  13484. return;
  13485. }
  13486. const uint32_t alpha_range = max_alpha - min_alpha;
  13487. const uint32_t SINGLE_TABLE_THRESH = 5;
  13488. if (alpha_range <= SINGLE_TABLE_THRESH)
  13489. {
  13490. // If alpha_range <= 5 table 13 is lossless
  13491. int base = clamp255((int)max_alpha - 2);
  13492. blk.m_base = base;
  13493. blk.m_multiplier = 1;
  13494. blk.m_table = 13;
  13495. base -= 3;
  13496. uint64_t packed_sels = 0;
  13497. for (uint32_t i = 0; i < 16; i++)
  13498. {
  13499. const int a = pPixels[i * stride];
  13500. static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 };
  13501. int sel = a - base;
  13502. assert(sel >= 0 && sel <= 5);
  13503. packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]);
  13504. }
  13505. blk.set_selector_bits(packed_sels);
  13506. return;
  13507. }
  13508. const uint32_t T0 = 2, T1 = 8, T2 = 11, T3 = 13;
  13509. static const uint8_t s_tables[4] = { T0, T1, T2, T3 };
  13510. int base[4], mul[4];
  13511. uint32_t mul_or = 0;
  13512. for (uint32_t i = 0; i < 4; i++)
  13513. {
  13514. const uint32_t table = s_tables[i];
  13515. const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
  13516. base[i] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)));
  13517. mul[i] = clampi((int)roundf(alpha_range / range), 1, 15);
  13518. mul_or |= mul[i];
  13519. }
  13520. uint32_t total_err[4] = { 0, 0, 0, 0 };
  13521. uint8_t sels[4][16];
  13522. for (uint32_t i = 0; i < 16; i++)
  13523. {
  13524. const int a = pPixels[i * stride];
  13525. uint32_t l0 = UINT32_MAX, l1 = UINT32_MAX, l2 = UINT32_MAX, l3 = UINT32_MAX;
  13526. if ((a < 7) || (a > (255 - 7)))
  13527. {
  13528. for (uint32_t s = 0; s < 8; s++)
  13529. {
  13530. const int v0 = clamp255(mul[0] * g_eac_modifier_table[T0][s] + base[0]);
  13531. const int v1 = clamp255(mul[1] * g_eac_modifier_table[T1][s] + base[1]);
  13532. const int v2 = clamp255(mul[2] * g_eac_modifier_table[T2][s] + base[2]);
  13533. const int v3 = clamp255(mul[3] * g_eac_modifier_table[T3][s] + base[3]);
  13534. l0 = basisu::minimum(l0, (basisu::iabs(v0 - a) << 3) | s);
  13535. l1 = basisu::minimum(l1, (basisu::iabs(v1 - a) << 3) | s);
  13536. l2 = basisu::minimum(l2, (basisu::iabs(v2 - a) << 3) | s);
  13537. l3 = basisu::minimum(l3, (basisu::iabs(v3 - a) << 3) | s);
  13538. }
  13539. }
  13540. else if (mul_or == 1)
  13541. {
  13542. const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a;
  13543. for (uint32_t s = 0; s < 8; s++)
  13544. {
  13545. const int v0 = g_eac_modifier_table[T0][s] + a0;
  13546. const int v1 = g_eac_modifier_table[T1][s] + a1;
  13547. const int v2 = g_eac_modifier_table[T2][s] + a2;
  13548. const int v3 = g_eac_modifier_table[T3][s] + a3;
  13549. l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s);
  13550. l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s);
  13551. l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s);
  13552. l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s);
  13553. }
  13554. }
  13555. else
  13556. {
  13557. const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a;
  13558. for (uint32_t s = 0; s < 8; s++)
  13559. {
  13560. const int v0 = mul[0] * g_eac_modifier_table[T0][s] + a0;
  13561. const int v1 = mul[1] * g_eac_modifier_table[T1][s] + a1;
  13562. const int v2 = mul[2] * g_eac_modifier_table[T2][s] + a2;
  13563. const int v3 = mul[3] * g_eac_modifier_table[T3][s] + a3;
  13564. l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s);
  13565. l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s);
  13566. l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s);
  13567. l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s);
  13568. }
  13569. }
  13570. sels[0][i] = l0 & 7;
  13571. sels[1][i] = l1 & 7;
  13572. sels[2][i] = l2 & 7;
  13573. sels[3][i] = l3 & 7;
  13574. total_err[0] += basisu::square<uint32_t>(l0 >> 3);
  13575. total_err[1] += basisu::square<uint32_t>(l1 >> 3);
  13576. total_err[2] += basisu::square<uint32_t>(l2 >> 3);
  13577. total_err[3] += basisu::square<uint32_t>(l3 >> 3);
  13578. }
  13579. uint32_t min_err = total_err[0], min_index = 0;
  13580. for (uint32_t i = 1; i < 4; i++)
  13581. {
  13582. if (total_err[i] < min_err)
  13583. {
  13584. min_err = total_err[i];
  13585. min_index = i;
  13586. }
  13587. }
  13588. blk.m_base = base[min_index];
  13589. blk.m_multiplier = mul[min_index];
  13590. blk.m_table = s_tables[min_index];
  13591. uint64_t packed_sels = 0;
  13592. const uint8_t* pSels = &sels[min_index][0];
  13593. for (uint32_t i = 0; i < 16; i++)
  13594. packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]);
  13595. blk.set_selector_bits(packed_sels);
  13596. }
  13597. // Checks all 16 tables. Around ~2 dB better vs. pack_eac(), ~1.2 dB less than near-optimal.
  13598. static void pack_eac_high_quality(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
  13599. {
  13600. uint32_t min_alpha = 255, max_alpha = 0;
  13601. for (uint32_t i = 0; i < 16; i++)
  13602. {
  13603. const uint32_t a = pPixels[i * stride];
  13604. if (a < min_alpha) min_alpha = a;
  13605. if (a > max_alpha) max_alpha = a;
  13606. }
  13607. if (min_alpha == max_alpha)
  13608. {
  13609. pack_eac_solid_block(blk, min_alpha);
  13610. return;
  13611. }
  13612. const uint32_t alpha_range = max_alpha - min_alpha;
  13613. const uint32_t SINGLE_TABLE_THRESH = 5;
  13614. if (alpha_range <= SINGLE_TABLE_THRESH)
  13615. {
  13616. // If alpha_range <= 5 table 13 is lossless
  13617. int base = clamp255((int)max_alpha - 2);
  13618. blk.m_base = base;
  13619. blk.m_multiplier = 1;
  13620. blk.m_table = 13;
  13621. base -= 3;
  13622. uint64_t packed_sels = 0;
  13623. for (uint32_t i = 0; i < 16; i++)
  13624. {
  13625. const int a = pPixels[i * stride];
  13626. static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 };
  13627. int sel = a - base;
  13628. assert(sel >= 0 && sel <= 5);
  13629. packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]);
  13630. }
  13631. blk.set_selector_bits(packed_sels);
  13632. return;
  13633. }
  13634. int base[16], mul[16];
  13635. for (uint32_t table = 0; table < 16; table++)
  13636. {
  13637. const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
  13638. base[table] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)));
  13639. mul[table] = clampi((int)roundf(alpha_range / range), 1, 15);
  13640. }
  13641. uint32_t total_err[16];
  13642. memset(total_err, 0, sizeof(total_err));
  13643. uint8_t sels[16][16];
  13644. for (uint32_t table = 0; table < 16; table++)
  13645. {
  13646. const int8_t* pTable = &g_eac_modifier_table[table][0];
  13647. const int m = mul[table], b = base[table];
  13648. uint32_t prev_l = 0, prev_a = UINT32_MAX;
  13649. for (uint32_t i = 0; i < 16; i++)
  13650. {
  13651. const int a = pPixels[i * stride];
  13652. if ((uint32_t)a == prev_a)
  13653. {
  13654. sels[table][i] = prev_l & 7;
  13655. total_err[table] += basisu::square<uint32_t>(prev_l >> 3);
  13656. }
  13657. else
  13658. {
  13659. uint32_t l = basisu::iabs(clamp255(m * pTable[0] + b) - a) << 3;
  13660. l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[1] + b) - a) << 3) | 1);
  13661. l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[2] + b) - a) << 3) | 2);
  13662. l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[3] + b) - a) << 3) | 3);
  13663. l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[4] + b) - a) << 3) | 4);
  13664. l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[5] + b) - a) << 3) | 5);
  13665. l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[6] + b) - a) << 3) | 6);
  13666. l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[7] + b) - a) << 3) | 7);
  13667. sels[table][i] = l & 7;
  13668. total_err[table] += basisu::square<uint32_t>(l >> 3);
  13669. prev_l = l;
  13670. prev_a = a;
  13671. }
  13672. }
  13673. }
  13674. uint32_t min_err = total_err[0], min_index = 0;
  13675. for (uint32_t i = 1; i < 16; i++)
  13676. {
  13677. if (total_err[i] < min_err)
  13678. {
  13679. min_err = total_err[i];
  13680. min_index = i;
  13681. }
  13682. }
  13683. blk.m_base = base[min_index];
  13684. blk.m_multiplier = mul[min_index];
  13685. blk.m_table = min_index;
  13686. uint64_t packed_sels = 0;
  13687. const uint8_t* pSels = &sels[min_index][0];
  13688. for (uint32_t i = 0; i < 16; i++)
  13689. packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]);
  13690. blk.set_selector_bits(packed_sels);
  13691. }
  13692. bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
  13693. {
  13694. unpacked_uastc_block unpacked_src_blk;
  13695. if (!unpack_uastc(src_blk, unpacked_src_blk, false))
  13696. return false;
  13697. const uint32_t mode = unpacked_src_blk.m_mode;
  13698. if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
  13699. {
  13700. pack_eac_solid_block(*static_cast<eac_block*>(pDst), unpacked_src_blk.m_solid_color.c[chan0]);
  13701. return true;
  13702. }
  13703. color32 block_pixels[4][4];
  13704. const bool unpack_srgb = false;
  13705. if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
  13706. return false;
  13707. if (chan0 == 3)
  13708. transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, pDst);
  13709. else
  13710. (high_quality ? pack_eac_high_quality : pack_eac)(*static_cast<eac_block*>(pDst), &block_pixels[0][0].c[chan0], sizeof(color32));
  13711. return true;
  13712. }
  13713. bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
  13714. {
  13715. unpacked_uastc_block unpacked_src_blk;
  13716. if (!unpack_uastc(src_blk, unpacked_src_blk, false))
  13717. return false;
  13718. const uint32_t mode = unpacked_src_blk.m_mode;
  13719. if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
  13720. {
  13721. pack_eac_solid_block(static_cast<eac_block*>(pDst)[0], unpacked_src_blk.m_solid_color.c[chan0]);
  13722. pack_eac_solid_block(static_cast<eac_block*>(pDst)[1], unpacked_src_blk.m_solid_color.c[chan1]);
  13723. return true;
  13724. }
  13725. color32 block_pixels[4][4];
  13726. const bool unpack_srgb = false;
  13727. if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
  13728. return false;
  13729. if (chan0 == 3)
  13730. transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[0]);
  13731. else
  13732. (high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[0], &block_pixels[0][0].c[chan0], sizeof(color32));
  13733. if (chan1 == 3)
  13734. transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[1]);
  13735. else
  13736. (high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[1], &block_pixels[0][0].c[chan1], sizeof(color32));
  13737. return true;
  13738. }
  13739. // PVRTC1
  13740. static void fixup_pvrtc1_4_modulation_rgb(
  13741. const uastc_block* pSrc_blocks,
  13742. const uint32_t* pPVRTC_endpoints,
  13743. void* pDst_blocks,
  13744. uint32_t num_blocks_x, uint32_t num_blocks_y, bool from_alpha)
  13745. {
  13746. const uint32_t x_mask = num_blocks_x - 1;
  13747. const uint32_t y_mask = num_blocks_y - 1;
  13748. const uint32_t x_bits = basisu::total_bits(x_mask);
  13749. const uint32_t y_bits = basisu::total_bits(y_mask);
  13750. const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
  13751. //const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
  13752. const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
  13753. uint32_t block_index = 0;
  13754. // really 3x3
  13755. int e0[4][4], e1[4][4];
  13756. for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
  13757. {
  13758. const uint32_t* pE_rows[3];
  13759. for (int ey = 0; ey < 3; ey++)
  13760. {
  13761. int by = y + ey - 1;
  13762. const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
  13763. pE_rows[ey] = pE;
  13764. for (int ex = 0; ex < 3; ex++)
  13765. {
  13766. int bx = 0 + ex - 1;
  13767. const uint32_t e = pE[bx & x_mask];
  13768. e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31;
  13769. e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31;
  13770. }
  13771. }
  13772. const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
  13773. for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
  13774. {
  13775. const uastc_block& src_block = pSrc_blocks[block_index];
  13776. color32 block_pixels[4][4];
  13777. unpack_uastc(src_block, &block_pixels[0][0], false);
  13778. if (from_alpha)
  13779. {
  13780. // Just set RGB to alpha to avoid adding complexity below.
  13781. for (uint32_t i = 0; i < 16; i++)
  13782. {
  13783. const uint8_t a = ((color32*)block_pixels)[i].a;
  13784. ((color32*)block_pixels)[i].set(a, a, a, 255);
  13785. }
  13786. }
  13787. const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
  13788. uint32_t swizzled = x_swizzle | y_swizzle;
  13789. if (num_blocks_x != num_blocks_y)
  13790. {
  13791. swizzled &= swizzle_mask;
  13792. if (num_blocks_x > num_blocks_y)
  13793. swizzled |= ((x >> min_bits) << (min_bits * 2));
  13794. else
  13795. swizzled |= ((y >> min_bits) << (min_bits * 2));
  13796. }
  13797. pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
  13798. pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
  13799. {
  13800. const uint32_t ex = 2;
  13801. int bx = x + ex - 1;
  13802. bx &= x_mask;
  13803. #define DO_ROW(ey) \
  13804. { \
  13805. const uint32_t e = pE_rows[ey][bx]; \
  13806. e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \
  13807. e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \
  13808. }
  13809. DO_ROW(0);
  13810. DO_ROW(1);
  13811. DO_ROW(2);
  13812. #undef DO_ROW
  13813. }
  13814. uint32_t mod = 0;
  13815. #define DO_PIX(lx, ly, w0, w1, w2, w3) \
  13816. { \
  13817. int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
  13818. int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
  13819. int cl = (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b) * 16; \
  13820. int dl = cb_l - ca_l; \
  13821. int vl = cl - ca_l; \
  13822. int p = vl * 16; \
  13823. if (ca_l > cb_l) { p = -p; dl = -dl; } \
  13824. uint32_t m = 0; \
  13825. if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
  13826. if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
  13827. if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
  13828. mod |= m; \
  13829. }
  13830. {
  13831. const uint32_t ex = 0, ey = 0;
  13832. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  13833. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  13834. DO_PIX(0, 0, 4, 4, 4, 4);
  13835. DO_PIX(1, 0, 2, 6, 2, 6);
  13836. DO_PIX(0, 1, 2, 2, 6, 6);
  13837. DO_PIX(1, 1, 1, 3, 3, 9);
  13838. }
  13839. {
  13840. const uint32_t ex = 1, ey = 0;
  13841. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  13842. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  13843. DO_PIX(2, 0, 8, 0, 8, 0);
  13844. DO_PIX(3, 0, 6, 2, 6, 2);
  13845. DO_PIX(2, 1, 4, 0, 12, 0);
  13846. DO_PIX(3, 1, 3, 1, 9, 3);
  13847. }
  13848. {
  13849. const uint32_t ex = 0, ey = 1;
  13850. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  13851. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  13852. DO_PIX(0, 2, 8, 8, 0, 0);
  13853. DO_PIX(1, 2, 4, 12, 0, 0);
  13854. DO_PIX(0, 3, 6, 6, 2, 2);
  13855. DO_PIX(1, 3, 3, 9, 1, 3);
  13856. }
  13857. {
  13858. const uint32_t ex = 1, ey = 1;
  13859. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  13860. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  13861. DO_PIX(2, 2, 16, 0, 0, 0);
  13862. DO_PIX(3, 2, 12, 4, 0, 0);
  13863. DO_PIX(2, 3, 12, 0, 4, 0);
  13864. DO_PIX(3, 3, 9, 3, 3, 1);
  13865. }
  13866. #undef DO_PIX
  13867. pDst_block->m_modulation = mod;
  13868. e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
  13869. e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
  13870. e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
  13871. e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
  13872. e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
  13873. e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
  13874. } // x
  13875. } // y
  13876. }
  13877. static void fixup_pvrtc1_4_modulation_rgba(
  13878. const uastc_block* pSrc_blocks,
  13879. const uint32_t* pPVRTC_endpoints,
  13880. void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
  13881. {
  13882. const uint32_t x_mask = num_blocks_x - 1;
  13883. const uint32_t y_mask = num_blocks_y - 1;
  13884. const uint32_t x_bits = basisu::total_bits(x_mask);
  13885. const uint32_t y_bits = basisu::total_bits(y_mask);
  13886. const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
  13887. //const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
  13888. const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
  13889. uint32_t block_index = 0;
  13890. // really 3x3
  13891. int e0[4][4], e1[4][4];
  13892. for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
  13893. {
  13894. const uint32_t* pE_rows[3];
  13895. for (int ey = 0; ey < 3; ey++)
  13896. {
  13897. int by = y + ey - 1;
  13898. const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
  13899. pE_rows[ey] = pE;
  13900. for (int ex = 0; ex < 3; ex++)
  13901. {
  13902. int bx = 0 + ex - 1;
  13903. const uint32_t e = pE[bx & x_mask];
  13904. e0[ex][ey] = get_endpoint_l8(e, 0);
  13905. e1[ex][ey] = get_endpoint_l8(e, 1);
  13906. }
  13907. }
  13908. const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
  13909. for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
  13910. {
  13911. const uastc_block& src_block = pSrc_blocks[block_index];
  13912. color32 block_pixels[4][4];
  13913. unpack_uastc(src_block, &block_pixels[0][0], false);
  13914. const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
  13915. uint32_t swizzled = x_swizzle | y_swizzle;
  13916. if (num_blocks_x != num_blocks_y)
  13917. {
  13918. swizzled &= swizzle_mask;
  13919. if (num_blocks_x > num_blocks_y)
  13920. swizzled |= ((x >> min_bits) << (min_bits * 2));
  13921. else
  13922. swizzled |= ((y >> min_bits) << (min_bits * 2));
  13923. }
  13924. pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
  13925. pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
  13926. {
  13927. const uint32_t ex = 2;
  13928. int bx = x + ex - 1;
  13929. bx &= x_mask;
  13930. #define DO_ROW(ey) \
  13931. { \
  13932. const uint32_t e = pE_rows[ey][bx]; \
  13933. e0[ex][ey] = get_endpoint_l8(e, 0); \
  13934. e1[ex][ey] = get_endpoint_l8(e, 1); \
  13935. }
  13936. DO_ROW(0);
  13937. DO_ROW(1);
  13938. DO_ROW(2);
  13939. #undef DO_ROW
  13940. }
  13941. uint32_t mod = 0;
  13942. #define DO_PIX(lx, ly, w0, w1, w2, w3) \
  13943. { \
  13944. int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
  13945. int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
  13946. int cl = 16 * (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b + block_pixels[ly][lx].a); \
  13947. int dl = cb_l - ca_l; \
  13948. int vl = cl - ca_l; \
  13949. int p = vl * 16; \
  13950. if (ca_l > cb_l) { p = -p; dl = -dl; } \
  13951. uint32_t m = 0; \
  13952. if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
  13953. if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
  13954. if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
  13955. mod |= m; \
  13956. }
  13957. {
  13958. const uint32_t ex = 0, ey = 0;
  13959. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  13960. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  13961. DO_PIX(0, 0, 4, 4, 4, 4);
  13962. DO_PIX(1, 0, 2, 6, 2, 6);
  13963. DO_PIX(0, 1, 2, 2, 6, 6);
  13964. DO_PIX(1, 1, 1, 3, 3, 9);
  13965. }
  13966. {
  13967. const uint32_t ex = 1, ey = 0;
  13968. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  13969. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  13970. DO_PIX(2, 0, 8, 0, 8, 0);
  13971. DO_PIX(3, 0, 6, 2, 6, 2);
  13972. DO_PIX(2, 1, 4, 0, 12, 0);
  13973. DO_PIX(3, 1, 3, 1, 9, 3);
  13974. }
  13975. {
  13976. const uint32_t ex = 0, ey = 1;
  13977. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  13978. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  13979. DO_PIX(0, 2, 8, 8, 0, 0);
  13980. DO_PIX(1, 2, 4, 12, 0, 0);
  13981. DO_PIX(0, 3, 6, 6, 2, 2);
  13982. DO_PIX(1, 3, 3, 9, 1, 3);
  13983. }
  13984. {
  13985. const uint32_t ex = 1, ey = 1;
  13986. const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
  13987. const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
  13988. DO_PIX(2, 2, 16, 0, 0, 0);
  13989. DO_PIX(3, 2, 12, 4, 0, 0);
  13990. DO_PIX(2, 3, 12, 0, 4, 0);
  13991. DO_PIX(3, 3, 9, 3, 3, 1);
  13992. }
  13993. #undef DO_PIX
  13994. pDst_block->m_modulation = mod;
  13995. e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
  13996. e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
  13997. e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
  13998. e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
  13999. e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
  14000. e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
  14001. } // x
  14002. } // y
  14003. }
  14004. bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha)
  14005. {
  14006. BASISU_NOTE_UNUSED(high_quality);
  14007. if ((!num_blocks_x) || (!num_blocks_y))
  14008. return false;
  14009. const uint32_t width = num_blocks_x * 4;
  14010. const uint32_t height = num_blocks_y * 4;
  14011. if (!basisu::is_pow2(width) || !basisu::is_pow2(height))
  14012. return false;
  14013. basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y);
  14014. for (uint32_t y = 0; y < num_blocks_y; y++)
  14015. {
  14016. for (uint32_t x = 0; x < num_blocks_x; x++)
  14017. {
  14018. color32 block_pixels[16];
  14019. if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false))
  14020. return false;
  14021. // Get block's RGB bounding box
  14022. color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0);
  14023. if (from_alpha)
  14024. {
  14025. uint32_t low_a = 255, high_a = 0;
  14026. for (uint32_t i = 0; i < 16; i++)
  14027. {
  14028. low_a = basisu::minimum<uint32_t>(low_a, block_pixels[i].a);
  14029. high_a = basisu::maximum<uint32_t>(high_a, block_pixels[i].a);
  14030. }
  14031. low_color.set(low_a, low_a, low_a, 255);
  14032. high_color.set(high_a, high_a, high_a, 255);
  14033. }
  14034. else
  14035. {
  14036. for (uint32_t i = 0; i < 16; i++)
  14037. {
  14038. low_color = color32::comp_min(low_color, block_pixels[i]);
  14039. high_color = color32::comp_max(high_color, block_pixels[i]);
  14040. }
  14041. }
  14042. // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
  14043. pvrtc4_block temp;
  14044. temp.set_opaque_endpoint_floor(0, low_color);
  14045. temp.set_opaque_endpoint_ceil(1, high_color);
  14046. temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints;
  14047. }
  14048. }
  14049. fixup_pvrtc1_4_modulation_rgb(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y, from_alpha);
  14050. return true;
  14051. }
  14052. bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality)
  14053. {
  14054. BASISU_NOTE_UNUSED(high_quality);
  14055. if ((!num_blocks_x) || (!num_blocks_y))
  14056. return false;
  14057. const uint32_t width = num_blocks_x * 4;
  14058. const uint32_t height = num_blocks_y * 4;
  14059. if (!basisu::is_pow2(width) || !basisu::is_pow2(height))
  14060. return false;
  14061. basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y);
  14062. for (uint32_t y = 0; y < num_blocks_y; y++)
  14063. {
  14064. for (uint32_t x = 0; x < num_blocks_x; x++)
  14065. {
  14066. color32 block_pixels[16];
  14067. if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false))
  14068. return false;
  14069. // Get block's RGBA bounding box
  14070. color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0);
  14071. for (uint32_t i = 0; i < 16; i++)
  14072. {
  14073. low_color = color32::comp_min(low_color, block_pixels[i]);
  14074. high_color = color32::comp_max(high_color, block_pixels[i]);
  14075. }
  14076. // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
  14077. pvrtc4_block temp;
  14078. temp.set_endpoint_floor(0, low_color);
  14079. temp.set_endpoint_ceil(1, high_color);
  14080. temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints;
  14081. }
  14082. }
  14083. fixup_pvrtc1_4_modulation_rgba(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y);
  14084. return true;
  14085. }
  14086. void uastc_init()
  14087. {
  14088. for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++)
  14089. {
  14090. if (!astc_is_valid_endpoint_range(range))
  14091. continue;
  14092. const uint32_t levels = astc_get_levels(range);
  14093. uint32_t vals[256];
  14094. for (uint32_t i = 0; i < levels; i++)
  14095. vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i;
  14096. std::sort(vals, vals + levels);
  14097. for (uint32_t i = 0; i < levels; i++)
  14098. {
  14099. const uint32_t order = vals[i] & 0xFF;
  14100. const uint32_t unq = vals[i] >> 8;
  14101. g_astc_unquant[range][order].m_unquant = (uint8_t)unq;
  14102. g_astc_unquant[range][order].m_index = (uint8_t)i;
  14103. } // i
  14104. }
  14105. // TODO: Precompute?
  14106. // BC7 777.1
  14107. for (int c = 0; c < 256; c++)
  14108. {
  14109. for (uint32_t lp = 0; lp < 2; lp++)
  14110. {
  14111. endpoint_err best;
  14112. best.m_error = (uint16_t)UINT16_MAX;
  14113. for (uint32_t l = 0; l < 128; l++)
  14114. {
  14115. const uint32_t low = (l << 1) | lp;
  14116. for (uint32_t h = 0; h < 128; h++)
  14117. {
  14118. const uint32_t high = (h << 1) | lp;
  14119. const int k = (low * (64 - g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX]) + high * g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX] + 32) >> 6;
  14120. const int err = (k - c) * (k - c);
  14121. if (err < best.m_error)
  14122. {
  14123. best.m_error = (uint16_t)err;
  14124. best.m_lo = (uint8_t)l;
  14125. best.m_hi = (uint8_t)h;
  14126. }
  14127. } // h
  14128. } // l
  14129. g_bc7_mode_6_optimal_endpoints[c][lp] = best;
  14130. } // lp
  14131. } // c
  14132. // BC7 777
  14133. for (int c = 0; c < 256; c++)
  14134. {
  14135. endpoint_err best;
  14136. best.m_error = (uint16_t)UINT16_MAX;
  14137. for (uint32_t l = 0; l < 128; l++)
  14138. {
  14139. const uint32_t low = (l << 1) | (l >> 6);
  14140. for (uint32_t h = 0; h < 128; h++)
  14141. {
  14142. const uint32_t high = (h << 1) | (h >> 6);
  14143. const int k = (low * (64 - g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX] + 32) >> 6;
  14144. const int err = (k - c) * (k - c);
  14145. if (err < best.m_error)
  14146. {
  14147. best.m_error = (uint16_t)err;
  14148. best.m_lo = (uint8_t)l;
  14149. best.m_hi = (uint8_t)h;
  14150. }
  14151. } // h
  14152. } // l
  14153. g_bc7_mode_5_optimal_endpoints[c] = best;
  14154. } // c
  14155. }
  14156. #endif // #if BASISD_SUPPORT_UASTC
  14157. // ------------------------------------------------------------------------------------------------------
  14158. // KTX2
  14159. // ------------------------------------------------------------------------------------------------------
  14160. #if BASISD_SUPPORT_KTX2
  // The 12-byte identifier every KTX2 file starts with: 0xAB "KTX 20" 0xBB "\r\n" 0x1A "\n".
  const uint8_t g_ktx2_file_identifier[12] =
  {
      0xAB, 'K', 'T', 'X', ' ', '2', '0', 0xBB,
      '\r', '\n', 0x1A, '\n'
  };
  14162. ktx2_transcoder::ktx2_transcoder() :
  14163. m_etc1s_transcoder()
  14164. {
  14165. clear();
  14166. }
  14167. void ktx2_transcoder::clear()
  14168. {
  14169. m_pData = nullptr;
  14170. m_data_size = 0;
  14171. memset(&m_header, 0, sizeof(m_header));
  14172. m_levels.clear();
  14173. m_dfd.clear();
  14174. m_key_values.clear();
  14175. memset(&m_etc1s_header, 0, sizeof(m_etc1s_header));
  14176. m_etc1s_image_descs.clear();
  14177. m_format = basist::basis_tex_format::cETC1S;
  14178. m_dfd_color_model = 0;
  14179. m_dfd_color_prims = KTX2_DF_PRIMARIES_UNSPECIFIED;
  14180. m_dfd_transfer_func = 0;
  14181. m_dfd_flags = 0;
  14182. m_dfd_samples = 0;
  14183. m_dfd_chan0 = KTX2_DF_CHANNEL_UASTC_RGB;
  14184. m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB;
  14185. m_etc1s_transcoder.clear();
  14186. m_def_transcoder_state.clear();
  14187. m_has_alpha = false;
  14188. m_is_video = false;
  14189. }
  14190. bool ktx2_transcoder::init(const void* pData, uint32_t data_size)
  14191. {
  14192. clear();
  14193. if (!pData)
  14194. {
  14195. BASISU_DEVEL_ERROR("ktx2_transcoder::init: pData is nullptr\n");
  14196. assert(0);
  14197. return false;
  14198. }
  14199. if (data_size <= sizeof(ktx2_header))
  14200. {
  14201. BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is impossibly too small to be a valid KTX2 file\n");
  14202. return false;
  14203. }
  14204. if (memcmp(pData, g_ktx2_file_identifier, sizeof(g_ktx2_file_identifier)) != 0)
  14205. {
  14206. BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file identifier is not present\n");
  14207. return false;
  14208. }
  14209. m_pData = static_cast<const uint8_t *>(pData);
  14210. m_data_size = data_size;
  14211. memcpy(&m_header, pData, sizeof(m_header));
  14212. // We only support UASTC LDR, UASTC HDR and ETC1S.
  14213. // Note the DFD's contents are what we are guided by for decoding the KTX2 file, not this format field (currently).
  14214. if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) &&
  14215. (m_header.m_vk_format != basist::KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK))
  14216. {
  14217. BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n");
  14218. return false;
  14219. }
  14220. // 3.3: "When format is VK_FORMAT_UNDEFINED, typeSize must equal 1."
  14221. if (m_header.m_type_size != 1)
  14222. {
  14223. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid type_size\n");
  14224. return false;
  14225. }
  14226. // We only currently support 2D textures (plain, cubemapped, or texture array), which is by far the most common use case.
  14227. // The BasisU library does not support 1D or 3D textures at all.
  14228. if ((m_header.m_pixel_width < 1) || (m_header.m_pixel_height < 1) || (m_header.m_pixel_depth > 0))
  14229. {
  14230. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Only 2D or cubemap textures are supported\n");
  14231. return false;
  14232. }
  14233. // Face count must be 1 or 6
  14234. if ((m_header.m_face_count != 1) && (m_header.m_face_count != 6))
  14235. {
  14236. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid face count, file is corrupted or invalid\n");
  14237. return false;
  14238. }
  14239. if (m_header.m_face_count > 1)
  14240. {
  14241. // 3.4: Make sure cubemaps are square.
  14242. if (m_header.m_pixel_width != m_header.m_pixel_height)
  14243. {
  14244. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Cubemap is not square\n");
  14245. return false;
  14246. }
  14247. }
  14248. // 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats"
  14249. if (m_header.m_level_count < 1)
  14250. {
  14251. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level count\n");
  14252. return false;
  14253. }
  14254. // Sanity check the level count.
  14255. if (m_header.m_level_count > KTX2_MAX_SUPPORTED_LEVEL_COUNT)
  14256. {
  14257. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Too many levels or file is corrupted or invalid\n");
  14258. return false;
  14259. }
  14260. if (m_header.m_supercompression_scheme > KTX2_SS_ZSTANDARD)
  14261. {
  14262. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid/unsupported supercompression or file is corrupted or invalid\n");
  14263. return false;
  14264. }
  14265. if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
  14266. {
  14267. if (m_header.m_sgd_byte_length <= sizeof(ktx2_etc1s_global_data_header))
  14268. {
  14269. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data is too small\n");
  14270. return false;
  14271. }
  14272. if (m_header.m_sgd_byte_offset < sizeof(ktx2_header))
  14273. {
  14274. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset is too low\n");
  14275. return false;
  14276. }
  14277. if (m_header.m_sgd_byte_offset + m_header.m_sgd_byte_length > m_data_size)
  14278. {
  14279. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset and/or length is too high\n");
  14280. return false;
  14281. }
  14282. }
  14283. if (!m_levels.try_resize(m_header.m_level_count))
  14284. {
  14285. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n");
  14286. return false;
  14287. }
  14288. const uint32_t level_index_size_in_bytes = basisu::maximum(1U, (uint32_t)m_header.m_level_count) * sizeof(ktx2_level_index);
  14289. if ((sizeof(ktx2_header) + level_index_size_in_bytes) > m_data_size)
  14290. {
  14291. BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is too small (can't read level index array)\n");
  14292. return false;
  14293. }
  14294. memcpy(&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes);
  14295. // Sanity check the level offsets and byte sizes
  14296. for (uint32_t i = 0; i < m_levels.size(); i++)
  14297. {
  14298. if (m_levels[i].m_byte_offset < sizeof(ktx2_header))
  14299. {
  14300. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too low)\n");
  14301. return false;
  14302. }
  14303. if (!m_levels[i].m_byte_length)
  14304. {
  14305. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level byte length\n");
  14306. }
  14307. if ((m_levels[i].m_byte_offset + m_levels[i].m_byte_length) > m_data_size)
  14308. {
  14309. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n");
  14310. return false;
  14311. }
  14312. const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL;
  14313. if (m_levels[i].m_uncompressed_byte_length >= MAX_SANE_LEVEL_UNCOMP_SIZE)
  14314. {
  14315. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n");
  14316. return false;
  14317. }
  14318. if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
  14319. {
  14320. if (m_levels[i].m_uncompressed_byte_length)
  14321. {
  14322. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (0)\n");
  14323. return false;
  14324. }
  14325. }
  14326. else if (m_header.m_supercompression_scheme >= KTX2_SS_ZSTANDARD)
  14327. {
  14328. if (!m_levels[i].m_uncompressed_byte_length)
  14329. {
  14330. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (1)\n");
  14331. return false;
  14332. }
  14333. }
  14334. }
  14335. const uint32_t DFD_MINIMUM_SIZE = 44, DFD_MAXIMUM_SIZE = 60;
  14336. if ((m_header.m_dfd_byte_length != DFD_MINIMUM_SIZE) && (m_header.m_dfd_byte_length != DFD_MAXIMUM_SIZE))
  14337. {
  14338. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD size\n");
  14339. return false;
  14340. }
  14341. if (((m_header.m_dfd_byte_offset + m_header.m_dfd_byte_length) > m_data_size) || (m_header.m_dfd_byte_offset < sizeof(ktx2_header)))
  14342. {
  14343. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n");
  14344. return false;
  14345. }
  14346. const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset;
  14347. if (!m_dfd.try_resize(m_header.m_dfd_byte_length))
  14348. {
  14349. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n");
  14350. return false;
  14351. }
  14352. memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length);
  14353. // This is all hard coded for only ETC1S and UASTC.
  14354. uint32_t dfd_total_size = basisu::read_le_dword(pDFD);
  14355. // 3.10.3: Sanity check
  14356. if (dfd_total_size != m_header.m_dfd_byte_length)
  14357. {
  14358. BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n");
  14359. return false;
  14360. }
  14361. // 3.10.3: More sanity checking
  14362. if (m_header.m_kvd_byte_length)
  14363. {
  14364. if (dfd_total_size != m_header.m_kvd_byte_offset - m_header.m_dfd_byte_offset)
  14365. {
  14366. BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (2)\n");
  14367. return false;
  14368. }
  14369. }
  14370. const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t));
  14371. const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t));
  14372. m_dfd_color_model = dfd_bits & 255;
  14373. m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255);
  14374. m_dfd_transfer_func = (dfd_bits >> 16) & 255;
  14375. m_dfd_flags = (dfd_bits >> 24) & 255;
  14376. // See 3.10.1.Restrictions
  14377. if ((m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_LINEAR) && (m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_SRGB))
  14378. {
  14379. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD transfer function\n");
  14380. return false;
  14381. }
  14382. if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S)
  14383. {
  14384. m_format = basist::basis_tex_format::cETC1S;
  14385. // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD's sample count."
  14386. // If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that.
  14387. m_has_alpha = (m_header.m_dfd_byte_length == 60);
  14388. m_dfd_samples = m_has_alpha ? 2 : 1;
  14389. m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
  14390. if (m_has_alpha)
  14391. {
  14392. const uint32_t sample_channel1 = basisu::read_le_dword(pDFD + 11 * sizeof(uint32_t));
  14393. m_dfd_chan1 = (ktx2_df_channel_id)((sample_channel1 >> 24) & 15);
  14394. }
  14395. }
  14396. else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC)
  14397. {
  14398. m_format = basist::basis_tex_format::cUASTC4x4;
  14399. m_dfd_samples = 1;
  14400. m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
  14401. // We're assuming "DATA" means RGBA so it has alpha.
  14402. m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
  14403. }
  14404. else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR)
  14405. {
  14406. m_format = basist::basis_tex_format::cUASTC_HDR_4x4;
  14407. m_dfd_samples = 1;
  14408. m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
  14409. // We're assuming "DATA" means RGBA so it has alpha.
  14410. m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
  14411. }
  14412. else
  14413. {
  14414. // Unsupported DFD color model.
  14415. BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n");
  14416. return false;
  14417. }
  14418. if (!read_key_values())
  14419. {
  14420. BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n");
  14421. return false;
  14422. }
  14423. // Check for a KTXanimData key
  14424. for (uint32_t i = 0; i < m_key_values.size(); i++)
  14425. {
  14426. if (strcmp(reinterpret_cast<const char*>(m_key_values[i].m_key.data()), "KTXanimData") == 0)
  14427. {
  14428. m_is_video = true;
  14429. break;
  14430. }
  14431. }
  14432. return true;
  14433. }
  14434. uint32_t ktx2_transcoder::get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
  14435. {
  14436. const uint32_t etc1s_image_index =
  14437. (level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
  14438. layer_index * m_header.m_face_count +
  14439. face_index;
  14440. if (etc1s_image_index >= get_etc1s_image_descs().size())
  14441. {
  14442. assert(0);
  14443. return 0;
  14444. }
  14445. return get_etc1s_image_descs()[etc1s_image_index].m_image_flags;
  14446. }
  14447. const basisu::uint8_vec* ktx2_transcoder::find_key(const std::string& key_name) const
  14448. {
  14449. for (uint32_t i = 0; i < m_key_values.size(); i++)
  14450. if (strcmp((const char *)m_key_values[i].m_key.data(), key_name.c_str()) == 0)
  14451. return &m_key_values[i].m_value;
  14452. return nullptr;
  14453. }
  14454. bool ktx2_transcoder::start_transcoding()
  14455. {
  14456. if (!m_pData)
  14457. {
  14458. BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Must call init() first\n");
  14459. return false;
  14460. }
  14461. if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
  14462. {
  14463. // Check if we've already decompressed the ETC1S global data. If so don't unpack it again.
  14464. if (!m_etc1s_transcoder.get_endpoints().empty())
  14465. return true;
  14466. if (!decompress_etc1s_global_data())
  14467. {
  14468. BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n");
  14469. return false;
  14470. }
  14471. if (!m_is_video)
  14472. {
  14473. // See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key.
  14474. // Video cannot be a cubemap, and it must be a texture array.
  14475. if ((m_header.m_face_count == 1) && (m_header.m_layer_count > 1))
  14476. {
  14477. for (uint32_t i = 0; i < m_etc1s_image_descs.size(); i++)
  14478. {
  14479. if (m_etc1s_image_descs[i].m_image_flags & KTX2_IMAGE_IS_P_FRAME)
  14480. {
  14481. m_is_video = true;
  14482. break;
  14483. }
  14484. }
  14485. }
  14486. }
  14487. }
  14488. else if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
  14489. {
  14490. #if !BASISD_SUPPORT_KTX2_ZSTD
  14491. BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: File uses zstd supercompression, but zstd support was not enabled at compilation time (BASISD_SUPPORT_KTX2_ZSTD == 0)\n");
  14492. return false;
  14493. #endif
  14494. }
  14495. return true;
  14496. }
  14497. bool ktx2_transcoder::get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
  14498. {
  14499. if (level_index >= m_levels.size())
  14500. {
  14501. BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: level_index >= m_levels.size()\n");
  14502. return false;
  14503. }
  14504. if (m_header.m_face_count > 1)
  14505. {
  14506. if (face_index >= 6)
  14507. {
  14508. BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index >= 6\n");
  14509. return false;
  14510. }
  14511. }
  14512. else if (face_index != 0)
  14513. {
  14514. BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index != 0\n");
  14515. return false;
  14516. }
  14517. if (layer_index >= basisu::maximum<uint32_t>(m_header.m_layer_count, 1))
  14518. {
  14519. BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n");
  14520. return false;
  14521. }
  14522. const uint32_t level_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1);
  14523. const uint32_t level_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1);
  14524. const uint32_t num_blocks_x = (level_width + 3) >> 2;
  14525. const uint32_t num_blocks_y = (level_height + 3) >> 2;
  14526. level_info.m_face_index = face_index;
  14527. level_info.m_layer_index = layer_index;
  14528. level_info.m_level_index = level_index;
  14529. level_info.m_orig_width = level_width;
  14530. level_info.m_orig_height = level_height;
  14531. level_info.m_width = num_blocks_x * 4;
  14532. level_info.m_height = num_blocks_y * 4;
  14533. level_info.m_num_blocks_x = num_blocks_x;
  14534. level_info.m_num_blocks_y = num_blocks_y;
  14535. level_info.m_total_blocks = num_blocks_x * num_blocks_y;
  14536. level_info.m_alpha_flag = m_has_alpha;
  14537. level_info.m_iframe_flag = false;
  14538. if (m_etc1s_image_descs.size())
  14539. {
  14540. const uint32_t etc1s_image_index =
  14541. (level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
  14542. layer_index * m_header.m_face_count +
  14543. face_index;
  14544. level_info.m_iframe_flag = (m_etc1s_image_descs[etc1s_image_index].m_image_flags & KTX2_IMAGE_IS_P_FRAME) == 0;
  14545. }
  14546. return true;
  14547. }
  14548. bool ktx2_transcoder::transcode_image_level(
  14549. uint32_t level_index, uint32_t layer_index, uint32_t face_index,
  14550. void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
  14551. basist::transcoder_texture_format fmt,
  14552. uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1,
  14553. ktx2_transcoder_state* pState)
  14554. {
  14555. if (!m_pData)
  14556. {
  14557. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Must call init() first\n");
  14558. return false;
  14559. }
  14560. if (!pState)
  14561. pState = &m_def_transcoder_state;
  14562. if (level_index >= m_levels.size())
  14563. {
  14564. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n");
  14565. return false;
  14566. }
  14567. if (m_header.m_face_count > 1)
  14568. {
  14569. if (face_index >= 6)
  14570. {
  14571. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index >= 6\n");
  14572. return false;
  14573. }
  14574. }
  14575. else if (face_index != 0)
  14576. {
  14577. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index != 0\n");
  14578. return false;
  14579. }
  14580. if (layer_index >= basisu::maximum<uint32_t>(m_header.m_layer_count, 1))
  14581. {
  14582. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n");
  14583. return false;
  14584. }
  14585. const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset;
  14586. uint64_t comp_level_data_size = m_levels[level_index].m_byte_length;
  14587. const uint8_t* pUncomp_level_data = pComp_level_data;
  14588. uint64_t uncomp_level_data_size = comp_level_data_size;
  14589. if (uncomp_level_data_size > UINT32_MAX)
  14590. {
  14591. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n");
  14592. return false;
  14593. }
  14594. if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
  14595. {
  14596. // Check if we've already decompressed this level's supercompressed data.
  14597. if ((int)level_index != pState->m_uncomp_data_level_index)
  14598. {
  14599. // Uncompress the entire level's supercompressed data.
  14600. if (!decompress_level_data(level_index, pState->m_level_uncomp_data))
  14601. {
  14602. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: decompress_level_data() failed\n");
  14603. return false;
  14604. }
  14605. pState->m_uncomp_data_level_index = level_index;
  14606. }
  14607. pUncomp_level_data = pState->m_level_uncomp_data.data();
  14608. uncomp_level_data_size = pState->m_level_uncomp_data.size();
  14609. }
  14610. const uint32_t level_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1);
  14611. const uint32_t level_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1);
  14612. const uint32_t num_blocks_x = (level_width + 3) >> 2;
  14613. const uint32_t num_blocks_y = (level_height + 3) >> 2;
  14614. if (m_format == basist::basis_tex_format::cETC1S)
  14615. {
  14616. // Ensure start_transcoding() was called.
  14617. if (m_etc1s_transcoder.get_endpoints().empty())
  14618. {
  14619. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n");
  14620. return false;
  14621. }
  14622. const uint32_t etc1s_image_index =
  14623. (level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
  14624. layer_index * m_header.m_face_count +
  14625. face_index;
  14626. // Sanity check
  14627. if (etc1s_image_index >= m_etc1s_image_descs.size())
  14628. {
  14629. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: etc1s_image_index >= m_etc1s_image_descs.size()\n");
  14630. assert(0);
  14631. return false;
  14632. }
  14633. if (static_cast<uint32_t>(m_data_size) != m_data_size)
  14634. {
  14635. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: File is too large\n");
  14636. return false;
  14637. }
  14638. const ktx2_etc1s_image_desc& image_desc = m_etc1s_image_descs[etc1s_image_index];
  14639. if (!m_etc1s_transcoder.transcode_image(fmt,
  14640. pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, m_pData, static_cast<uint32_t>(m_data_size),
  14641. num_blocks_x, num_blocks_y, level_width, level_height,
  14642. level_index,
  14643. m_levels[level_index].m_byte_offset + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length,
  14644. image_desc.m_alpha_slice_byte_length ? (m_levels[level_index].m_byte_offset + image_desc.m_alpha_slice_byte_offset) : 0, image_desc.m_alpha_slice_byte_length,
  14645. decode_flags, m_has_alpha,
  14646. m_is_video, output_row_pitch_in_blocks_or_pixels, &pState->m_transcoder_state, output_rows_in_pixels))
  14647. {
  14648. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ETC1S transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
  14649. return false;
  14650. }
  14651. }
  14652. else if ((m_format == basist::basis_tex_format::cUASTC4x4) ||
  14653. (m_format == basist::basis_tex_format::cUASTC_HDR_4x4))
  14654. {
  14655. // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
  14656. assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length);
  14657. const uint32_t total_2D_image_size = num_blocks_x * num_blocks_y * KTX2_UASTC_BLOCK_SIZE;
  14658. const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size;
  14659. // Sanity checks
  14660. if (uncomp_ofs >= uncomp_level_data_size)
  14661. {
  14662. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n");
  14663. return false;
  14664. }
  14665. if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size)
  14666. {
  14667. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n");
  14668. return false;
  14669. }
  14670. if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)
  14671. {
  14672. if (!m_uastc_hdr_transcoder.transcode_image(fmt,
  14673. pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
  14674. (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
  14675. 0, (uint32_t)total_2D_image_size,
  14676. decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
  14677. {
  14678. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
  14679. return false;
  14680. }
  14681. }
  14682. else
  14683. {
  14684. if (!m_uastc_transcoder.transcode_image(fmt,
  14685. pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
  14686. (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
  14687. 0, (uint32_t)total_2D_image_size,
  14688. decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
  14689. {
  14690. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
  14691. return false;
  14692. }
  14693. }
  14694. }
  14695. else
  14696. {
  14697. // Shouldn't get here.
  14698. BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Internal error\n");
  14699. assert(0);
  14700. return false;
  14701. }
  14702. return true;
  14703. }
  14704. bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data)
  14705. {
  14706. const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData;
  14707. const uint64_t comp_size = m_levels[level_index].m_byte_length;
  14708. const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length;
  14709. if (((size_t)comp_size) != comp_size)
  14710. {
  14711. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Compressed data too large\n");
  14712. return false;
  14713. }
  14714. if (((size_t)uncomp_size) != uncomp_size)
  14715. {
  14716. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Uncompressed data too large\n");
  14717. return false;
  14718. }
  14719. if (!uncomp_data.try_resize((size_t)uncomp_size))
  14720. {
  14721. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n");
  14722. return false;
  14723. }
  14724. if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
  14725. {
  14726. #if BASISD_SUPPORT_KTX2_ZSTD
  14727. size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size);
  14728. if (ZSTD_isError(actualUncompSize))
  14729. {
  14730. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression failed, file is invalid or corrupted\n");
  14731. return false;
  14732. }
  14733. if (actualUncompSize != uncomp_size)
  14734. {
  14735. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression returned too few bytes, file is invalid or corrupted\n");
  14736. return false;
  14737. }
  14738. #else
  14739. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: File uses Zstd supercompression, but Zstd support was not enabled at compile time (BASISD_SUPPORT_KTX2_ZSTD is 0)\n");
  14740. return false;
  14741. #endif
  14742. }
  14743. return true;
  14744. }
  14745. bool ktx2_transcoder::decompress_etc1s_global_data()
  14746. {
  14747. // Note: we don't actually support 3D textures in here yet
  14748. //uint32_t layer_pixel_depth = basisu::maximum<uint32_t>(m_header.m_pixel_depth, 1);
  14749. //for (uint32_t i = 1; i < m_header.m_level_count; i++)
  14750. // layer_pixel_depth += basisu::maximum<uint32_t>(m_header.m_pixel_depth >> i, 1);
  14751. const uint32_t image_count = basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count;
  14752. assert(image_count);
  14753. const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset;
  14754. memcpy(&m_etc1s_header, pSrc, sizeof(ktx2_etc1s_global_data_header));
  14755. pSrc += sizeof(ktx2_etc1s_global_data_header);
  14756. if ((!m_etc1s_header.m_endpoints_byte_length) || (!m_etc1s_header.m_selectors_byte_length) || (!m_etc1s_header.m_tables_byte_length))
  14757. {
  14758. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Invalid ETC1S global data\n");
  14759. return false;
  14760. }
  14761. if ((!m_etc1s_header.m_endpoint_count) || (!m_etc1s_header.m_selector_count))
  14762. {
  14763. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: endpoint and/or selector count is 0, file is invalid or corrupted\n");
  14764. return false;
  14765. }
  14766. // Sanity check the ETC1S header.
  14767. if ((sizeof(ktx2_etc1s_global_data_header) +
  14768. sizeof(ktx2_etc1s_image_desc) * image_count +
  14769. m_etc1s_header.m_endpoints_byte_length +
  14770. m_etc1s_header.m_selectors_byte_length +
  14771. m_etc1s_header.m_tables_byte_length +
  14772. m_etc1s_header.m_extended_byte_length) > m_header.m_sgd_byte_length)
  14773. {
  14774. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n");
  14775. return false;
  14776. }
  14777. if (!m_etc1s_image_descs.try_resize(image_count))
  14778. {
  14779. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n");
  14780. return false;
  14781. }
  14782. memcpy(m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count);
  14783. pSrc += sizeof(ktx2_etc1s_image_desc) * image_count;
  14784. // Sanity check the ETC1S image descs
  14785. for (uint32_t i = 0; i < image_count; i++)
  14786. {
  14787. // m_etc1s_transcoder.transcode_image() will validate the slice offsets/lengths before transcoding.
  14788. if (!m_etc1s_image_descs[i].m_rgb_slice_byte_length)
  14789. {
  14790. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (1)\n");
  14791. return false;
  14792. }
  14793. if (m_has_alpha)
  14794. {
  14795. if (!m_etc1s_image_descs[i].m_alpha_slice_byte_length)
  14796. {
  14797. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (2)\n");
  14798. return false;
  14799. }
  14800. }
  14801. }
  14802. const uint8_t* pEndpoint_data = pSrc;
  14803. const uint8_t* pSelector_data = pSrc + m_etc1s_header.m_endpoints_byte_length;
  14804. const uint8_t* pTables_data = pSrc + m_etc1s_header.m_endpoints_byte_length + m_etc1s_header.m_selectors_byte_length;
  14805. if (!m_etc1s_transcoder.decode_tables(pTables_data, m_etc1s_header.m_tables_byte_length))
  14806. {
  14807. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n");
  14808. return false;
  14809. }
  14810. if (!m_etc1s_transcoder.decode_palettes(
  14811. m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length,
  14812. m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length))
  14813. {
  14814. BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n");
  14815. return false;
  14816. }
  14817. return true;
  14818. }
  14819. bool ktx2_transcoder::read_key_values()
  14820. {
  14821. if (!m_header.m_kvd_byte_length)
  14822. {
  14823. if (m_header.m_kvd_byte_offset)
  14824. {
  14825. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset (it should be zero when the length is zero)\n");
  14826. return false;
  14827. }
  14828. return true;
  14829. }
  14830. if (m_header.m_kvd_byte_offset < sizeof(ktx2_header))
  14831. {
  14832. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset\n");
  14833. return false;
  14834. }
  14835. if ((m_header.m_kvd_byte_offset + m_header.m_kvd_byte_length) > m_data_size)
  14836. {
  14837. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset and/or length\n");
  14838. return false;
  14839. }
  14840. const uint8_t* pSrc = m_pData + m_header.m_kvd_byte_offset;
  14841. uint32_t src_left = m_header.m_kvd_byte_length;
  14842. if (!m_key_values.try_reserve(8))
  14843. {
  14844. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
  14845. return false;
  14846. }
  14847. while (src_left > sizeof(uint32_t))
  14848. {
  14849. uint32_t l = basisu::read_le_dword(pSrc);
  14850. pSrc += sizeof(uint32_t);
  14851. src_left -= sizeof(uint32_t);
  14852. if (l < 2)
  14853. {
  14854. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (0)\n");
  14855. return false;
  14856. }
  14857. if (src_left < l)
  14858. {
  14859. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (1)\n");
  14860. return false;
  14861. }
  14862. if (!m_key_values.try_resize(m_key_values.size() + 1))
  14863. {
  14864. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
  14865. return false;
  14866. }
  14867. basisu::uint8_vec& key_data = m_key_values.back().m_key;
  14868. basisu::uint8_vec& value_data = m_key_values.back().m_value;
  14869. do
  14870. {
  14871. if (!l)
  14872. {
  14873. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (2)\n");
  14874. return false;
  14875. }
  14876. if (!key_data.try_push_back(*pSrc++))
  14877. {
  14878. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
  14879. return false;
  14880. }
  14881. src_left--;
  14882. l--;
  14883. } while (key_data.back());
  14884. if (!value_data.try_resize(l))
  14885. {
  14886. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
  14887. return false;
  14888. }
  14889. if (l)
  14890. {
  14891. memcpy(value_data.data(), pSrc, l);
  14892. pSrc += l;
  14893. src_left -= l;
  14894. }
  14895. uint32_t ofs = (uint32_t)(pSrc - m_pData) & 3;
  14896. uint32_t alignment_bytes = (4 - ofs) & 3;
  14897. if (src_left < alignment_bytes)
  14898. {
  14899. BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (3)\n");
  14900. return false;
  14901. }
  14902. pSrc += alignment_bytes;
  14903. src_left -= alignment_bytes;
  14904. }
  14905. return true;
  14906. }
  14907. #endif // BASISD_SUPPORT_KTX2
  // Reports whether KTX2 support was compiled in (BASISD_SUPPORT_KTX2 evaluates to non-zero).
  bool basisu_transcoder_supports_ktx2()
  {
      bool ktx2_supported = false;
  #if BASISD_SUPPORT_KTX2
      ktx2_supported = true;
  #endif
      return ktx2_supported;
  }
  // Reports whether Zstandard supercompression support for KTX2 was compiled in
  // (BASISD_SUPPORT_KTX2_ZSTD evaluates to non-zero).
  bool basisu_transcoder_supports_ktx2_zstd()
  {
      bool zstd_supported = false;
  #if BASISD_SUPPORT_KTX2_ZSTD
      zstd_supported = true;
  #endif
      return zstd_supported;
  }
  14924. //-------------------------------
  14925. #ifdef BASISD_SUPPORT_UASTC_HDR
  14926. // This float->half conversion matches how "F32TO16" works on Intel GPU's.
  14927. basist::half_float float_to_half(float val)
  14928. {
  14929. union { float f; int32_t i; uint32_t u; } fi = { val };
  14930. const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
  14931. int s = flt_s, e = 0, m = 0;
  14932. // inf/NaN
  14933. if (flt_e == 0xff)
  14934. {
  14935. e = 31;
  14936. if (flt_m != 0) // NaN
  14937. m = 1;
  14938. }
  14939. // not zero or denormal
  14940. else if (flt_e != 0)
  14941. {
  14942. int new_exp = flt_e - 127;
  14943. if (new_exp > 15)
  14944. e = 31;
  14945. else if (new_exp < -14)
  14946. m = lrintf((1 << 24) * fabsf(fi.f));
  14947. else
  14948. {
  14949. e = new_exp + 15;
  14950. m = lrintf(flt_m * (1.0f / ((float)(1 << 13))));
  14951. }
  14952. }
  14953. assert((0 <= m) && (m <= 1024));
  14954. if (m == 1024)
  14955. {
  14956. e++;
  14957. m = 0;
  14958. }
  14959. assert((s >= 0) && (s <= 1));
  14960. assert((e >= 0) && (e <= 31));
  14961. assert((m >= 0) && (m <= 1023));
  14962. basist::half_float result = (basist::half_float)((s << 15) | (e << 10) | m);
  14963. return result;
  14964. }
  14965. //------------------------------------------------------------------------------------------------
  14966. // HDR support
  14967. //
  14968. // Originally from bc6h_enc.cpp
  14969. // BC6H decoder fuzzed vs. DirectXTex's for unsigned/signed
  // Per-mode bit counts for the NUM_BC6H_MODES BC6H modes, indexed [mode][column]:
  // column 0 is the "base bits" count and columns 1..3 are the r, g, b counts (see the
  // column legend on the declaration). Rows use this file's internal mode order: rows
  // 0..9 are the 2-subset modes and rows 10..13 the 1-subset modes; the per-row comments
  // note where the MS/D3D documentation numbers a mode differently.
  14970. const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // base bits, r, g, b
  14971. {
  14972. // 2 subsets
  14973. { 10, 5, 5, 5, }, // 0, mode 1 in MS/D3D docs
  14974. { 7, 6, 6, 6, }, // 1
  14975. { 11, 5, 4, 4, }, // 2
  14976. { 11, 4, 5, 4, }, // 3
  14977. { 11, 4, 4, 5, }, // 4
  14978. { 9, 5, 5, 5, }, // 5
  14979. { 8, 6, 5, 5, }, // 6
  14980. { 8, 5, 6, 5, }, // 7
  14981. { 8, 5, 5, 6, }, // 8
  14982. { 6, 6, 6, 6, }, // 9, endpoints not delta encoded, mode 10 in MS/D3D docs
  14983. // 1 subset
  14984. { 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs
  14985. { 11, 9, 9, 9, }, // 11
  14986. { 12, 8, 8, 8, }, // 12
  14987. { 16, 4, 4, 4, } // 13, also useful for solid blocks
  14988. };
  // 32-entry table mapping a 5-bit code to a mode index (0..13), or -1 for a code that no
  // mode uses (entries 19, 23, 27 and 31). It is the inverse of the s_mode_bits codes in
  // pack_bc6h_block(): entries whose low two bits are 0b00 / 0b01 map to modes 0 / 1 (their
  // codes are only 2 bits wide), and the remaining 5-bit codes map to modes 2..13.
  // NOTE(review): presumably indexed with the low 5 bits of an encoded block - confirm at the use site.
  14989. const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 };
  // Per-mode lists of bit-field descriptors, indexed [mode][field]. Each descriptor is
  // { comp_index, subset*2+lh_index, last_bit, first_bit } (see the legend inside the table),
  // and each mode's list ends with a terminator entry whose comp_index is -1.
  // NOTE(review): the following readings are inferred from the data only - confirm against
  // the code that walks this table:
  //  - first_bit == -1 appears to denote a single-bit field located at last_bit;
  //  - comp_index == 3 (with index -1, bits 4..0) appears to be the 5-bit partition field;
  //  - entries with first_bit > last_bit (modes 12 and 13) appear to describe bit ranges
  //    that are stored in reversed order (see write_rev_bits()).
  14990. const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] =
  14991. {
  14992. // comp_index, subset*2+lh_index, last_bit, first_bit
  14993. //------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta
  14994. { { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 },
  14995. { 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 },
  14996. { 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  14997. //------------------------ mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta
  14998. { { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },
  14999. { 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },
  15000. { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },
  15001. { 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  15002. //------------------------ mode 2: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta
  15003. { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 },
  15004. { 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },
  15005. { 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  15006. //------------------------ mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta
  15007. { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },
  15008. { 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 },
  15009. { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  15010. //------------------------ mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta
  15011. { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },
  15012. { 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 },
  15013. { 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  15014. //------------------------ mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta
  15015. { { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },
  15016. { 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },
  15017. { 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  15018. //------------------------ mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta
  15019. { { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 },
  15020. { 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
  15021. { 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  15022. //------------------------ mode 7: 2 subsets, Weight bits: 46 bits, Endpoints bits: 72 bits (8.555, 8.666, 8.555), delta
  15023. { { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 },
  15024. { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
  15025. { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  15026. //------------------------ mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta
  15027. { { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 },
  15028. { 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },
  15029. { 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  15030. //------------------------ mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta
  15031. { { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 },
  15032. { 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },
  15033. { 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
  15034. //------------------------ mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta
  15035. { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} },
  15036. //------------------------ mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta
  15037. { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} },
  15038. //------------------------ mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta
  15039. { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} },
  15040. //------------------------ mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta
  15041. { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} }
  15042. };
  // Partition pattern table for the 2-subset modes, indexed [pattern][y][x] over the 4x4
  // texels of a block. In every entry the low bit is 0 or 1 and bit 7 (0x80) is the flag
  // described below; each pattern carries the flag on texel (0,0) and on exactly one texel
  // whose low bit is 1.
  // NOTE(review): the low bit is presumably the texel's subset index and the flagged texels
  // the per-subset anchor texels - confirm against the code that reads this table.
  15043. // The same as the first 32 2-subset patterns in BC7.
  15044. // Bit 7 is a flag indicating that the weight uses 1 less bit than usual.
  15045. const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x]
  15046. {
  15047. { {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} },
  15048. { {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
  15049. { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} },
  15050. { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
  15051. { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
  15052. { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} },
  15053. { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
  15054. { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} },
  15055. { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} },
  15056. { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} },
  15057. { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} },
  15058. { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 0x81} },
  15059. { {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} },
  15060. { {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} },
  15061. { {0x80, 0, 0, 1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} },
  15062. { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} }
  15063. };
  // BC6H weight tables. Both run from 0 up to 64; the 3/4 suffix presumably
  // names the index width in bits (8 and 16 entries respectively).
  const uint8_t g_bc6h_weight3[8] =
  {
      0, 9, 18, 27, 37, 46, 55, 64
  };
  const uint8_t g_bc6h_weight4[16] =
  {
      0, 4, 9, 13, 17, 21, 26, 30,
      34, 38, 43, 47, 51, 55, 60, 64
  };
  15066. struct bc6h_logical_block
  15067. {
  15068. uint32_t m_mode;
  15069. uint32_t m_partition_pattern; // must be 0 if 1 subset
  15070. uint32_t m_endpoints[3][4]; // [comp][subset*2+lh_index] - must be already properly packed
  15071. uint8_t m_weights[16]; // weights must be of the proper size, taking into account skipped MSB's which must be 0
  15072. void clear()
  15073. {
  15074. basisu::clear_obj(*this);
  15075. }
  15076. };
  // ORs the low num_bits bits of val into the 128-bit value formed by l (bits 0-63)
  // and h (bits 64-127), starting at bit_pos, then advances bit_pos by num_bits.
  // num_bits must be in [1, 63] and val must fit in num_bits bits.
  static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
  {
      assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
      assert(val < (1ULL << num_bits));

      const uint32_t end_pos = bit_pos + num_bits;

      if (bit_pos >= 64)
      {
          // Entirely inside the high qword.
          h |= (val << (bit_pos - 64));
      }
      else
      {
          l |= (val << bit_pos);

          // The field straddles the qword boundary: spill the upper part into h.
          // (bit_pos is at least 2 here because num_bits < 64, so the shift count stays below 64.)
          if (end_pos > 64)
              h |= (val >> (64 - bit_pos));
      }

      bit_pos = end_pos;
      assert(bit_pos <= 128);
  }
  15094. static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
  15095. {
  15096. assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
  15097. assert(val < (1ULL << num_bits));
  15098. for (uint32_t i = 0; i < num_bits; i++)
  15099. write_bits((val >> (num_bits - 1u - i)) & 1, 1, bit_pos, l, h);
  15100. }
  15101. static void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk)
  15102. {
  15103. const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 };
  15104. const uint32_t mode = log_blk.m_mode;
  15105. assert(mode < NUM_BC6H_MODES);
  15106. uint64_t l = s_mode_bits[mode], h = 0;
  15107. uint32_t bit_pos = (mode >= 2) ? 5 : 2;
  15108. const uint32_t num_subsets = (mode >= BC6H_FIRST_1SUBSET_MODE_INDEX) ? 1 : 2;
  15109. assert(((num_subsets == 2) && (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS)) ||
  15110. ((num_subsets == 1) && (!log_blk.m_partition_pattern)));
  15111. // Sanity checks
  15112. for (uint32_t c = 0; c < 3; c++)
  15113. {
  15114. assert(log_blk.m_endpoints[c][0] < (1u << g_bc6h_mode_sig_bits[mode][0])); // 1st subset l, base bits
  15115. assert(log_blk.m_endpoints[c][1] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 1st subset h, these are deltas except for modes 9,10
  15116. assert(log_blk.m_endpoints[c][2] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset l
  15117. assert(log_blk.m_endpoints[c][3] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset h
  15118. }
  15119. const bc6h_bit_layout* pLayout = &g_bc6h_bit_layouts[mode][0];
  15120. while (pLayout->m_comp != -1)
  15121. {
  15122. uint32_t v = (pLayout->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pLayout->m_comp][pLayout->m_index];
  15123. if (pLayout->m_first_bit == -1)
  15124. {
  15125. write_bits((v >> pLayout->m_last_bit) & 1, 1, bit_pos, l, h);
  15126. }
  15127. else
  15128. {
  15129. const uint32_t total_bits = basisu::iabs(pLayout->m_last_bit - pLayout->m_first_bit) + 1;
  15130. v >>= basisu::minimum(pLayout->m_first_bit, pLayout->m_last_bit);
  15131. v &= ((1 << total_bits) - 1);
  15132. if (pLayout->m_first_bit > pLayout->m_last_bit)
  15133. write_rev_bits(v, total_bits, bit_pos, l, h);
  15134. else
  15135. write_bits(v, total_bits, bit_pos, l, h);
  15136. }
  15137. pLayout++;
  15138. }
  15139. const uint32_t num_mode_sel_bits = (num_subsets == 1) ? 4 : 3;
  15140. const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0];
  15141. for (uint32_t i = 0; i < 16; i++)
  15142. {
  15143. const uint32_t sel = log_blk.m_weights[i];
  15144. uint32_t num_bits = num_mode_sel_bits;
  15145. if (num_subsets == 2)
  15146. {
  15147. const uint32_t subset_index = pPat[i];
  15148. num_bits -= (subset_index >> 7);
  15149. }
  15150. else if (!i)
  15151. {
  15152. num_bits--;
  15153. }
  15154. assert(sel < (1u << num_bits));
  15155. write_bits(sel, num_bits, bit_pos, l, h);
  15156. }
  15157. assert(bit_pos == 128);
  15158. basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)l);
  15159. basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(l >> 32u));
  15160. basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)h);
  15161. basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(h >> 32u));
  15162. }
  15163. #if 0
  15164. static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp)
  15165. {
  15166. int unq;
  15167. if (bits_per_comp >= 15)
  15168. unq = comp;
  15169. else if (comp == 0)
  15170. unq = 0;
  15171. else if (comp == ((1u << bits_per_comp) - 1u))
  15172. unq = 0xFFFFu;
  15173. else
  15174. unq = ((comp << 16u) + 0x8000u) >> bits_per_comp;
  15175. return unq;
  15176. }
  15177. #endif
  15178. // Suboptimal, but very close.
  15179. static inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
  15180. {
  15181. assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
  15182. return (h * 64 + 30) / (31 * (1 << (16 - num_bits)));
  15183. }
  // Inclusive range of bit counts (6,7,8,9,10,11,12) accepted by half_to_blog_tab() in addition to 16.
  const uint32_t BC6H_BLOG_TAB_MIN = 6, BC6H_BLOG_TAB_MAX = 12;
  //const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1;
  15188. // Handles 16, or 6-12 bits. Others assert.
  15189. static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits)
  15190. {
  15191. BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN);
  15192. BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX);
  15193. assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
  15194. if (num_bits == 16)
  15195. {
  15196. return bc6h_half_to_blog(h, 16);
  15197. }
  15198. else
  15199. {
  15200. assert((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX));
  15201. // Note: This used to be done using a table lookup, but it required ~224KB of tables. This isn't quite as accurate, but the error is very slight (+-1 half values as ints).
  15202. return bc6h_half_to_blog(h, num_bits);
  15203. }
  15204. }
  // Set once bc6h_enc_init() has run; the encoder entry points assert on it.
  bool g_bc6h_enc_initialized;

  // Marks the BC6H encoder as initialized. Calling it again is a no-op.
  void bc6h_enc_init()
  {
      if (!g_bc6h_enc_initialized)
          g_bc6h_enc_initialized = true;
  }
  15212. // mode 10, 4-bit weights
  15213. void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
  15214. {
  15215. assert(g_bc6h_enc_initialized);
  15216. for (uint32_t i = 0; i < 16; i++)
  15217. {
  15218. assert(pWeights[i] <= 15);
  15219. }
  15220. bc6h_logical_block log_blk;
  15221. log_blk.clear();
  15222. // Convert half endpoints to blog10 (mode 10 doesn't use delta encoding)
  15223. for (uint32_t c = 0; c < 3; c++)
  15224. {
  15225. log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 10);
  15226. log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 10);
  15227. }
  15228. memcpy(log_blk.m_weights, pWeights, 16);
  15229. if (log_blk.m_weights[0] & 8)
  15230. {
  15231. for (uint32_t i = 0; i < 16; i++)
  15232. log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
  15233. for (uint32_t c = 0; c < 3; c++)
  15234. {
  15235. std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
  15236. }
  15237. }
  15238. log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX;
  15239. pack_bc6h_block(*pPacked_block, log_blk);
  15240. }
  15241. // Tries modes 11-13 (delta endpoint) encoding, falling back to mode 10 only when necessary, 4-bit weights
  15242. void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
  15243. {
  15244. assert(g_bc6h_enc_initialized);
  15245. for (uint32_t i = 0; i < 16; i++)
  15246. {
  15247. assert(pWeights[i] <= 15);
  15248. }
  15249. bc6h_logical_block log_blk;
  15250. log_blk.clear();
  15251. for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--)
  15252. {
  15253. const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0], num_delta_bits = g_bc6h_mode_sig_bits[mode][1];
  15254. const int base_bitmask = (1 << num_base_bits) - 1;
  15255. const int delta_bitmask = (1 << num_delta_bits) - 1;
  15256. BASISU_NOTE_UNUSED(base_bitmask);
  15257. assert(num_delta_bits < num_base_bits);
  15258. assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3]));
  15259. uint32_t blog_endpoints[3][2];
  15260. // Convert half endpoints to blog 16, 12, or 11
  15261. for (uint32_t c = 0; c < 3; c++)
  15262. {
  15263. blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
  15264. assert((int)blog_endpoints[c][0] <= base_bitmask);
  15265. blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
  15266. assert((int)blog_endpoints[c][1] <= base_bitmask);
  15267. }
  15268. // Copy weights
  15269. memcpy(log_blk.m_weights, pWeights, 16);
  15270. // Ensure first weight MSB is 0
  15271. if (log_blk.m_weights[0] & 8)
  15272. {
  15273. // Invert weights
  15274. for (uint32_t i = 0; i < 16; i++)
  15275. log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
  15276. // Swap blog quantized endpoints
  15277. for (uint32_t c = 0; c < 3; c++)
  15278. {
  15279. std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
  15280. }
  15281. }
  15282. const int max_delta = (1 << (num_delta_bits - 1)) - 1;
  15283. const int min_delta = -(max_delta + 1);
  15284. assert((max_delta - min_delta) == delta_bitmask);
  15285. bool failed_flag = false;
  15286. for (uint32_t c = 0; c < 3; c++)
  15287. {
  15288. log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
  15289. int delta = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
  15290. if ((delta < min_delta) || (delta > max_delta))
  15291. {
  15292. failed_flag = true;
  15293. break;
  15294. }
  15295. log_blk.m_endpoints[c][1] = delta & delta_bitmask;
  15296. }
  15297. if (failed_flag)
  15298. continue;
  15299. log_blk.m_mode = mode;
  15300. pack_bc6h_block(*pPacked_block, log_blk);
  15301. return;
  15302. }
  15303. // Worst case fall back to mode 10, which can handle any endpoints
  15304. bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights);
  15305. }
  15306. // Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset
  15307. void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
  15308. {
  15309. assert(g_bc6h_enc_initialized);
  15310. for (uint32_t i = 0; i < 16; i++)
  15311. {
  15312. assert(pWeights[i] <= 7);
  15313. }
  15314. bc6h_logical_block log_blk;
  15315. log_blk.clear();
  15316. // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
  15317. for (uint32_t c = 0; c < 3; c++)
  15318. {
  15319. log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 6);
  15320. log_blk.m_endpoints[c][2] = log_blk.m_endpoints[c][0];
  15321. log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 6);
  15322. log_blk.m_endpoints[c][3] = log_blk.m_endpoints[c][1];
  15323. }
  15324. memcpy(log_blk.m_weights, pWeights, 16);
  15325. const uint32_t pat_index = 0;
  15326. const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
  15327. if (log_blk.m_weights[0] & 4)
  15328. {
  15329. for (uint32_t c = 0; c < 3; c++)
  15330. std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
  15331. for (uint32_t i = 0; i < 16; i++)
  15332. if ((pPat[i] & 0x7F) == 0)
  15333. log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
  15334. }
  15335. if (log_blk.m_weights[15] & 4)
  15336. {
  15337. for (uint32_t c = 0; c < 3; c++)
  15338. std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
  15339. for (uint32_t i = 0; i < 16; i++)
  15340. if ((pPat[i] & 0x7F) == 1)
  15341. log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
  15342. }
  15343. log_blk.m_mode = 9;
  15344. log_blk.m_partition_pattern = pat_index;
  15345. pack_bc6h_block(*pPacked_block, log_blk);
  15346. }
  15347. // Tries modes 0-8, falls back to mode 9
  15348. void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
  15349. {
  15350. assert(g_bc6h_enc_initialized);
  15351. for (uint32_t i = 0; i < 16; i++)
  15352. {
  15353. assert(pWeights[i] <= 7);
  15354. }
  15355. bc6h_logical_block log_blk;
  15356. log_blk.clear();
  15357. for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
  15358. {
  15359. static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
  15360. const uint32_t mode = s_mode_order[mode_iter];
  15361. const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
  15362. const int base_bitmask = (1 << num_base_bits) - 1;
  15363. BASISU_NOTE_UNUSED(base_bitmask);
  15364. const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
  15365. const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
  15366. uint32_t blog_endpoints[3][4];
  15367. // Convert half endpoints to blog 7-11
  15368. for (uint32_t c = 0; c < 3; c++)
  15369. {
  15370. blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
  15371. blog_endpoints[c][2] = blog_endpoints[c][0];
  15372. assert((int)blog_endpoints[c][0] <= base_bitmask);
  15373. blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
  15374. blog_endpoints[c][3] = blog_endpoints[c][1];
  15375. assert((int)blog_endpoints[c][1] <= base_bitmask);
  15376. }
  15377. const uint32_t pat_index = 0;
  15378. const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
  15379. memcpy(log_blk.m_weights, pWeights, 16);
  15380. if (log_blk.m_weights[0] & 4)
  15381. {
  15382. // Swap part 0's endpoints/weights
  15383. for (uint32_t c = 0; c < 3; c++)
  15384. std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
  15385. for (uint32_t i = 0; i < 16; i++)
  15386. if ((pPat[i] & 0x7F) == 0)
  15387. log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
  15388. }
  15389. if (log_blk.m_weights[15] & 4)
  15390. {
  15391. // Swap part 1's endpoints/weights
  15392. for (uint32_t c = 0; c < 3; c++)
  15393. std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
  15394. for (uint32_t i = 0; i < 16; i++)
  15395. if ((pPat[i] & 0x7F) == 1)
  15396. log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
  15397. }
  15398. bool failed_flag = false;
  15399. for (uint32_t c = 0; c < 3; c++)
  15400. {
  15401. const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
  15402. const int min_delta = -(max_delta + 1);
  15403. assert((max_delta - min_delta) == delta_bitmasks[c]);
  15404. log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
  15405. int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
  15406. int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
  15407. int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
  15408. if ((delta0 < min_delta) || (delta0 > max_delta) ||
  15409. (delta1 < min_delta) || (delta1 > max_delta) ||
  15410. (delta2 < min_delta) || (delta2 > max_delta))
  15411. {
  15412. failed_flag = true;
  15413. break;
  15414. }
  15415. log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
  15416. log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
  15417. log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
  15418. if (failed_flag)
  15419. break;
  15420. }
  15421. if (failed_flag)
  15422. continue;
  15423. log_blk.m_mode = mode;
  15424. log_blk.m_partition_pattern = pat_index;
  15425. pack_bc6h_block(*pPacked_block, log_blk);
  15426. return;
  15427. } // mode_iter
  15428. bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights);
  15429. }
  15430. // pEndpoints[subset][comp][lh_index]
  15431. void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
  15432. {
  15433. assert(g_bc6h_enc_initialized);
  15434. assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
  15435. for (uint32_t i = 0; i < 16; i++)
  15436. {
  15437. assert(pWeights[i] <= 7);
  15438. }
  15439. bc6h_logical_block log_blk;
  15440. log_blk.clear();
  15441. // Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
  15442. for (uint32_t s = 0; s < 2; s++)
  15443. {
  15444. for (uint32_t c = 0; c < 3; c++)
  15445. {
  15446. log_blk.m_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], 6);
  15447. log_blk.m_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], 6);
  15448. }
  15449. }
  15450. memcpy(log_blk.m_weights, pWeights, 16);
  15451. //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
  15452. const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
  15453. const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
  15454. if (invert_flag)
  15455. {
  15456. for (uint32_t c = 0; c < 3; c++)
  15457. {
  15458. std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][2]);
  15459. std::swap(log_blk.m_endpoints[c][1], log_blk.m_endpoints[c][3]);
  15460. }
  15461. }
  15462. const uint32_t pat_index = bc7_pattern;
  15463. assert(pat_index < 32);
  15464. const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
  15465. bool swap_flags[2] = { false, false };
  15466. for (uint32_t i = 0; i < 16; i++)
  15467. {
  15468. if ((pPat[i] & 0x80) == 0)
  15469. continue;
  15470. if (log_blk.m_weights[i] & 4)
  15471. {
  15472. const uint32_t p = pPat[i] & 1;
  15473. swap_flags[p] = true;
  15474. }
  15475. }
  15476. if (swap_flags[0])
  15477. {
  15478. for (uint32_t c = 0; c < 3; c++)
  15479. std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
  15480. for (uint32_t i = 0; i < 16; i++)
  15481. if ((pPat[i] & 0x7F) == 0)
  15482. log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
  15483. }
  15484. if (swap_flags[1])
  15485. {
  15486. for (uint32_t c = 0; c < 3; c++)
  15487. std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
  15488. for (uint32_t i = 0; i < 16; i++)
  15489. if ((pPat[i] & 0x7F) == 1)
  15490. log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
  15491. }
  15492. log_blk.m_mode = 9;
  15493. log_blk.m_partition_pattern = pat_index;
  15494. pack_bc6h_block(*pPacked_block, log_blk);
  15495. }
  15496. void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
  15497. {
  15498. assert(g_bc6h_enc_initialized);
  15499. for (uint32_t i = 0; i < 16; i++)
  15500. {
  15501. assert(pWeights[i] <= 7);
  15502. }
  15503. bc6h_logical_block log_blk;
  15504. log_blk.clear();
  15505. for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
  15506. {
  15507. static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
  15508. const uint32_t mode = s_mode_order[mode_iter];
  15509. const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
  15510. const int base_bitmask = (1 << num_base_bits) - 1;
  15511. BASISU_NOTE_UNUSED(base_bitmask);
  15512. const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
  15513. const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
  15514. uint32_t blog_endpoints[3][4];
  15515. // Convert half endpoints to blog 7-11
  15516. for (uint32_t s = 0; s < 2; s++)
  15517. {
  15518. for (uint32_t c = 0; c < 3; c++)
  15519. {
  15520. blog_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], num_base_bits);
  15521. blog_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], num_base_bits);
  15522. }
  15523. }
  15524. memcpy(log_blk.m_weights, pWeights, 16);
  15525. //const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
  15526. const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
  15527. const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
  15528. if (invert_flag)
  15529. {
  15530. for (uint32_t c = 0; c < 3; c++)
  15531. {
  15532. std::swap(blog_endpoints[c][0], blog_endpoints[c][2]);
  15533. std::swap(blog_endpoints[c][1], blog_endpoints[c][3]);
  15534. }
  15535. }
  15536. const uint32_t pat_index = bc7_pattern;
  15537. assert(pat_index < 32);
  15538. const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
  15539. bool swap_flags[2] = { false, false };
  15540. for (uint32_t i = 0; i < 16; i++)
  15541. {
  15542. if ((pPat[i] & 0x80) == 0)
  15543. continue;
  15544. if (log_blk.m_weights[i] & 4)
  15545. {
  15546. const uint32_t p = pPat[i] & 1;
  15547. swap_flags[p] = true;
  15548. }
  15549. }
  15550. if (swap_flags[0])
  15551. {
  15552. for (uint32_t c = 0; c < 3; c++)
  15553. std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
  15554. for (uint32_t i = 0; i < 16; i++)
  15555. if ((pPat[i] & 0x7F) == 0)
  15556. log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
  15557. }
  15558. if (swap_flags[1])
  15559. {
  15560. for (uint32_t c = 0; c < 3; c++)
  15561. std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
  15562. for (uint32_t i = 0; i < 16; i++)
  15563. if ((pPat[i] & 0x7F) == 1)
  15564. log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
  15565. }
  15566. // Try packing the endpoints
  15567. bool failed_flag = false;
  15568. for (uint32_t c = 0; c < 3; c++)
  15569. {
  15570. const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
  15571. const int min_delta = -(max_delta + 1);
  15572. assert((max_delta - min_delta) == delta_bitmasks[c]);
  15573. log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
  15574. int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
  15575. int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
  15576. int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
  15577. if ((delta0 < min_delta) || (delta0 > max_delta) ||
  15578. (delta1 < min_delta) || (delta1 > max_delta) ||
  15579. (delta2 < min_delta) || (delta2 > max_delta))
  15580. {
  15581. failed_flag = true;
  15582. break;
  15583. }
  15584. log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
  15585. log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
  15586. log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
  15587. if (failed_flag)
  15588. break;
  15589. }
  15590. if (failed_flag)
  15591. continue;
  15592. log_blk.m_mode = mode;
  15593. log_blk.m_partition_pattern = pat_index;
  15594. pack_bc6h_block(*pPacked_block, log_blk);
  15595. //half_float blk[16 * 3];
  15596. //unpack_bc6h(pPacked_block, blk, false);
  15597. return;
  15598. }
  15599. bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights);
  15600. }
  15601. bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3])
  15602. {
  15603. assert(g_bc6h_enc_initialized);
  15604. if ((pColor[0] | pColor[1] | pColor[2]) & 0x8000)
  15605. return false;
  15606. // ASTC block unpacker won't allow Inf/NaN's to come through.
  15607. //if (is_half_inf_or_nan(pColor[0]) || is_half_inf_or_nan(pColor[1]) || is_half_inf_or_nan(pColor[2]))
  15608. // return false;
  15609. uint8_t weights[16];
  15610. memset(weights, 0, sizeof(weights));
  15611. half_float endpoints[3][2];
  15612. endpoints[0][0] = pColor[0];
  15613. endpoints[0][1] = pColor[0];
  15614. endpoints[1][0] = pColor[1];
  15615. endpoints[1][1] = pColor[1];
  15616. endpoints[2][0] = pColor[2];
  15617. endpoints[2][1] = pColor[2];
  15618. bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights);
  15619. return true;
  15620. }
  15621. //--------------------------------------------------------------------------------------------------------------------------
  15622. // basisu_astc_hdr_core.cpp
  15623. static bool g_astc_hdr_core_initialized;
  15624. static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024];
  15625. //--------------------------------------------------------------------------------------------------------------------------
  15626. void astc_hdr_core_init()
  15627. {
  15628. if (g_astc_hdr_core_initialized)
  15629. return;
  15630. memset(g_astc_partition_id_to_common_bc7_pat_index, 0xFF, sizeof(g_astc_partition_id_to_common_bc7_pat_index));
  15631. for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; ++part_index)
  15632. {
  15633. const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
  15634. //const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
  15635. assert(astc_pattern < 1024);
  15636. g_astc_partition_id_to_common_bc7_pat_index[astc_pattern] = (int8_t)part_index;
  15637. }
  15638. g_astc_hdr_core_initialized = true;
  15639. }
  15640. //--------------------------------------------------------------------------------------------------------------------------
  15641. static inline int astc_hdr_sign_extend(int src, int num_src_bits)
  15642. {
  15643. assert(basisu::in_range(num_src_bits, 2, 31));
  15644. const bool negative = (src & (1 << (num_src_bits - 1))) != 0;
  15645. if (negative)
  15646. return src | ~((1 << num_src_bits) - 1);
  15647. else
  15648. return src & ((1 << num_src_bits) - 1);
  15649. }
  15650. static inline void astc_hdr_pack_bit(
  15651. int& dst, int dst_bit,
  15652. int src_val, int src_bit = 0)
  15653. {
  15654. assert(dst_bit >= 0 && dst_bit <= 31);
  15655. int bit = basisu::get_bit(src_val, src_bit);
  15656. dst |= (bit << dst_bit);
  15657. }
  15658. //--------------------------------------------------------------------------------------------------------------------------
  15659. void decode_mode7_to_qlog12_ise20(
  15660. const uint8_t* pEndpoints,
  15661. int e[2][3],
  15662. int* pScale)
  15663. {
  15664. assert(g_astc_hdr_core_initialized);
  15665. for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
  15666. {
  15667. assert(pEndpoints[i] <= 255);
  15668. }
  15669. const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3];
  15670. // Extract mode bits and unpack to major component and mode.
  15671. const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
  15672. int majcomp, mode;
  15673. if ((modeval & 0xC) != 0xC)
  15674. {
  15675. majcomp = modeval >> 2;
  15676. mode = modeval & 3;
  15677. }
  15678. else if (modeval != 0xF)
  15679. {
  15680. majcomp = modeval & 3;
  15681. mode = 4;
  15682. }
  15683. else
  15684. {
  15685. majcomp = 0;
  15686. mode = 5;
  15687. }
  15688. // Extract low-order bits of r, g, b, and s.
  15689. int red = v0 & 0x3f;
  15690. int green = v1 & 0x1f;
  15691. int blue = v2 & 0x1f;
  15692. int scale = v3 & 0x1f;
  15693. // Extract high-order bits, which may be assigned depending on mode
  15694. int x0 = (v1 >> 6) & 1;
  15695. int x1 = (v1 >> 5) & 1;
  15696. int x2 = (v2 >> 6) & 1;
  15697. int x3 = (v2 >> 5) & 1;
  15698. int x4 = (v3 >> 7) & 1;
  15699. int x5 = (v3 >> 6) & 1;
  15700. int x6 = (v3 >> 5) & 1;
  15701. // Now move the high-order xs into the right place.
  15702. const int ohm = 1 << mode;
  15703. if (ohm & 0x30) green |= x0 << 6;
  15704. if (ohm & 0x3A) green |= x1 << 5;
  15705. if (ohm & 0x30) blue |= x2 << 6;
  15706. if (ohm & 0x3A) blue |= x3 << 5;
  15707. if (ohm & 0x3D) scale |= x6 << 5;
  15708. if (ohm & 0x2D) scale |= x5 << 6;
  15709. if (ohm & 0x04) scale |= x4 << 7;
  15710. if (ohm & 0x3B) red |= x4 << 6;
  15711. if (ohm & 0x04) red |= x3 << 6;
  15712. if (ohm & 0x10) red |= x5 << 7;
  15713. if (ohm & 0x0F) red |= x2 << 7;
  15714. if (ohm & 0x05) red |= x1 << 8;
  15715. if (ohm & 0x0A) red |= x0 << 8;
  15716. if (ohm & 0x05) red |= x0 << 9;
  15717. if (ohm & 0x02) red |= x6 << 9;
  15718. if (ohm & 0x01) red |= x3 << 10;
  15719. if (ohm & 0x02) red |= x5 << 10;
  15720. // Shift the bits to the top of the 12-bit result.
  15721. static const int s_shamts[6] = { 1,1,2,3,4,5 };
  15722. const int shamt = s_shamts[mode];
  15723. red <<= shamt;
  15724. green <<= shamt;
  15725. blue <<= shamt;
  15726. scale <<= shamt;
  15727. // Minor components are stored as differences
  15728. if (mode != 5)
  15729. {
  15730. green = red - green;
  15731. blue = red - blue;
  15732. }
  15733. // Swizzle major component into place
  15734. if (majcomp == 1)
  15735. std::swap(red, green);
  15736. if (majcomp == 2)
  15737. std::swap(red, blue);
  15738. // Clamp output values, set alpha to 1.0
  15739. e[1][0] = basisu::clamp(red, 0, 0xFFF);
  15740. e[1][1] = basisu::clamp(green, 0, 0xFFF);
  15741. e[1][2] = basisu::clamp(blue, 0, 0xFFF);
  15742. e[0][0] = basisu::clamp(red - scale, 0, 0xFFF);
  15743. e[0][1] = basisu::clamp(green - scale, 0, 0xFFF);
  15744. e[0][2] = basisu::clamp(blue - scale, 0, 0xFFF);
  15745. if (pScale)
  15746. *pScale = scale;
  15747. }
  15748. //--------------------------------------------------------------------------------------------------------------------------
  15749. bool decode_mode7_to_qlog12(
  15750. const uint8_t* pEndpoints,
  15751. int e[2][3],
  15752. int* pScale,
  15753. uint32_t ise_endpoint_range)
  15754. {
  15755. assert(g_astc_hdr_core_initialized);
  15756. if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
  15757. {
  15758. decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale);
  15759. }
  15760. else
  15761. {
  15762. uint8_t dequantized_endpoints[NUM_MODE7_ENDPOINTS];
  15763. for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
  15764. dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
  15765. decode_mode7_to_qlog12_ise20(dequantized_endpoints, e, pScale);
  15766. }
  15767. for (uint32_t i = 0; i < 2; i++)
  15768. {
  15769. if (e[i][0] > (int)MAX_QLOG12)
  15770. return false;
  15771. if (e[i][1] > (int)MAX_QLOG12)
  15772. return false;
  15773. if (e[i][2] > (int)MAX_QLOG12)
  15774. return false;
  15775. }
  15776. return true;
  15777. }
  15778. //--------------------------------------------------------------------------------------------------------------------------
  15779. void decode_mode11_to_qlog12_ise20(
  15780. const uint8_t* pEndpoints,
  15781. int e[2][3])
  15782. {
  15783. #ifdef _DEBUG
  15784. for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
  15785. {
  15786. assert(pEndpoints[i] <= 255);
  15787. }
  15788. #endif
  15789. const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1);
  15790. if (maj_comp == 3)
  15791. {
  15792. // Direct, qlog8 and qlog7
  15793. e[0][0] = pEndpoints[0] << 4;
  15794. e[1][0] = pEndpoints[1] << 4;
  15795. e[0][1] = pEndpoints[2] << 4;
  15796. e[1][1] = pEndpoints[3] << 4;
  15797. e[0][2] = (pEndpoints[4] & 127) << 5;
  15798. e[1][2] = (pEndpoints[5] & 127) << 5;
  15799. }
  15800. else
  15801. {
  15802. int v0 = pEndpoints[0];
  15803. int v1 = pEndpoints[1];
  15804. int v2 = pEndpoints[2];
  15805. int v3 = pEndpoints[3];
  15806. int v4 = pEndpoints[4];
  15807. int v5 = pEndpoints[5];
  15808. int mode = 0;
  15809. astc_hdr_pack_bit(mode, 0, v1, 7);
  15810. astc_hdr_pack_bit(mode, 1, v2, 7);
  15811. astc_hdr_pack_bit(mode, 2, v3, 7);
  15812. int va = v0;
  15813. astc_hdr_pack_bit(va, 8, v1, 6);
  15814. int vb0 = v2 & 63;
  15815. int vb1 = v3 & 63;
  15816. int vc = v1 & 63;
  15817. int vd0 = v4 & 0x7F; // this takes more bits than is sometimes needed
  15818. int vd1 = v5 & 0x7F; // this takes more bits than is sometimes needed
  15819. static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 };
  15820. vd0 = astc_hdr_sign_extend(vd0, dbitstab[mode]);
  15821. vd1 = astc_hdr_sign_extend(vd1, dbitstab[mode]);
  15822. int x0 = basisu::get_bit(v2, 6);
  15823. int x1 = basisu::get_bit(v3, 6);
  15824. int x2 = basisu::get_bit(v4, 6);
  15825. int x3 = basisu::get_bit(v5, 6);
  15826. int x4 = basisu::get_bit(v4, 5);
  15827. int x5 = basisu::get_bit(v5, 5);
  15828. const uint32_t ohm = 1U << mode;
  15829. if (ohm & 0xA4) va |= (x0 << 9);
  15830. if (ohm & 0x08) va |= (x2 << 9);
  15831. if (ohm & 0x50) va |= (x4 << 9);
  15832. if (ohm & 0x50) va |= (x5 << 10);
  15833. if (ohm & 0xA0) va |= (x1 << 10);
  15834. if (ohm & 0xC0) va |= (x2 << 11);
  15835. if (ohm & 0x04) vc |= (x1 << 6);
  15836. if (ohm & 0xE8) vc |= (x3 << 6);
  15837. if (ohm & 0x20) vc |= (x2 << 7);
  15838. if (ohm & 0x5B) vb0 |= (x0 << 6);
  15839. if (ohm & 0x5B) vb1 |= (x1 << 6);
  15840. if (ohm & 0x12) vb0 |= (x2 << 7);
  15841. if (ohm & 0x12) vb1 |= (x3 << 7);
  15842. const int shamt = (mode >> 1) ^ 3;
  15843. va = (uint32_t)va << shamt;
  15844. vb0 = (uint32_t)vb0 << shamt;
  15845. vb1 = (uint32_t)vb1 << shamt;
  15846. vc = (uint32_t)vc << shamt;
  15847. vd0 = (uint32_t)vd0 << shamt;
  15848. vd1 = (uint32_t)vd1 << shamt;
  15849. // qlog12
  15850. e[1][0] = basisu::clamp<int>(va, 0, 0xFFF);
  15851. e[1][1] = basisu::clamp<int>(va - vb0, 0, 0xFFF);
  15852. e[1][2] = basisu::clamp<int>(va - vb1, 0, 0xFFF);
  15853. e[0][0] = basisu::clamp<int>(va - vc, 0, 0xFFF);
  15854. e[0][1] = basisu::clamp<int>(va - vb0 - vc - vd0, 0, 0xFFF);
  15855. e[0][2] = basisu::clamp<int>(va - vb1 - vc - vd1, 0, 0xFFF);
  15856. if (maj_comp)
  15857. {
  15858. std::swap(e[0][0], e[0][maj_comp]);
  15859. std::swap(e[1][0], e[1][maj_comp]);
  15860. }
  15861. }
  15862. }
  15863. //--------------------------------------------------------------------------------------------------------------------------
  15864. bool decode_mode11_to_qlog12(
  15865. const uint8_t* pEndpoints,
  15866. int e[2][3],
  15867. uint32_t ise_endpoint_range)
  15868. {
  15869. assert(g_astc_hdr_core_initialized);
  15870. assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
  15871. if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
  15872. {
  15873. decode_mode11_to_qlog12_ise20(pEndpoints, e);
  15874. }
  15875. else
  15876. {
  15877. uint8_t dequantized_endpoints[NUM_MODE11_ENDPOINTS];
  15878. for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
  15879. dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
  15880. decode_mode11_to_qlog12_ise20(dequantized_endpoints, e);
  15881. }
  15882. for (uint32_t i = 0; i < 2; i++)
  15883. {
  15884. if (e[i][0] > (int)MAX_QLOG12)
  15885. return false;
  15886. if (e[i][1] > (int)MAX_QLOG12)
  15887. return false;
  15888. if (e[i][2] > (int)MAX_QLOG12)
  15889. return false;
  15890. }
  15891. return true;
  15892. }
  15893. //--------------------------------------------------------------------------------------------------------------------------
  15894. bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
  15895. {
  15896. assert(g_astc_hdr_core_initialized);
  15897. assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8));
  15898. if (best_blk.m_weight_ise_range == 5)
  15899. {
  15900. // Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets
  15901. bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights);
  15902. }
  15903. else
  15904. {
  15905. uint8_t bc6h_weights[16];
  15906. if (best_blk.m_weight_ise_range == 1)
  15907. {
  15908. // weight ISE 1: 3 levels
  15909. static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 8, 15 };
  15910. for (uint32_t i = 0; i < 16; i++)
  15911. bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]];
  15912. }
  15913. else if (best_blk.m_weight_ise_range == 2)
  15914. {
  15915. // weight ISE 2: 4 levels
  15916. static const uint8_t s_astc2_to_bc6h_4[4] = { 0, 5, 10, 15 };
  15917. for (uint32_t i = 0; i < 16; i++)
  15918. bc6h_weights[i] = s_astc2_to_bc6h_4[best_blk.m_weights[i]];
  15919. }
  15920. else if (best_blk.m_weight_ise_range == 3)
  15921. {
  15922. // weight ISE 3: 5 levels
  15923. static const uint8_t s_astc3_to_bc6h_4[5] = { 0, 4, 7, 11, 15 };
  15924. for (uint32_t i = 0; i < 16; i++)
  15925. bc6h_weights[i] = s_astc3_to_bc6h_4[best_blk.m_weights[i]];
  15926. }
  15927. else if (best_blk.m_weight_ise_range == 4)
  15928. {
  15929. // weight ISE 4: 6 levels
  15930. static const uint8_t s_astc4_to_bc6h_4[6] = { 0, 15, 3, 12, 6, 9 };
  15931. for (uint32_t i = 0; i < 16; i++)
  15932. bc6h_weights[i] = s_astc4_to_bc6h_4[best_blk.m_weights[i]];
  15933. }
  15934. else if (best_blk.m_weight_ise_range == 6)
  15935. {
  15936. // weight ISE 6: 10 levels
  15937. static const uint8_t s_astc6_to_bc6h_4[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 };
  15938. for (uint32_t i = 0; i < 16; i++)
  15939. bc6h_weights[i] = s_astc6_to_bc6h_4[best_blk.m_weights[i]];
  15940. }
  15941. else if (best_blk.m_weight_ise_range == 7)
  15942. {
  15943. // weight ISE 7: 12 levels
  15944. static const uint8_t s_astc7_to_bc6h_4[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 };
  15945. for (uint32_t i = 0; i < 16; i++)
  15946. bc6h_weights[i] = s_astc7_to_bc6h_4[best_blk.m_weights[i]];
  15947. }
  15948. else if (best_blk.m_weight_ise_range == 8)
  15949. {
  15950. // 16 levels
  15951. memcpy(bc6h_weights, best_blk.m_weights, 16);
  15952. }
  15953. else
  15954. {
  15955. assert(0);
  15956. return false;
  15957. }
  15958. bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, bc6h_weights);
  15959. }
  15960. return true;
  15961. }
  15962. //--------------------------------------------------------------------------------------------------------------------------
  15963. bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
  15964. {
  15965. assert(g_astc_hdr_core_initialized);
  15966. assert(best_blk.m_num_partitions == 2);
  15967. assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
  15968. half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index]
  15969. // UASTC HDR checks
  15970. // Both CEM's must be equal in 2-subset UASTC HDR.
  15971. if (best_blk.m_color_endpoint_modes[0] != best_blk.m_color_endpoint_modes[1])
  15972. return false;
  15973. if ((best_blk.m_color_endpoint_modes[0] != 7) && (best_blk.m_color_endpoint_modes[0] != 11))
  15974. return false;
  15975. if (best_blk.m_color_endpoint_modes[0] == 7)
  15976. {
  15977. if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 20)) ||
  15978. ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 20)) ||
  15979. ((best_blk.m_weight_ise_range == 3) && (best_blk.m_endpoint_ise_range == 19)) ||
  15980. ((best_blk.m_weight_ise_range == 4) && (best_blk.m_endpoint_ise_range == 17)) ||
  15981. ((best_blk.m_weight_ise_range == 5) && (best_blk.m_endpoint_ise_range == 15))))
  15982. {
  15983. return false;
  15984. }
  15985. }
  15986. else
  15987. {
  15988. if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 14)) ||
  15989. ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 12))))
  15990. {
  15991. return false;
  15992. }
  15993. }
  15994. for (uint32_t s = 0; s < 2; s++)
  15995. {
  15996. int e[2][3];
  15997. if (best_blk.m_color_endpoint_modes[0] == 7)
  15998. {
  15999. bool success = decode_mode7_to_qlog12(best_blk.m_endpoints + s * NUM_MODE7_ENDPOINTS, e, nullptr, best_blk.m_endpoint_ise_range);
  16000. if (!success)
  16001. return false;
  16002. }
  16003. else
  16004. {
  16005. bool success = decode_mode11_to_qlog12(best_blk.m_endpoints + s * NUM_MODE11_ENDPOINTS, e, best_blk.m_endpoint_ise_range);
  16006. if (!success)
  16007. return false;
  16008. }
  16009. for (uint32_t c = 0; c < 3; c++)
  16010. {
  16011. bc6h_endpoints[s][c][0] = qlog_to_half_slow(e[0][c], 12);
  16012. if (is_half_inf_or_nan(bc6h_endpoints[s][c][0]))
  16013. return false;
  16014. bc6h_endpoints[s][c][1] = qlog_to_half_slow(e[1][c], 12);
  16015. if (is_half_inf_or_nan(bc6h_endpoints[s][c][1]))
  16016. return false;
  16017. }
  16018. }
  16019. uint8_t bc6h_weights[16];
  16020. if (best_blk.m_weight_ise_range == 1)
  16021. {
  16022. static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 4, 7 };
  16023. for (uint32_t i = 0; i < 16; i++)
  16024. bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]];
  16025. }
  16026. else if (best_blk.m_weight_ise_range == 2)
  16027. {
  16028. static const uint8_t s_astc2_to_bc6h_3[4] = { 0, 2, 5, 7 };
  16029. for (uint32_t i = 0; i < 16; i++)
  16030. bc6h_weights[i] = s_astc2_to_bc6h_3[best_blk.m_weights[i]];
  16031. }
  16032. else if (best_blk.m_weight_ise_range == 3)
  16033. {
  16034. static const uint8_t s_astc3_to_bc6h_3[5] = { 0, 2, 4, 5, 7 };
  16035. for (uint32_t i = 0; i < 16; i++)
  16036. bc6h_weights[i] = s_astc3_to_bc6h_3[best_blk.m_weights[i]];
  16037. }
  16038. else if (best_blk.m_weight_ise_range == 4)
  16039. {
  16040. static const uint8_t s_astc4_to_bc6h_3[6] = { 0, 7, 1, 6, 3, 4 };
  16041. for (uint32_t i = 0; i < 16; i++)
  16042. bc6h_weights[i] = s_astc4_to_bc6h_3[best_blk.m_weights[i]];
  16043. }
  16044. else if (best_blk.m_weight_ise_range == 5)
  16045. {
  16046. memcpy(bc6h_weights, best_blk.m_weights, 16);
  16047. }
  16048. else
  16049. {
  16050. assert(0);
  16051. return false;
  16052. }
  16053. bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights);
  16054. return true;
  16055. }
  16056. //--------------------------------------------------------------------------------------------------------------------------
  16057. // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
  16058. bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk)
  16059. {
  16060. assert(g_astc_hdr_core_initialized);
  16061. if (!g_astc_hdr_core_initialized)
  16062. {
  16063. assert(0);
  16064. return false;
  16065. }
  16066. astc_helpers::log_astc_block log_blk;
  16067. if (!astc_helpers::unpack_block(&src_blk, log_blk, 4, 4))
  16068. {
  16069. // Failed unpacking ASTC data
  16070. return false;
  16071. }
  16072. return astc_hdr_transcode_to_bc6h(log_blk, dst_blk);
  16073. }
  16074. //--------------------------------------------------------------------------------------------------------------------------
  16075. // Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
  16076. bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk)
  16077. {
  16078. assert(g_astc_hdr_core_initialized);
  16079. if (!g_astc_hdr_core_initialized)
  16080. {
  16081. assert(0);
  16082. return false;
  16083. }
  16084. if (log_blk.m_solid_color_flag_ldr)
  16085. {
  16086. // Don't support LDR solid colors.
  16087. return false;
  16088. }
  16089. if (log_blk.m_solid_color_flag_hdr)
  16090. {
  16091. // Solid color HDR block
  16092. return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color);
  16093. }
  16094. // Only support 4x4 grid sizes
  16095. if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4))
  16096. return false;
  16097. // Don't support dual plane encoding
  16098. if (log_blk.m_dual_plane)
  16099. return false;
  16100. if (log_blk.m_num_partitions == 1)
  16101. {
  16102. // Handle 1 partition (or subset)
  16103. // UASTC HDR checks
  16104. if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8))
  16105. return false;
  16106. int e[2][3];
  16107. bool success;
  16108. if (log_blk.m_color_endpoint_modes[0] == 7)
  16109. {
  16110. if (log_blk.m_endpoint_ise_range != 20)
  16111. return false;
  16112. success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range);
  16113. }
  16114. else if (log_blk.m_color_endpoint_modes[0] == 11)
  16115. {
  16116. // UASTC HDR checks
  16117. if (log_blk.m_weight_ise_range <= 7)
  16118. {
  16119. if (log_blk.m_endpoint_ise_range != 20)
  16120. return false;
  16121. }
  16122. else if (log_blk.m_endpoint_ise_range != 19)
  16123. {
  16124. return false;
  16125. }
  16126. success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range);
  16127. }
  16128. else
  16129. {
  16130. return false;
  16131. }
  16132. if (!success)
  16133. return false;
  16134. // Transform endpoints to half float
  16135. half_float h_e[3][2] =
  16136. {
  16137. { qlog_to_half_slow(e[0][0], 12), qlog_to_half_slow(e[1][0], 12) },
  16138. { qlog_to_half_slow(e[0][1], 12), qlog_to_half_slow(e[1][1], 12) },
  16139. { qlog_to_half_slow(e[0][2], 12), qlog_to_half_slow(e[1][2], 12) }
  16140. };
  16141. // Sanity check for NaN/Inf
  16142. for (uint32_t i = 0; i < 2; i++)
  16143. if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i]))
  16144. return false;
  16145. // Transcode to bc6h
  16146. if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk))
  16147. return false;
  16148. }
  16149. else if (log_blk.m_num_partitions == 2)
  16150. {
  16151. // Handle 2 partition (or subset)
  16152. int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[log_blk.m_partition_id];
  16153. if (common_bc7_pat_index < 0)
  16154. return false;
  16155. assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
  16156. if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk))
  16157. return false;
  16158. }
  16159. else
  16160. {
  16161. // Only supports 1 or 2 partitions (or subsets)
  16162. return false;
  16163. }
  16164. return true;
  16165. }
  16166. #endif // BASISD_SUPPORT_UASTC_HDR
  16167. } // namespace basist