  1. // basisu_gpu_texture.cpp
  2. // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. #include "basisu_gpu_texture.h"
  16. #include "basisu_enc.h"
  17. #include "basisu_pvrtc1_4.h"
  18. #if BASISU_USE_ASTC_DECOMPRESS
  19. #include "basisu_astc_decomp.h"
  20. #endif
  21. #include "basisu_bc7enc.h"
  22. namespace basisu
  23. {
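// Decodes the alpha half of an ETC2 EAC (A8) block: each of the 16 texels is reconstructed as
// clamp255(base + table[selector] * multiplier), and only the .a channel of pPixels[0..15] is
// written, in raster order.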
  24. void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels)
  25. {
  26. static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8");
  27. const eac_a8_block *pBlock = static_cast<const eac_a8_block *>(pBlock_bits);
  28. const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table];
  29. const uint64_t selector_bits = pBlock->get_selector_bits();
  30. const int32_t base = pBlock->m_base;
  31. const int32_t mul = pBlock->m_multiplier;
  32. pPixels[0].a = clamp255(base + pTable[pBlock->get_selector(0, 0, selector_bits)] * mul);
  33. pPixels[1].a = clamp255(base + pTable[pBlock->get_selector(1, 0, selector_bits)] * mul);
  34. pPixels[2].a = clamp255(base + pTable[pBlock->get_selector(2, 0, selector_bits)] * mul);
  35. pPixels[3].a = clamp255(base + pTable[pBlock->get_selector(3, 0, selector_bits)] * mul);
  36. pPixels[4].a = clamp255(base + pTable[pBlock->get_selector(0, 1, selector_bits)] * mul);
  37. pPixels[5].a = clamp255(base + pTable[pBlock->get_selector(1, 1, selector_bits)] * mul);
  38. pPixels[6].a = clamp255(base + pTable[pBlock->get_selector(2, 1, selector_bits)] * mul);
  39. pPixels[7].a = clamp255(base + pTable[pBlock->get_selector(3, 1, selector_bits)] * mul);
  40. pPixels[8].a = clamp255(base + pTable[pBlock->get_selector(0, 2, selector_bits)] * mul);
  41. pPixels[9].a = clamp255(base + pTable[pBlock->get_selector(1, 2, selector_bits)] * mul);
  42. pPixels[10].a = clamp255(base + pTable[pBlock->get_selector(2, 2, selector_bits)] * mul);
  43. pPixels[11].a = clamp255(base + pTable[pBlock->get_selector(3, 2, selector_bits)] * mul);
  44. pPixels[12].a = clamp255(base + pTable[pBlock->get_selector(0, 3, selector_bits)] * mul);
  45. pPixels[13].a = clamp255(base + pTable[pBlock->get_selector(1, 3, selector_bits)] * mul);
  46. pPixels[14].a = clamp255(base + pTable[pBlock->get_selector(2, 3, selector_bits)] * mul);
  47. pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul);
  48. }
  49. struct bc1_block
  50. {
  51. enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
  52. uint8_t m_low_color[cTotalEndpointBytes];
  53. uint8_t m_high_color[cTotalEndpointBytes];
  54. uint8_t m_selectors[cTotalSelectorBytes];
  55. inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
  56. inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
  57. static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b)
  58. {
  59. r = (c >> 11) & 31;
  60. g = (c >> 5) & 63;
  61. b = c & 31;
  62. r = (r << 3) | (r >> 2);
  63. g = (g << 2) | (g >> 4);
  64. b = (b << 3) | (b >> 2);
  65. }
  66. inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * 2)) & 3; }
  67. };
68. // Returns true if the block uses 3-color punchthrough alpha mode.
  69. bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
  70. {
  71. static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
  72. const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
  73. const uint32_t l = pBlock->get_low_color();
  74. const uint32_t h = pBlock->get_high_color();
  75. color_rgba c[4];
  76. uint32_t r0, g0, b0, r1, g1, b1;
  77. bc1_block::unpack_color(l, r0, g0, b0);
  78. bc1_block::unpack_color(h, r1, g1, b1);
  79. c[0].set_noclamp_rgba(r0, g0, b0, 255);
  80. c[1].set_noclamp_rgba(r1, g1, b1, 255);
  81. bool used_punchthrough = false;
  82. if (l > h)
  83. {
  84. c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
  85. c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
  86. }
  87. else
  88. {
  89. c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
  90. c[3].set_noclamp_rgba(0, 0, 0, 0);
  91. used_punchthrough = true;
  92. }
  93. if (set_alpha)
  94. {
  95. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  96. {
  97. pPixels[0] = c[pBlock->get_selector(0, y)];
  98. pPixels[1] = c[pBlock->get_selector(1, y)];
  99. pPixels[2] = c[pBlock->get_selector(2, y)];
  100. pPixels[3] = c[pBlock->get_selector(3, y)];
  101. }
  102. }
  103. else
  104. {
  105. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  106. {
  107. pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
  108. pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
  109. pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
  110. pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
  111. }
  112. }
  113. return used_punchthrough;
  114. }
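// unpack_bc1_nv() and unpack_bc1_amd() (below) mirror unpack_bc1(), but reproduce the slightly
// different rounding of NVIDIA's and AMD's hardware BC1 decoders (note the 3*22/8 channel
// scaling here in place of plain bit replication).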
  115. bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
  116. {
  117. static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
  118. const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
  119. const uint32_t l = pBlock->get_low_color();
  120. const uint32_t h = pBlock->get_high_color();
  121. color_rgba c[4];
  122. int r0 = (l >> 11) & 31;
  123. int g0 = (l >> 5) & 63;
  124. int b0 = l & 31;
  125. int r1 = (h >> 11) & 31;
  126. int g1 = (h >> 5) & 63;
  127. int b1 = h & 31;
  128. c[0].b = (uint8_t)((3 * b0 * 22) / 8);
  129. c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4));
  130. c[0].r = (uint8_t)((3 * r0 * 22) / 8);
  131. c[0].a = 0xFF;
  132. c[1].r = (uint8_t)((3 * r1 * 22) / 8);
  133. c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4));
  134. c[1].b = (uint8_t)((3 * b1 * 22) / 8);
  135. c[1].a = 0xFF;
  136. int gdiff = c[1].g - c[0].g;
  137. bool used_punchthrough = false;
  138. if (l > h)
  139. {
  140. c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8);
  141. c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256));
  142. c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8);
  143. c[2].a = 0xFF;
  144. c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8);
  145. c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256);
  146. c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8);
  147. c[3].a = 0xFF;
  148. }
  149. else
  150. {
  151. c[2].r = (uint8_t)(((r0 + r1) * 33) / 8);
  152. c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256);
  153. c[2].b = (uint8_t)(((b0 + b1) * 33) / 8);
  154. c[2].a = 0xFF;
  155. c[3].set_noclamp_rgba(0, 0, 0, 0);
  156. used_punchthrough = true;
  157. }
  158. if (set_alpha)
  159. {
  160. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  161. {
  162. pPixels[0] = c[pBlock->get_selector(0, y)];
  163. pPixels[1] = c[pBlock->get_selector(1, y)];
  164. pPixels[2] = c[pBlock->get_selector(2, y)];
  165. pPixels[3] = c[pBlock->get_selector(3, y)];
  166. }
  167. }
  168. else
  169. {
  170. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  171. {
  172. pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
  173. pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
  174. pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
  175. pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
  176. }
  177. }
  178. return used_punchthrough;
  179. }
  180. static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; }
  181. static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; }
  182. bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
  183. {
  184. const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
  185. const uint32_t l = pBlock->get_low_color();
  186. const uint32_t h = pBlock->get_high_color();
  187. color_rgba c[4];
  188. uint32_t r0, g0, b0, r1, g1, b1;
  189. bc1_block::unpack_color(l, r0, g0, b0);
  190. bc1_block::unpack_color(h, r1, g1, b1);
  191. c[0].set_noclamp_rgba(r0, g0, b0, 255);
  192. c[1].set_noclamp_rgba(r1, g1, b1, 255);
  193. bool used_punchthrough = false;
  194. if (l > h)
  195. {
  196. c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
  197. c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
  198. }
  199. else
  200. {
  201. c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
  202. c[3].set_noclamp_rgba(0, 0, 0, 0);
  203. used_punchthrough = true;
  204. }
  205. if (set_alpha)
  206. {
  207. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  208. {
  209. pPixels[0] = c[pBlock->get_selector(0, y)];
  210. pPixels[1] = c[pBlock->get_selector(1, y)];
  211. pPixels[2] = c[pBlock->get_selector(2, y)];
  212. pPixels[3] = c[pBlock->get_selector(3, y)];
  213. }
  214. }
  215. else
  216. {
  217. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  218. {
  219. pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
  220. pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
  221. pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
  222. pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
  223. }
  224. }
  225. return used_punchthrough;
  226. }
  227. struct bc4_block
  228. {
  229. enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
  230. uint8_t m_endpoints[2];
  231. uint8_t m_selectors[cTotalSelectorBytes];
  232. inline uint32_t get_low_alpha() const { return m_endpoints[0]; }
  233. inline uint32_t get_high_alpha() const { return m_endpoints[1]; }
  234. inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }
  235. inline uint64_t get_selector_bits() const
  236. {
  237. return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) |
  238. (((uint64_t)m_selectors[4]) << 32U) |
  239. (((uint64_t)m_selectors[5]) << 40U);
  240. }
  241. inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const
  242. {
  243. assert((x < 4U) && (y < 4U));
  244. return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1);
  245. }
  246. static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h)
  247. {
  248. pDst[0] = static_cast<uint8_t>(l);
  249. pDst[1] = static_cast<uint8_t>(h);
  250. pDst[2] = static_cast<uint8_t>((l * 4 + h) / 5);
  251. pDst[3] = static_cast<uint8_t>((l * 3 + h * 2) / 5);
  252. pDst[4] = static_cast<uint8_t>((l * 2 + h * 3) / 5);
  253. pDst[5] = static_cast<uint8_t>((l + h * 4) / 5);
  254. pDst[6] = 0;
  255. pDst[7] = 255;
  256. return 6;
  257. }
  258. static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h)
  259. {
  260. pDst[0] = static_cast<uint8_t>(l);
  261. pDst[1] = static_cast<uint8_t>(h);
  262. pDst[2] = static_cast<uint8_t>((l * 6 + h) / 7);
  263. pDst[3] = static_cast<uint8_t>((l * 5 + h * 2) / 7);
  264. pDst[4] = static_cast<uint8_t>((l * 4 + h * 3) / 7);
  265. pDst[5] = static_cast<uint8_t>((l * 3 + h * 4) / 7);
  266. pDst[6] = static_cast<uint8_t>((l * 2 + h * 5) / 7);
  267. pDst[7] = static_cast<uint8_t>((l + h * 6) / 7);
  268. return 8;
  269. }
  270. static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h)
  271. {
  272. if (l > h)
  273. return get_block_values8(pDst, l, h);
  274. else
  275. return get_block_values6(pDst, l, h);
  276. }
  277. };
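// unpack_bc4() writes 16 decoded values using the caller-supplied byte stride, so it can target a
// single channel of an interleaved RGBA buffer (stride = sizeof(color_rgba)), as the BC3/BC5
// unpackers below do, or a tightly packed single-channel plane (stride = 1).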
  278. void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride)
  279. {
  280. static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8");
  281. const bc4_block *pBlock = static_cast<const bc4_block *>(pBlock_bits);
  282. uint8_t sel_values[8];
  283. bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha());
  284. const uint64_t selector_bits = pBlock->get_selector_bits();
  285. for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U))
  286. {
  287. pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)];
  288. pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)];
  289. pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)];
  290. pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)];
  291. }
  292. }
293. // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPUs for BC3.
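// A BC3 block is 16 bytes: an 8-byte BC4-style alpha block followed by an 8-byte BC1 color block.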
  294. bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels)
  295. {
  296. bool success = true;
  297. if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(bc4_block), pPixels, true))
  298. success = false;
  299. unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba));
  300. return success;
  301. }
302. // Writes only the R and G channels.
  303. void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels)
  304. {
  305. unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba));
  306. unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba));
  307. }
308. // ATC isn't officially documented, so I'm relying on these references:
  309. // http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf
  310. // https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c
  311. // The paper incorrectly says the ATC lerp factors are 1/3 and 2/3, but they are actually 3/8 and 5/8.
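// In the 4-color path below this works out to c[1] = (5*c[0] + 3*c[3]) >> 3 and c[2] = (3*c[0] + 5*c[3]) >> 3, per channel.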
  312. void unpack_atc(const void* pBlock_bits, color_rgba* pPixels)
  313. {
  314. const uint8_t* pBytes = static_cast<const uint8_t*>(pBlock_bits);
  315. const uint16_t color0 = pBytes[0] | (pBytes[1] << 8U);
  316. const uint16_t color1 = pBytes[2] | (pBytes[3] << 8U);
  317. uint32_t sels = pBytes[4] | (pBytes[5] << 8U) | (pBytes[6] << 16U) | (pBytes[7] << 24U);
  318. const bool mode = (color0 & 0x8000) != 0;
  319. color_rgba c[4];
  320. c[0].set((color0 >> 10) & 31, (color0 >> 5) & 31, color0 & 31, 255);
  321. c[0].r = (c[0].r << 3) | (c[0].r >> 2);
  322. c[0].g = (c[0].g << 3) | (c[0].g >> 2);
  323. c[0].b = (c[0].b << 3) | (c[0].b >> 2);
  324. c[3].set((color1 >> 11) & 31, (color1 >> 5) & 63, color1 & 31, 255);
  325. c[3].r = (c[3].r << 3) | (c[3].r >> 2);
  326. c[3].g = (c[3].g << 2) | (c[3].g >> 4);
  327. c[3].b = (c[3].b << 3) | (c[3].b >> 2);
  328. if (mode)
  329. {
  330. c[1].set(basisu::maximum(0, c[0].r - (c[3].r >> 2)), basisu::maximum(0, c[0].g - (c[3].g >> 2)), basisu::maximum(0, c[0].b - (c[3].b >> 2)), 255);
  331. c[2] = c[0];
  332. c[0].set(0, 0, 0, 255);
  333. }
  334. else
  335. {
  336. c[1].r = (c[0].r * 5 + c[3].r * 3) >> 3;
  337. c[1].g = (c[0].g * 5 + c[3].g * 3) >> 3;
  338. c[1].b = (c[0].b * 5 + c[3].b * 3) >> 3;
  339. c[2].r = (c[0].r * 3 + c[3].r * 5) >> 3;
  340. c[2].g = (c[0].g * 3 + c[3].g * 5) >> 3;
  341. c[2].b = (c[0].b * 3 + c[3].b * 5) >> 3;
  342. }
  343. for (uint32_t i = 0; i < 16; i++)
  344. {
  345. const uint32_t s = sels & 3;
  346. pPixels[i] = c[s];
  347. sels >>= 2;
  348. }
  349. }
  350. // BC7 mode 0-7 decompression.
351. // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3-subset, 2-subset, 1-subset, and dual-plane modes together into simple shared routines.
  352. static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; }
  353. static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; }
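// Example of the p-bit variant: bc7_dequant(0xA, 1, 4) -> ((0xA << 1) | 1) = 0x15, left-shifted to
// the top of 8 bits (0xA8), then or'd with its own high bits (0xA8 >> 5) to give 0xAD.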
  354. static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; }
  355. static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; }
  356. static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + 32) >> 6; }
  357. static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits)
  358. {
  359. assert(l <= 255 && h <= 255);
  360. switch (bits)
  361. {
  362. case 2: return bc7_interp2(l, h, w);
  363. case 3: return bc7_interp3(l, h, w);
  364. case 4: return bc7_interp4(l, h, w);
  365. default:
  366. break;
  367. }
  368. return 0;
  369. }
  370. bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
  371. {
  372. //const uint32_t SUBSETS = 3;
  373. const uint32_t ENDPOINTS = 6;
  374. const uint32_t COMPS = 3;
  375. const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2;
  376. const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5;
  377. const uint32_t PBITS = (mode == 0) ? 6 : 0;
  378. const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
  379. uint32_t bit_offset = 0;
  380. const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
  381. if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
  382. const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6);
  383. color_rgba endpoints[ENDPOINTS];
  384. for (uint32_t c = 0; c < COMPS; c++)
  385. for (uint32_t e = 0; e < ENDPOINTS; e++)
  386. endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
  387. uint32_t pbits[6];
  388. for (uint32_t p = 0; p < PBITS; p++)
  389. pbits[p] = read_bits32(pBuf, bit_offset, 1);
  390. uint32_t weights[16];
  391. for (uint32_t i = 0; i < 16; i++)
  392. weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
  393. assert(bit_offset == 128);
  394. for (uint32_t e = 0; e < ENDPOINTS; e++)
  395. for (uint32_t c = 0; c < 4; c++)
  396. endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS)));
  397. color_rgba block_colors[3][8];
  398. for (uint32_t s = 0; s < 3; s++)
  399. for (uint32_t i = 0; i < WEIGHT_VALS; i++)
  400. {
  401. for (uint32_t c = 0; c < 3; c++)
  402. block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
  403. block_colors[s][i][3] = 255;
  404. }
  405. for (uint32_t i = 0; i < 16; i++)
  406. pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]];
  407. return true;
  408. }
  409. bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
  410. {
  411. //const uint32_t SUBSETS = 2;
  412. const uint32_t ENDPOINTS = 4;
  413. const uint32_t COMPS = (mode == 7) ? 4 : 3;
  414. const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2;
  415. const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7);
  416. const uint32_t PBITS = (mode == 1) ? 2 : 4;
  417. const uint32_t SHARED_PBITS = (mode == 1) ? true : false;
  418. const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
  419. uint32_t bit_offset = 0;
  420. const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
  421. if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
  422. const uint32_t part = read_bits32(pBuf, bit_offset, 6);
  423. color_rgba endpoints[ENDPOINTS];
  424. for (uint32_t c = 0; c < COMPS; c++)
  425. for (uint32_t e = 0; e < ENDPOINTS; e++)
  426. endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
  427. uint32_t pbits[4];
  428. for (uint32_t p = 0; p < PBITS; p++)
  429. pbits[p] = read_bits32(pBuf, bit_offset, 1);
  430. uint32_t weights[16];
  431. for (uint32_t i = 0; i < 16; i++)
  432. weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
  433. assert(bit_offset == 128);
  434. for (uint32_t e = 0; e < ENDPOINTS; e++)
  435. for (uint32_t c = 0; c < 4; c++)
  436. endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS));
  437. color_rgba block_colors[2][8];
  438. for (uint32_t s = 0; s < 2; s++)
  439. for (uint32_t i = 0; i < WEIGHT_VALS; i++)
  440. {
  441. for (uint32_t c = 0; c < COMPS; c++)
  442. block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
  443. block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3];
  444. }
  445. for (uint32_t i = 0; i < 16; i++)
  446. pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]];
  447. return true;
  448. }
  449. bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
  450. {
  451. const uint32_t ENDPOINTS = 2;
  452. const uint32_t COMPS = 4;
  453. const uint32_t WEIGHT_BITS = 2;
  454. const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2;
  455. const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7;
  456. const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8;
  457. //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
  458. //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS;
  459. uint32_t bit_offset = 0;
  460. const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
  461. if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
  462. const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2);
  463. const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0;
  464. color_rgba endpoints[ENDPOINTS];
  465. for (uint32_t c = 0; c < COMPS; c++)
  466. for (uint32_t e = 0; e < ENDPOINTS; e++)
  467. endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
  468. const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS };
  469. uint32_t weights[16], a_weights[16];
  470. for (uint32_t i = 0; i < 16; i++)
  471. (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0));
  472. for (uint32_t i = 0; i < 16; i++)
  473. (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0));
  474. assert(bit_offset == 128);
  475. for (uint32_t e = 0; e < ENDPOINTS; e++)
  476. for (uint32_t c = 0; c < 4; c++)
  477. endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
  478. color_rgba block_colors[8];
  479. for (uint32_t i = 0; i < (1U << weight_bits[0]); i++)
  480. for (uint32_t c = 0; c < 3; c++)
  481. block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]);
  482. for (uint32_t i = 0; i < (1U << weight_bits[1]); i++)
  483. block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]);
  484. for (uint32_t i = 0; i < 16; i++)
  485. {
  486. pPixels[i] = block_colors[weights[i]];
  487. pPixels[i].a = block_colors[a_weights[i]].a;
  488. if (comp_rot >= 1)
  489. std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]);
  490. }
  491. return true;
  492. }
  493. struct bc7_mode_6
  494. {
  495. struct
  496. {
  497. uint64_t m_mode : 7;
  498. uint64_t m_r0 : 7;
  499. uint64_t m_r1 : 7;
  500. uint64_t m_g0 : 7;
  501. uint64_t m_g1 : 7;
  502. uint64_t m_b0 : 7;
  503. uint64_t m_b1 : 7;
  504. uint64_t m_a0 : 7;
  505. uint64_t m_a1 : 7;
  506. uint64_t m_p0 : 1;
  507. } m_lo;
  508. union
  509. {
  510. struct
  511. {
  512. uint64_t m_p1 : 1;
  513. uint64_t m_s00 : 3;
  514. uint64_t m_s10 : 4;
  515. uint64_t m_s20 : 4;
  516. uint64_t m_s30 : 4;
  517. uint64_t m_s01 : 4;
  518. uint64_t m_s11 : 4;
  519. uint64_t m_s21 : 4;
  520. uint64_t m_s31 : 4;
  521. uint64_t m_s02 : 4;
  522. uint64_t m_s12 : 4;
  523. uint64_t m_s22 : 4;
  524. uint64_t m_s32 : 4;
  525. uint64_t m_s03 : 4;
  526. uint64_t m_s13 : 4;
  527. uint64_t m_s23 : 4;
  528. uint64_t m_s33 : 4;
  529. } m_hi;
  530. uint64_t m_hi_bits;
  531. };
  532. };
  533. bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels)
  534. {
  535. static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16");
  536. const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits);
  537. if (block.m_lo.m_mode != (1 << 6))
  538. return false;
  539. const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0);
  540. const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0);
  541. const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0);
  542. const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0);
  543. const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1);
  544. const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1);
  545. const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1);
  546. const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1);
  547. color_rgba vals[16];
  548. for (uint32_t i = 0; i < 16; i++)
  549. {
  550. const uint32_t w = basist::g_bc7_weights4[i];
  551. const uint32_t iw = 64 - w;
  552. vals[i].set_noclamp_rgba(
  553. (r0 * iw + r1 * w + 32) >> 6,
  554. (g0 * iw + g1 * w + 32) >> 6,
  555. (b0 * iw + b1 * w + 32) >> 6,
  556. (a0 * iw + a1 * w + 32) >> 6);
  557. }
  558. pPixels[0] = vals[block.m_hi.m_s00];
  559. pPixels[1] = vals[block.m_hi.m_s10];
  560. pPixels[2] = vals[block.m_hi.m_s20];
  561. pPixels[3] = vals[block.m_hi.m_s30];
  562. pPixels[4] = vals[block.m_hi.m_s01];
  563. pPixels[5] = vals[block.m_hi.m_s11];
  564. pPixels[6] = vals[block.m_hi.m_s21];
  565. pPixels[7] = vals[block.m_hi.m_s31];
  566. pPixels[8] = vals[block.m_hi.m_s02];
  567. pPixels[9] = vals[block.m_hi.m_s12];
  568. pPixels[10] = vals[block.m_hi.m_s22];
  569. pPixels[11] = vals[block.m_hi.m_s32];
  570. pPixels[12] = vals[block.m_hi.m_s03];
  571. pPixels[13] = vals[block.m_hi.m_s13];
  572. pPixels[14] = vals[block.m_hi.m_s23];
  573. pPixels[15] = vals[block.m_hi.m_s33];
  574. return true;
  575. }
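// Dispatches on the BC7 mode: mode m is encoded as m zero bits followed by a 1, so the mode is the
// index of the lowest set bit in the block's first byte.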
  576. bool unpack_bc7(const void *pBlock, color_rgba *pPixels)
  577. {
  578. const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0];
  579. for (uint32_t mode = 0; mode <= 7; mode++)
  580. {
  581. if (first_byte & (1U << mode))
  582. {
  583. switch (mode)
  584. {
  585. case 0:
  586. case 2:
  587. return unpack_bc7_mode0_2(mode, pBlock, pPixels);
  588. case 1:
  589. case 3:
  590. case 7:
  591. return unpack_bc7_mode1_3_7(mode, pBlock, pPixels);
  592. case 4:
  593. case 5:
  594. return unpack_bc7_mode4_5(mode, pBlock, pPixels);
  595. case 6:
  596. return unpack_bc7_mode6(pBlock, pPixels);
  597. default:
  598. break;
  599. }
  600. }
  601. }
  602. return false;
  603. }
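// Minimal usage sketch (pBC7Block is a hypothetical pointer to one 16-byte block):
//
//   color_rgba tile[16];
//   if (!unpack_bc7(pBC7Block, tile))
//     { /* not a valid BC7 block: no recognizable mode */ }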
  604. struct fxt1_block
  605. {
  606. union
  607. {
  608. struct
  609. {
  610. uint64_t m_t00 : 2;
  611. uint64_t m_t01 : 2;
  612. uint64_t m_t02 : 2;
  613. uint64_t m_t03 : 2;
  614. uint64_t m_t04 : 2;
  615. uint64_t m_t05 : 2;
  616. uint64_t m_t06 : 2;
  617. uint64_t m_t07 : 2;
  618. uint64_t m_t08 : 2;
  619. uint64_t m_t09 : 2;
  620. uint64_t m_t10 : 2;
  621. uint64_t m_t11 : 2;
  622. uint64_t m_t12 : 2;
  623. uint64_t m_t13 : 2;
  624. uint64_t m_t14 : 2;
  625. uint64_t m_t15 : 2;
  626. uint64_t m_t16 : 2;
  627. uint64_t m_t17 : 2;
  628. uint64_t m_t18 : 2;
  629. uint64_t m_t19 : 2;
  630. uint64_t m_t20 : 2;
  631. uint64_t m_t21 : 2;
  632. uint64_t m_t22 : 2;
  633. uint64_t m_t23 : 2;
  634. uint64_t m_t24 : 2;
  635. uint64_t m_t25 : 2;
  636. uint64_t m_t26 : 2;
  637. uint64_t m_t27 : 2;
  638. uint64_t m_t28 : 2;
  639. uint64_t m_t29 : 2;
  640. uint64_t m_t30 : 2;
  641. uint64_t m_t31 : 2;
  642. } m_lo;
  643. uint64_t m_lo_bits;
  644. uint8_t m_sels[8];
  645. };
  646. union
  647. {
  648. struct
  649. {
  650. #ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING
  651. // This is the format that 3DFX's DECOMP.EXE tool expects, which I'm assuming is what the actual 3DFX hardware wanted.
  652. // Unfortunately, color0/color1 and color2/color3 are flipped relative to the official OpenGL extension and Intel's documentation!
  653. uint64_t m_b1 : 5;
  654. uint64_t m_g1 : 5;
  655. uint64_t m_r1 : 5;
  656. uint64_t m_b0 : 5;
  657. uint64_t m_g0 : 5;
  658. uint64_t m_r0 : 5;
  659. uint64_t m_b3 : 5;
  660. uint64_t m_g3 : 5;
  661. uint64_t m_r3 : 5;
  662. uint64_t m_b2 : 5;
  663. uint64_t m_g2 : 5;
  664. uint64_t m_r2 : 5;
  665. #else
  666. // Intel's encoding, and the encoding in the OpenGL FXT1 spec.
  667. uint64_t m_b0 : 5;
  668. uint64_t m_g0 : 5;
  669. uint64_t m_r0 : 5;
  670. uint64_t m_b1 : 5;
  671. uint64_t m_g1 : 5;
  672. uint64_t m_r1 : 5;
  673. uint64_t m_b2 : 5;
  674. uint64_t m_g2 : 5;
  675. uint64_t m_r2 : 5;
  676. uint64_t m_b3 : 5;
  677. uint64_t m_g3 : 5;
  678. uint64_t m_r3 : 5;
  679. #endif
  680. uint64_t m_alpha : 1;
  681. uint64_t m_glsb : 2;
  682. uint64_t m_mode : 1;
  683. } m_hi;
  684. uint64_t m_hi_bits;
  685. };
  686. };
  687. static color_rgba expand_565(const color_rgba& c)
  688. {
  689. return color_rgba((c.r << 3) | (c.r >> 2), (c.g << 2) | (c.g >> 4), (c.b << 3) | (c.b >> 2), 255);
  690. }
  691. // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment.
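// A CC_MIXED block covers 8x4 pixels: colors 0/1 form a DXT1-style 4-color palette for the left
// 4x4 half, and colors 2/3 do the same for the right half (hence the x + y*8 addressing below).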
  692. bool unpack_fxt1(const void *p, color_rgba *pPixels)
  693. {
  694. const fxt1_block* pBlock = static_cast<const fxt1_block*>(p);
  695. if (pBlock->m_hi.m_mode == 0)
  696. return false;
  697. if (pBlock->m_hi.m_alpha == 1)
  698. return false;
  699. color_rgba colors[4];
  700. colors[0].r = pBlock->m_hi.m_r0;
  701. colors[0].g = (uint8_t)((pBlock->m_hi.m_g0 << 1) | ((pBlock->m_lo.m_t00 >> 1) ^ (pBlock->m_hi.m_glsb & 1)));
  702. colors[0].b = pBlock->m_hi.m_b0;
  703. colors[0].a = 255;
  704. colors[1].r = pBlock->m_hi.m_r1;
  705. colors[1].g = (uint8_t)((pBlock->m_hi.m_g1 << 1) | (pBlock->m_hi.m_glsb & 1));
  706. colors[1].b = pBlock->m_hi.m_b1;
  707. colors[1].a = 255;
  708. colors[2].r = pBlock->m_hi.m_r2;
  709. colors[2].g = (uint8_t)((pBlock->m_hi.m_g2 << 1) | ((pBlock->m_lo.m_t16 >> 1) ^ (pBlock->m_hi.m_glsb >> 1)));
  710. colors[2].b = pBlock->m_hi.m_b2;
  711. colors[2].a = 255;
  712. colors[3].r = pBlock->m_hi.m_r3;
  713. colors[3].g = (uint8_t)((pBlock->m_hi.m_g3 << 1) | (pBlock->m_hi.m_glsb >> 1));
  714. colors[3].b = pBlock->m_hi.m_b3;
  715. colors[3].a = 255;
  716. for (uint32_t i = 0; i < 4; i++)
  717. colors[i] = expand_565(colors[i]);
  718. color_rgba block0_colors[4];
  719. block0_colors[0] = colors[0];
  720. block0_colors[1] = color_rgba((colors[0].r * 2 + colors[1].r + 1) / 3, (colors[0].g * 2 + colors[1].g + 1) / 3, (colors[0].b * 2 + colors[1].b + 1) / 3, 255);
  721. block0_colors[2] = color_rgba((colors[1].r * 2 + colors[0].r + 1) / 3, (colors[1].g * 2 + colors[0].g + 1) / 3, (colors[1].b * 2 + colors[0].b + 1) / 3, 255);
  722. block0_colors[3] = colors[1];
  723. for (uint32_t i = 0; i < 16; i++)
  724. {
  725. const uint32_t sel = (pBlock->m_sels[i >> 2] >> ((i & 3) * 2)) & 3;
  726. const uint32_t x = i & 3;
  727. const uint32_t y = i >> 2;
  728. pPixels[x + y * 8] = block0_colors[sel];
  729. }
  730. color_rgba block1_colors[4];
  731. block1_colors[0] = colors[2];
  732. block1_colors[1] = color_rgba((colors[2].r * 2 + colors[3].r + 1) / 3, (colors[2].g * 2 + colors[3].g + 1) / 3, (colors[2].b * 2 + colors[3].b + 1) / 3, 255);
  733. block1_colors[2] = color_rgba((colors[3].r * 2 + colors[2].r + 1) / 3, (colors[3].g * 2 + colors[2].g + 1) / 3, (colors[3].b * 2 + colors[2].b + 1) / 3, 255);
  734. block1_colors[3] = colors[3];
  735. for (uint32_t i = 0; i < 16; i++)
  736. {
  737. const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3;
  738. const uint32_t x = i & 3;
  739. const uint32_t y = i >> 2;
  740. pPixels[4 + x + y * 8] = block1_colors[sel];
  741. }
  742. return true;
  743. }
  744. struct pvrtc2_block
  745. {
  746. uint8_t m_modulation[4];
  747. union
  748. {
  749. union
  750. {
  751. // Opaque mode: RGB colora=554 and colorb=555
  752. struct
  753. {
  754. uint32_t m_mod_flag : 1;
  755. uint32_t m_blue_a : 4;
  756. uint32_t m_green_a : 5;
  757. uint32_t m_red_a : 5;
  758. uint32_t m_hard_flag : 1;
  759. uint32_t m_blue_b : 5;
  760. uint32_t m_green_b : 5;
  761. uint32_t m_red_b : 5;
  762. uint32_t m_opaque_flag : 1;
  763. } m_opaque_color_data;
  764. // Transparent mode: RGBA colora=4433 and colorb=4443
  765. struct
  766. {
  767. uint32_t m_mod_flag : 1;
  768. uint32_t m_blue_a : 3;
  769. uint32_t m_green_a : 4;
  770. uint32_t m_red_a : 4;
  771. uint32_t m_alpha_a : 3;
  772. uint32_t m_hard_flag : 1;
  773. uint32_t m_blue_b : 4;
  774. uint32_t m_green_b : 4;
  775. uint32_t m_red_b : 4;
  776. uint32_t m_alpha_b : 3;
  777. uint32_t m_opaque_flag : 1;
  778. } m_trans_color_data;
  779. };
  780. uint32_t m_color_data_bits;
  781. };
  782. };
  783. static color_rgba convert_rgb_555_to_888(const color_rgba& col)
  784. {
  785. return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), 255);
  786. }
  787. static color_rgba convert_rgba_5554_to_8888(const color_rgba& col)
  788. {
  789. return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]);
  790. }
791. // PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1, modulation=0). In this mode, PVRTC2 looks much like BC1/ATC.
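// In this restricted mode each 2-bit modulation value selects one of four colors: color A, color B,
// or the two 3/8 and 5/8 blends computed below.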
  792. bool unpack_pvrtc2(const void *p, color_rgba *pPixels)
  793. {
  794. const pvrtc2_block* pBlock = static_cast<const pvrtc2_block*>(p);
  795. if ((!pBlock->m_opaque_color_data.m_hard_flag) || (pBlock->m_opaque_color_data.m_mod_flag))
  796. {
  797. // This mode isn't supported by the transcoder, so we aren't bothering with it here.
  798. return false;
  799. }
  800. color_rgba colors[4];
  801. if (pBlock->m_opaque_color_data.m_opaque_flag)
  802. {
  803. // colora=554
  804. color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255);
805. // colorb=555
  806. color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255);
  807. colors[0] = convert_rgb_555_to_888(color_a);
  808. colors[3] = convert_rgb_555_to_888(color_b);
  809. colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, 255);
  810. colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, 255);
  811. }
  812. else
  813. {
  814. // colora=4433
  815. color_rgba color_a(
  816. (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3),
  817. (pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3),
  818. (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1),
  819. pBlock->m_trans_color_data.m_alpha_a << 1);
820. // colorb=4443
  821. color_rgba color_b(
  822. (pBlock->m_trans_color_data.m_red_b << 1) | (pBlock->m_trans_color_data.m_red_b >> 3),
  823. (pBlock->m_trans_color_data.m_green_b << 1) | (pBlock->m_trans_color_data.m_green_b >> 3),
  824. (pBlock->m_trans_color_data.m_blue_b << 1) | (pBlock->m_trans_color_data.m_blue_b >> 3),
  825. (pBlock->m_trans_color_data.m_alpha_b << 1) | 1);
  826. colors[0] = convert_rgba_5554_to_8888(color_a);
  827. colors[3] = convert_rgba_5554_to_8888(color_b);
  828. }
  829. colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, (colors[0].a * 5 + colors[3].a * 3) / 8);
  830. colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, (colors[0].a * 3 + colors[3].a * 5) / 8);
  831. for (uint32_t i = 0; i < 16; i++)
  832. {
  833. const uint32_t sel = (pBlock->m_modulation[i >> 2] >> ((i & 3) * 2)) & 3;
  834. pPixels[i] = colors[sel];
  835. }
  836. return true;
  837. }
  838. struct etc2_eac_r11
  839. {
  840. uint64_t m_base : 8;
  841. uint64_t m_table : 4;
  842. uint64_t m_mul : 4;
  843. uint64_t m_sels_0 : 8;
  844. uint64_t m_sels_1 : 8;
  845. uint64_t m_sels_2 : 8;
  846. uint64_t m_sels_3 : 8;
  847. uint64_t m_sels_4 : 8;
  848. uint64_t m_sels_5 : 8;
  849. uint64_t get_sels() const
  850. {
  851. return ((uint64_t)m_sels_0 << 40U) | ((uint64_t)m_sels_1 << 32U) | ((uint64_t)m_sels_2 << 24U) | ((uint64_t)m_sels_3 << 16U) | ((uint64_t)m_sels_4 << 8U) | m_sels_5;
  852. }
  853. void set_sels(uint64_t v)
  854. {
  855. m_sels_0 = (v >> 40U) & 0xFF;
  856. m_sels_1 = (v >> 32U) & 0xFF;
  857. m_sels_2 = (v >> 24U) & 0xFF;
  858. m_sels_3 = (v >> 16U) & 0xFF;
  859. m_sels_4 = (v >> 8U) & 0xFF;
  860. m_sels_5 = v & 0xFF;
  861. }
  862. };
  863. struct etc2_eac_rg11
  864. {
  865. etc2_eac_r11 m_c[2];
  866. };
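// Decodes one EAC R11 channel: value = base*8 + 4 + table[selector] * (multiplier*8, or 1 when the
// multiplier is 0), clamped to [0, 2047], then rescaled to 8 bits. Selectors are 3 bits each,
// stored in column order (index y + x*4), most significant selector first.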
  867. void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c)
  868. {
  869. const etc2_eac_r11* pBlock = static_cast<const etc2_eac_r11*>(p);
  870. const uint64_t sels = pBlock->get_sels();
  871. const int base = (int)pBlock->m_base * 8 + 4;
  872. const int mul = pBlock->m_mul ? ((int)pBlock->m_mul * 8) : 1;
  873. const int table = (int)pBlock->m_table;
  874. for (uint32_t y = 0; y < 4; y++)
  875. {
  876. for (uint32_t x = 0; x < 4; x++)
  877. {
  878. const uint32_t shift = 45 - ((y + x * 4) * 3);
  879. const uint32_t sel = (uint32_t)((sels >> shift) & 7);
  880. int val = base + g_etc2_eac_tables[table][sel] * mul;
  881. val = clamp<int>(val, 0, 2047);
  882. // Convert to 8-bits with rounding
  883. //pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1024) / 2047);
  884. pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1023) / 2047);
  885. } // x
  886. } // y
  887. }
  888. void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels)
  889. {
  890. for (uint32_t c = 0; c < 2; c++)
  891. {
  892. const etc2_eac_r11* pBlock = &static_cast<const etc2_eac_rg11*>(p)->m_c[c];
  893. unpack_etc2_eac_r(pBlock, pPixels, c);
  894. }
  895. }
  896. void unpack_uastc(const void* p, color_rgba* pPixels)
  897. {
  898. basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false);
  899. }
  900. // Unpacks to RGBA, R, RG, or A
  901. bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
  902. {
  903. switch (fmt)
  904. {
  905. case texture_format::cBC1:
  906. {
  907. unpack_bc1(pBlock, pPixels, true);
  908. break;
  909. }
  910. case texture_format::cBC1_NV:
  911. {
  912. unpack_bc1_nv(pBlock, pPixels, true);
  913. break;
  914. }
  915. case texture_format::cBC1_AMD:
  916. {
  917. unpack_bc1_amd(pBlock, pPixels, true);
  918. break;
  919. }
  920. case texture_format::cBC3:
  921. {
  922. return unpack_bc3(pBlock, pPixels);
  923. }
  924. case texture_format::cBC4:
  925. {
  926. // Unpack to R
  927. unpack_bc4(pBlock, &pPixels[0].r, sizeof(color_rgba));
  928. break;
  929. }
  930. case texture_format::cBC5:
  931. {
  932. unpack_bc5(pBlock, pPixels);
  933. break;
  934. }
  935. case texture_format::cBC7:
  936. {
  937. return unpack_bc7(pBlock, pPixels);
  938. }
939. // Full ETC2 color blocks (planar/T/H modes) are currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color)
  940. case texture_format::cETC2_RGB:
  941. case texture_format::cETC1:
  942. case texture_format::cETC1S:
  943. {
  944. return unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels);
  945. }
  946. case texture_format::cETC2_RGBA:
  947. {
  948. if (!unpack_etc1(static_cast<const etc_block*>(pBlock)[1], pPixels))
  949. return false;
  950. unpack_etc2_eac(pBlock, pPixels);
  951. break;
  952. }
  953. case texture_format::cETC2_ALPHA:
  954. {
  955. // Unpack to A
  956. unpack_etc2_eac(pBlock, pPixels);
  957. break;
  958. }
  959. case texture_format::cASTC4x4:
  960. {
  961. #if BASISU_USE_ASTC_DECOMPRESS
  962. const bool astc_srgb = false;
  963. basisu_astc::astc::decompress(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);
  964. #else
  965. memset(pPixels, 255, 16 * sizeof(color_rgba));
  966. #endif
  967. break;
  968. }
  969. case texture_format::cATC_RGB:
  970. {
  971. unpack_atc(pBlock, pPixels);
  972. break;
  973. }
  974. case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
  975. {
  976. unpack_atc(static_cast<const uint8_t*>(pBlock) + 8, pPixels);
  977. unpack_bc4(pBlock, &pPixels[0].a, sizeof(color_rgba));
  978. break;
  979. }
  980. case texture_format::cFXT1_RGB:
  981. {
  982. unpack_fxt1(pBlock, pPixels);
  983. break;
  984. }
  985. case texture_format::cPVRTC2_4_RGBA:
  986. {
  987. unpack_pvrtc2(pBlock, pPixels);
  988. break;
  989. }
  990. case texture_format::cETC2_R11_EAC:
  991. {
  992. unpack_etc2_eac_r(static_cast<const etc2_eac_r11 *>(pBlock), pPixels, 0);
  993. break;
  994. }
  995. case texture_format::cETC2_RG11_EAC:
  996. {
  997. unpack_etc2_eac_rg(pBlock, pPixels);
  998. break;
  999. }
  1000. case texture_format::cUASTC4x4:
  1001. {
  1002. unpack_uastc(pBlock, pPixels);
  1003. break;
  1004. }
  1005. default:
  1006. {
  1007. assert(0);
  1008. // TODO
  1009. return false;
  1010. }
  1011. }
  1012. return true;
  1013. }
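// Minimal usage sketch (pBlockData is a hypothetical pointer to one compressed block):
//
//   color_rgba tile[cMaxBlockSize * cMaxBlockSize];
//   if (!unpack_block(texture_format::cBC7, pBlockData, tile))
//     { /* unsupported format or invalid block data */ }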
  1014. bool gpu_image::unpack(image& img) const
  1015. {
  1016. img.resize(get_pixel_width(), get_pixel_height());
  1017. img.set_all(g_black_color);
  1018. if (!img.get_width() || !img.get_height())
  1019. return true;
  1020. if ((m_fmt == texture_format::cPVRTC1_4_RGB) || (m_fmt == texture_format::cPVRTC1_4_RGBA))
  1021. {
  1022. pvrtc4_image pi(m_width, m_height);
  1023. if (get_total_blocks() != pi.get_total_blocks())
  1024. return false;
  1025. memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes());
  1026. pi.deswizzle();
  1027. pi.unpack_all_pixels(img);
  1028. return true;
  1029. }
  1030. assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));
  1031. color_rgba pixels[cMaxBlockSize * cMaxBlockSize];
  1032. for (uint32_t i = 0; i < cMaxBlockSize * cMaxBlockSize; i++)
  1033. pixels[i] = g_black_color;
  1034. bool success = true;
  1035. for (uint32_t by = 0; by < m_blocks_y; by++)
  1036. {
  1037. for (uint32_t bx = 0; bx < m_blocks_x; bx++)
  1038. {
  1039. const void* pBlock = get_block_ptr(bx, by);
  1040. if (!unpack_block(m_fmt, pBlock, pixels))
  1041. success = false;
  1042. img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);
  1043. } // bx
  1044. } // by
  1045. return success;
  1046. }
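// Minimal usage sketch (gpu_tex is a hypothetical, already-populated gpu_image):
//
//   image img;
//   if (!gpu_tex.unpack(img))
//     { /* at least one block failed to decode */ }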
  1047. static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };
  1048. // KTX/GL enums
  1049. enum
  1050. {
  1051. KTX_ENDIAN = 0x04030201,
  1052. KTX_OPPOSITE_ENDIAN = 0x01020304,
  1053. KTX_ETC1_RGB8_OES = 0x8D64,
  1054. KTX_RED = 0x1903,
  1055. KTX_RG = 0x8227,
  1056. KTX_RGB = 0x1907,
  1057. KTX_RGBA = 0x1908,
  1058. KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0,
  1059. KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3,
  1060. KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB,
  1061. KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD,
  1062. KTX_COMPRESSED_RGB8_ETC2 = 0x9274,
  1063. KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278,
  1064. KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C,
  1065. KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D,
  1066. KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,
  1067. KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,
  1068. KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,
  1069. KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0,
  1070. KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value!
  1071. KTX_ATC_RGB_AMD = 0x8C92,
  1072. KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE,
  1073. KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0,
  1074. KTX_COMPRESSED_RGBA_FXT1_3DFX = 0x86B1,
  1075. KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG = 0x9138,
  1076. KTX_COMPRESSED_R11_EAC = 0x9270,
  1077. KTX_COMPRESSED_RG11_EAC = 0x9272
  1078. };
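// KTX 1.1 header: the 12-byte identifier followed by thirteen 32-bit fields (64 bytes total).
// Note that create_ktx_texture_file() below never emits key/value data, so m_bytesOfKeyValueData
// is left at 0 by clear().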
  1079. struct ktx_header
  1080. {
  1081. uint8_t m_identifier[12];
  1082. packed_uint<4> m_endianness;
  1083. packed_uint<4> m_glType;
  1084. packed_uint<4> m_glTypeSize;
  1085. packed_uint<4> m_glFormat;
  1086. packed_uint<4> m_glInternalFormat;
  1087. packed_uint<4> m_glBaseInternalFormat;
  1088. packed_uint<4> m_pixelWidth;
  1089. packed_uint<4> m_pixelHeight;
  1090. packed_uint<4> m_pixelDepth;
  1091. packed_uint<4> m_numberOfArrayElements;
  1092. packed_uint<4> m_numberOfFaces;
  1093. packed_uint<4> m_numberOfMipmapLevels;
  1094. packed_uint<4> m_bytesOfKeyValueData;
  1095. void clear() { clear_obj(*this); }
  1096. };
1097. // Input is a texture array of mipmapped gpu_image objects: gpu_images[array_index][level_index]
  1098. bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag)
  1099. {
  1100. if (!gpu_images.size())
  1101. {
  1102. assert(0);
  1103. return false;
  1104. }
  1105. uint32_t width = 0, height = 0, total_levels = 0;
  1106. basisu::texture_format fmt = texture_format::cInvalidTextureFormat;
  1107. if (cubemap_flag)
  1108. {
  1109. if ((gpu_images.size() % 6) != 0)
  1110. {
  1111. assert(0);
  1112. return false;
  1113. }
  1114. }
  1115. for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
  1116. {
  1117. const gpu_image_vec &levels = gpu_images[array_index];
  1118. if (!levels.size())
  1119. {
  1120. // Empty mip chain
  1121. assert(0);
  1122. return false;
  1123. }
  1124. if (!array_index)
  1125. {
  1126. width = levels[0].get_pixel_width();
  1127. height = levels[0].get_pixel_height();
  1128. total_levels = (uint32_t)levels.size();
  1129. fmt = levels[0].get_format();
  1130. }
  1131. else
  1132. {
  1133. if ((width != levels[0].get_pixel_width()) ||
  1134. (height != levels[0].get_pixel_height()) ||
  1135. (total_levels != levels.size()))
  1136. {
1137. // All cubemap/texture array faces must have the same dimensions
  1138. assert(0);
  1139. return false;
  1140. }
  1141. }
  1142. for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
  1143. {
  1144. if (level_index)
  1145. {
  1146. if ( (levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) ||
  1147. (levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)) )
  1148. {
  1149. // Malformed mipmap chain
  1150. assert(0);
  1151. return false;
  1152. }
  1153. }
  1154. if (fmt != levels[level_index].get_format())
  1155. {
  1156. // All input textures must use the same GPU format
  1157. assert(0);
  1158. return false;
  1159. }
  1160. }
  1161. }
  1162. uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB;
  1163. switch (fmt)
  1164. {
  1165. case texture_format::cBC1:
  1166. case texture_format::cBC1_NV:
  1167. case texture_format::cBC1_AMD:
  1168. {
  1169. internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT;
  1170. break;
  1171. }
  1172. case texture_format::cBC3:
  1173. {
  1174. internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT;
  1175. base_internal_fmt = KTX_RGBA;
  1176. break;
  1177. }
  1178. case texture_format::cBC4:
  1179. {
  1180. internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT;
  1181. base_internal_fmt = KTX_RED;
  1182. break;
  1183. }
  1184. case texture_format::cBC5:
  1185. {
  1186. internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT;
  1187. base_internal_fmt = KTX_RG;
  1188. break;
  1189. }
  1190. case texture_format::cETC1:
  1191. case texture_format::cETC1S:
  1192. {
  1193. internal_fmt = KTX_ETC1_RGB8_OES;
  1194. break;
  1195. }
  1196. case texture_format::cETC2_RGB:
  1197. {
  1198. internal_fmt = KTX_COMPRESSED_RGB8_ETC2;
  1199. break;
  1200. }
  1201. case texture_format::cETC2_RGBA:
  1202. {
  1203. internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC;
  1204. base_internal_fmt = KTX_RGBA;
  1205. break;
  1206. }
  1207. case texture_format::cBC7:
  1208. {
  1209. internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM;
  1210. base_internal_fmt = KTX_RGBA;
  1211. break;
  1212. }
  1213. case texture_format::cPVRTC1_4_RGB:
  1214. {
  1215. internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG;
  1216. break;
  1217. }
  1218. case texture_format::cPVRTC1_4_RGBA:
  1219. {
  1220. internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG;
  1221. base_internal_fmt = KTX_RGBA;
  1222. break;
  1223. }
  1224. case texture_format::cASTC4x4:
  1225. {
  1226. internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR;
  1227. base_internal_fmt = KTX_RGBA;
  1228. break;
  1229. }
  1230. case texture_format::cATC_RGB:
  1231. {
  1232. internal_fmt = KTX_ATC_RGB_AMD;
  1233. break;
  1234. }
  1235. case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
  1236. {
  1237. internal_fmt = KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD;
  1238. base_internal_fmt = KTX_RGBA;
  1239. break;
  1240. }
  1241. case texture_format::cETC2_R11_EAC:
  1242. {
  1243. internal_fmt = KTX_COMPRESSED_R11_EAC;
  1244. base_internal_fmt = KTX_RED;
  1245. break;
  1246. }
  1247. case texture_format::cETC2_RG11_EAC:
  1248. {
  1249. internal_fmt = KTX_COMPRESSED_RG11_EAC;
  1250. base_internal_fmt = KTX_RG;
  1251. break;
  1252. }
  1253. case texture_format::cUASTC4x4:
  1254. {
  1255. internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR;
  1256. base_internal_fmt = KTX_RGBA;
  1257. break;
  1258. }
  1259. case texture_format::cFXT1_RGB:
  1260. {
  1261. internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX;
  1262. break;
  1263. }
  1264. case texture_format::cPVRTC2_4_RGBA:
  1265. {
  1266. internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG;
  1267. base_internal_fmt = KTX_RGBA;
  1268. break;
  1269. }
  1270. default:
  1271. {
  1272. // TODO
  1273. assert(0);
  1274. return false;
  1275. }
  1276. }
  1277. ktx_header header;
  1278. header.clear();
  1279. memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id));
  1280. header.m_endianness = KTX_ENDIAN;
  1281. header.m_pixelWidth = width;
  1282. header.m_pixelHeight = height;
  1283. header.m_glTypeSize = 1;
  1284. header.m_glInternalFormat = internal_fmt;
  1285. header.m_glBaseInternalFormat = base_internal_fmt;
  1286. header.m_numberOfArrayElements = (uint32_t)(cubemap_flag ? (gpu_images.size() / 6) : gpu_images.size());
  1287. if (header.m_numberOfArrayElements == 1)
  1288. header.m_numberOfArrayElements = 0;
  1289. header.m_numberOfMipmapLevels = total_levels;
  1290. header.m_numberOfFaces = cubemap_flag ? 6 : 1;
  1291. append_vector(ktx_data, (uint8_t *)&header, sizeof(header));
  1292. for (uint32_t level_index = 0; level_index < total_levels; level_index++)
  1293. {
  1294. uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes();
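// Per KTX 1.1, imageSize for a non-array cubemap is the size of a single face; for all other
// texture types it covers every face and array layer of this mip level.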
  1295. if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1))
  1296. {
  1297. img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements);
  1298. }
  1299. assert(img_size && ((img_size & 3) == 0));
  1300. packed_uint<4> packed_img_size(img_size);
  1301. append_vector(ktx_data, (uint8_t *)&packed_img_size, sizeof(packed_img_size));
  1302. uint32_t bytes_written = 0;
  1303. for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++)
  1304. {
  1305. for (uint32_t face_index = 0; face_index < header.m_numberOfFaces; face_index++)
  1306. {
  1307. const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index];
  1308. append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes());
  1309. bytes_written += img.get_size_in_bytes();
  1310. }
  1311. } // array_index
  1312. } // level_index
  1313. return true;
  1314. }
  1315. bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag)
  1316. {
  1317. std::string extension(string_tolower(string_get_extension(pFilename)));
  1318. uint8_vec filedata;
  1319. if (extension == "ktx")
  1320. {
  1321. if (!create_ktx_texture_file(filedata, g, cubemap_flag))
  1322. return false;
  1323. }
  1324. else if (extension == "pvr")
  1325. {
  1326. // TODO
  1327. return false;
  1328. }
  1329. else if (extension == "dds")
  1330. {
  1331. // TODO
  1332. return false;
  1333. }
  1334. else
  1335. {
  1336. // unsupported texture format
  1337. assert(0);
  1338. return false;
  1339. }
  1340. return basisu::write_vec_to_file(pFilename, filedata);
  1341. }
  1342. bool write_compressed_texture_file(const char* pFilename, const gpu_image& g)
  1343. {
  1344. basisu::vector<gpu_image_vec> v;
  1345. enlarge_vector(v, 1)->push_back(g);
  1346. return write_compressed_texture_file(pFilename, v, false);
  1347. }
  1348. //const uint32_t OUT_FILE_MAGIC = 'TEXC';
  1349. struct out_file_header
  1350. {
  1351. packed_uint<4> m_magic;
  1352. packed_uint<4> m_pad;
  1353. packed_uint<4> m_width;
  1354. packed_uint<4> m_height;
  1355. };
1356. // Since no modern tool supports FXT1-format .KTX files, we write .OUT files so that 3DFX's original tools (shipped in 1999) can decode our encoded output.
  1357. bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi)
  1358. {
  1359. out_file_header hdr;
  1360. //hdr.m_magic = OUT_FILE_MAGIC;
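// These four bytes are 'C','X','E','T' -- the commented-out 'TEXC' magic, laid out in little-endian byte order.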
  1361. hdr.m_magic.m_bytes[0] = 67;
  1362. hdr.m_magic.m_bytes[1] = 88;
  1363. hdr.m_magic.m_bytes[2] = 69;
  1364. hdr.m_magic.m_bytes[3] = 84;
  1365. hdr.m_pad = 0;
  1366. hdr.m_width = gi.get_blocks_x() * 8;
  1367. hdr.m_height = gi.get_blocks_y() * 4;
  1368. FILE* pFile = nullptr;
  1369. #ifdef _WIN32
  1370. fopen_s(&pFile, pFilename, "wb");
  1371. #else
  1372. pFile = fopen(pFilename, "wb");
  1373. #endif
  1374. if (!pFile)
  1375. return false;
  1376. fwrite(&hdr, sizeof(hdr), 1, pFile);
  1377. fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile);
  1378. return fclose(pFile) != EOF;
  1379. }
  1380. } // basisu