basisu_gpu_texture.cpp 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028
  1. // basisu_gpu_texture.cpp
  2. // Copyright (C) 2019 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. #include "basisu_gpu_texture.h"
  16. #include "basisu_enc.h"
  17. #include "basisu_pvrtc1_4.h"
  18. #include "basisu_astc_decomp.h"
  19. namespace basisu
  20. {
// ETC2 EAC alpha modifier tables. Indexed first by the block's 4-bit table index,
// then by the per-pixel 3-bit selector. Each entry is added (scaled by the block
// multiplier) to the block's base alpha value.
const int8_t g_etc2_eac_tables[16][8] =
{
    { -3, -6, -9, -15, 2, 5, 8, 14 }, { -3, -7, -10, -13, 2, 6, 9, 12 }, { -2, -5, -8, -13, 1, 4, 7, 12 }, { -2, -4, -6, -13, 1, 3, 5, 12 },
    { -3, -6, -8, -12, 2, 5, 7, 11 }, { -3, -7, -9, -11, 2, 6, 8, 10 }, { -4, -7, -8, -11, 3, 6, 7, 10 }, { -3, -5, -8, -11, 2, 4, 7, 10 },
    { -2, -6, -8, -10, 1, 5, 7, 9 }, { -2, -5, -8, -10, 1, 4, 7, 9 }, { -2, -4, -8, -10, 1, 3, 7, 9 }, { -2, -5, -7, -10, 1, 4, 6, 9 },
    { -3, -4, -7, -10, 2, 3, 6, 9 }, { -1, -2, -3, -10, 0, 1, 2, 9 }, { -4, -6, -8, -9, 3, 5, 7, 8 }, { -3, -5, -7, -9, 2, 4, 6, 8 }
};
// One 8-byte ETC2 EAC alpha block: 8-bit base alpha, 4-bit table index,
// 4-bit multiplier, then 48 bits of 3-bit per-pixel selectors.
struct eac_a8_block
{
    uint16_t m_base : 8;
    uint16_t m_table : 4;
    uint16_t m_multiplier : 4;
    uint8_t m_selectors[6];

    // Returns the 3-bit selector for pixel (x, y). Note the selectors are stored
    // column-major (x is the major index) and big-endian within the 48-bit field.
    inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const
    {
        assert((x < 4) && (y < 4));
        return static_cast<uint32_t>((selector_bits >> (45 - (y + x * 4) * 3)) & 7);
    }

    // Assembles the 6 selector bytes into a single 48-bit big-endian value.
    inline uint64_t get_selector_bits() const
    {
        uint64_t pixels = ((uint64_t)m_selectors[0] << 40) | ((uint64_t)m_selectors[1] << 32) | ((uint64_t)m_selectors[2] << 24) | ((uint64_t)m_selectors[3] << 16) | ((uint64_t)m_selectors[4] << 8) | m_selectors[5];
        return pixels;
    }
};
  45. void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels)
  46. {
  47. static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8");
  48. const eac_a8_block *pBlock = static_cast<const eac_a8_block *>(pBlock_bits);
  49. const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table];
  50. const uint64_t selector_bits = pBlock->get_selector_bits();
  51. const int32_t base = pBlock->m_base;
  52. const int32_t mul = pBlock->m_multiplier;
  53. pPixels[0].a = clamp255(base + pTable[pBlock->get_selector(0, 0, selector_bits)] * mul);
  54. pPixels[1].a = clamp255(base + pTable[pBlock->get_selector(1, 0, selector_bits)] * mul);
  55. pPixels[2].a = clamp255(base + pTable[pBlock->get_selector(2, 0, selector_bits)] * mul);
  56. pPixels[3].a = clamp255(base + pTable[pBlock->get_selector(3, 0, selector_bits)] * mul);
  57. pPixels[4].a = clamp255(base + pTable[pBlock->get_selector(0, 1, selector_bits)] * mul);
  58. pPixels[5].a = clamp255(base + pTable[pBlock->get_selector(1, 1, selector_bits)] * mul);
  59. pPixels[6].a = clamp255(base + pTable[pBlock->get_selector(2, 1, selector_bits)] * mul);
  60. pPixels[7].a = clamp255(base + pTable[pBlock->get_selector(3, 1, selector_bits)] * mul);
  61. pPixels[8].a = clamp255(base + pTable[pBlock->get_selector(0, 2, selector_bits)] * mul);
  62. pPixels[9].a = clamp255(base + pTable[pBlock->get_selector(1, 2, selector_bits)] * mul);
  63. pPixels[10].a = clamp255(base + pTable[pBlock->get_selector(2, 2, selector_bits)] * mul);
  64. pPixels[11].a = clamp255(base + pTable[pBlock->get_selector(3, 2, selector_bits)] * mul);
  65. pPixels[12].a = clamp255(base + pTable[pBlock->get_selector(0, 3, selector_bits)] * mul);
  66. pPixels[13].a = clamp255(base + pTable[pBlock->get_selector(1, 3, selector_bits)] * mul);
  67. pPixels[14].a = clamp255(base + pTable[pBlock->get_selector(2, 3, selector_bits)] * mul);
  68. pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul);
  69. }
// One 8-byte BC1/DXT1 block: two little-endian 5:6:5 endpoint colors followed by
// 32 bits of 2-bit per-pixel selectors (one byte per row).
struct bc1_block
{
    enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
    uint8_t m_low_color[cTotalEndpointBytes];
    uint8_t m_high_color[cTotalEndpointBytes];
    uint8_t m_selectors[cTotalSelectorBytes];

    inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
    inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }

    // Expands a packed 5:6:5 color to 8:8:8 by bit replication.
    static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b)
    {
        r = (c >> 11) & 31;
        g = (c >> 5) & 63;
        b = c & 31;
        r = (r << 3) | (r >> 2);
        g = (g << 2) | (g >> 4);
        b = (b << 3) | (b >> 2);
    }

    // Returns the 2-bit selector for pixel (x, y); row y is byte y of m_selectors.
    inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * 2)) & 3; }
};
  89. // Returns true if the block uses 3 color punchthrough alpha mode.
  90. bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
  91. {
  92. static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
  93. const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
  94. const uint32_t l = pBlock->get_low_color();
  95. const uint32_t h = pBlock->get_high_color();
  96. color_rgba c[4];
  97. uint32_t r0, g0, b0, r1, g1, b1;
  98. bc1_block::unpack_color(l, r0, g0, b0);
  99. bc1_block::unpack_color(h, r1, g1, b1);
  100. bool used_punchthrough = false;
  101. if (l > h)
  102. {
  103. c[0].set_noclamp_rgba(r0, g0, b0, 255);
  104. c[1].set_noclamp_rgba(r1, g1, b1, 255);
  105. c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
  106. c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
  107. }
  108. else
  109. {
  110. c[0].set_noclamp_rgba(r0, g0, b0, 255);
  111. c[1].set_noclamp_rgba(r1, g1, b1, 255);
  112. c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
  113. c[3].set_noclamp_rgba(0, 0, 0, 0);
  114. used_punchthrough = true;
  115. }
  116. if (set_alpha)
  117. {
  118. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  119. {
  120. pPixels[0] = c[pBlock->get_selector(0, y)];
  121. pPixels[1] = c[pBlock->get_selector(1, y)];
  122. pPixels[2] = c[pBlock->get_selector(2, y)];
  123. pPixels[3] = c[pBlock->get_selector(3, y)];
  124. }
  125. }
  126. else
  127. {
  128. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  129. {
  130. pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
  131. pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
  132. pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
  133. pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
  134. }
  135. }
  136. return used_punchthrough;
  137. }
// One 8-byte BC4/ATI1 (single-channel) block: two 8-bit endpoints followed by
// 48 bits of 3-bit per-pixel selectors.
struct bc4_block
{
    enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
    uint8_t m_endpoints[2];
    uint8_t m_selectors[cTotalSelectorBytes];

    inline uint32_t get_low_alpha() const { return m_endpoints[0]; }
    inline uint32_t get_high_alpha() const { return m_endpoints[1]; }

    // low <= high selects the 6-interpolated-value mode (with explicit 0 and 255).
    inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }

    // Assembles the 6 selector bytes into a single little-endian 48-bit value.
    inline uint64_t get_selector_bits() const
    {
        return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) |
            (((uint64_t)m_selectors[4]) << 32U) |
            (((uint64_t)m_selectors[5]) << 40U);
    }

    // Returns the 3-bit selector for pixel (x, y), stored row-major LSB-first.
    inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const
    {
        assert((x < 4U) && (y < 4U));
        return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1);
    }

    // 6-value mode: 4 interpolants between l and h, plus explicit 0 and 255.
    static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h)
    {
        pDst[0] = static_cast<uint8_t>(l);
        pDst[1] = static_cast<uint8_t>(h);
        pDst[2] = static_cast<uint8_t>((l * 4 + h) / 5);
        pDst[3] = static_cast<uint8_t>((l * 3 + h * 2) / 5);
        pDst[4] = static_cast<uint8_t>((l * 2 + h * 3) / 5);
        pDst[5] = static_cast<uint8_t>((l + h * 4) / 5);
        pDst[6] = 0;
        pDst[7] = 255;
        return 6;
    }

    // 8-value mode: 6 interpolants between l and h.
    static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h)
    {
        pDst[0] = static_cast<uint8_t>(l);
        pDst[1] = static_cast<uint8_t>(h);
        pDst[2] = static_cast<uint8_t>((l * 6 + h) / 7);
        pDst[3] = static_cast<uint8_t>((l * 5 + h * 2) / 7);
        pDst[4] = static_cast<uint8_t>((l * 4 + h * 3) / 7);
        pDst[5] = static_cast<uint8_t>((l * 3 + h * 4) / 7);
        pDst[6] = static_cast<uint8_t>((l * 2 + h * 5) / 7);
        pDst[7] = static_cast<uint8_t>((l + h * 6) / 7);
        return 8;
    }

    // Fills pDst[0..7] with the block's palette; returns the number of
    // interpolated values (6 or 8) chosen by the endpoint ordering.
    static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h)
    {
        if (l > h)
            return get_block_values8(pDst, l, h);
        else
            return get_block_values6(pDst, l, h);
    }
};
  189. void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride)
  190. {
  191. static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8");
  192. const bc4_block *pBlock = static_cast<const bc4_block *>(pBlock_bits);
  193. uint8_t sel_values[8];
  194. bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha());
  195. const uint64_t selector_bits = pBlock->get_selector_bits();
  196. for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U))
  197. {
  198. pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)];
  199. pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)];
  200. pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)];
  201. pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)];
  202. }
  203. }
  204. // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3.
  205. bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels)
  206. {
  207. bool success = true;
  208. if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(bc4_block), pPixels, true))
  209. success = false;
  210. unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba));
  211. return success;
  212. }
  213. // writes RG
  214. void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels)
  215. {
  216. unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba));
  217. unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba));
  218. }
// ATC isn't officially documented, so I'm assuming these references:
// http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf
// https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c
// The paper incorrectly says the ATC lerp factors are 1/3 and 2/3, but they are actually 3/8 and 5/8.
void unpack_atc(const void* pBlock_bits, color_rgba* pPixels)
{
    const uint8_t* pBytes = static_cast<const uint8_t*>(pBlock_bits);

    // Two little-endian endpoint colors followed by 32 bits of 2-bit selectors.
    const uint16_t color0 = pBytes[0] | (pBytes[1] << 8U);
    const uint16_t color1 = pBytes[2] | (pBytes[3] << 8U);
    uint32_t sels = pBytes[4] | (pBytes[5] << 8U) | (pBytes[6] << 16U) | (pBytes[7] << 24U);

    // High bit of color0 selects the alternate (punchthrough-black) mode.
    const bool mode = (color0 & 0x8000) != 0;

    color_rgba c[4];

    // color0 is 1:5:5:5 (mode bit + RGB); all three components expand by 5->8 bit replication.
    c[0].set((color0 >> 10) & 31, (color0 >> 5) & 31, color0 & 31, 255);
    c[0].r = (c[0].r << 3) | (c[0].r >> 2);
    c[0].g = (c[0].g << 3) | (c[0].g >> 2);
    c[0].b = (c[0].b << 3) | (c[0].b >> 2);

    // color1 is 5:6:5, expanded like BC1 endpoints.
    c[3].set((color1 >> 11) & 31, (color1 >> 5) & 63, color1 & 31, 255);
    c[3].r = (c[3].r << 3) | (c[3].r >> 2);
    c[3].g = (c[3].g << 2) | (c[3].g >> 4);
    c[3].b = (c[3].b << 3) | (c[3].b >> 2);

    if (mode)
    {
        // Alternate mode: palette is { black, c0 - c3/4, c0, c3 }.
        c[1].set(std::max(0, c[0].r - (c[3].r >> 2)), std::max(0, c[0].g - (c[3].g >> 2)), std::max(0, c[0].b - (c[3].b >> 2)), 255);
        c[2] = c[0];
        c[0].set(0, 0, 0, 255);
    }
    else
    {
        // Standard mode: interpolants at 3/8 and 5/8 between the endpoints (see note above).
        c[1].r = (c[0].r * 5 + c[3].r * 3) >> 3;
        c[1].g = (c[0].g * 5 + c[3].g * 3) >> 3;
        c[1].b = (c[0].b * 5 + c[3].b * 3) >> 3;

        c[2].r = (c[0].r * 3 + c[3].r * 5) >> 3;
        c[2].g = (c[0].g * 3 + c[3].g * 5) >> 3;
        c[2].b = (c[0].b * 3 + c[3].b * 5) >> 3;
    }

    // 2 selector bits per pixel, row-major, LSB-first.
    for (uint32_t i = 0; i < 16; i++)
    {
        const uint32_t s = sels & 3;
        pPixels[i] = c[s];
        sels >>= 2;
    }
}
// BC7 mode 6 block layout (one subset, 7777 endpoints + per-endpoint P-bits,
// 4-bit selectors). The bitfields mirror the exact 128-bit block encoding, so
// this struct must not be reordered or repacked.
struct bc7_mode_6
{
    struct
    {
        uint64_t m_mode : 7;  // mode 6 == bit 6 set (value 0x40)
        uint64_t m_r0 : 7;
        uint64_t m_r1 : 7;
        uint64_t m_g0 : 7;
        uint64_t m_g1 : 7;
        uint64_t m_b0 : 7;
        uint64_t m_b1 : 7;
        uint64_t m_a0 : 7;
        uint64_t m_a1 : 7;
        uint64_t m_p0 : 1;  // P-bit for endpoint 0 (LSB of all four channels)
    } m_lo;

    union
    {
        struct
        {
            uint64_t m_p1 : 1;  // P-bit for endpoint 1
            // 4x4 selector grid; the anchor selector m_s00 is stored with its MSB implied 0,
            // hence only 3 bits.
            uint64_t m_s00 : 3;
            uint64_t m_s10 : 4;
            uint64_t m_s20 : 4;
            uint64_t m_s30 : 4;
            uint64_t m_s01 : 4;
            uint64_t m_s11 : 4;
            uint64_t m_s21 : 4;
            uint64_t m_s31 : 4;
            uint64_t m_s02 : 4;
            uint64_t m_s12 : 4;
            uint64_t m_s22 : 4;
            uint64_t m_s32 : 4;
            uint64_t m_s03 : 4;
            uint64_t m_s13 : 4;
            uint64_t m_s23 : 4;
            uint64_t m_s33 : 4;
        } m_hi;
        uint64_t m_hi_bits;
    };
};
// BC7 4-bit interpolation weights (0..64), per the BC7 specification.
static const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };

// The transcoder only outputs mode 6 at the moment, so this is easy.
// Decodes one BC7 mode 6 block into 16 RGBA pixels. Returns false if the block
// isn't mode 6.
bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels)
{
    static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16");

    const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits);

    // Mode 6 blocks have bit 6 of the mode field set.
    if (block.m_lo.m_mode != (1 << 6))
        return false;

    // Expand 7-bit endpoints to 8 bits by appending each endpoint's P-bit.
    const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0);
    const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0);
    const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0);
    const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0);
    const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1);
    const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1);
    const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1);
    const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1);

    // Precompute the full 16-entry interpolated palette.
    color_rgba vals[16];
    for (uint32_t i = 0; i < 16; i++)
    {
        const uint32_t w = g_bc7_weights4[i];
        const uint32_t iw = 64 - w;
        vals[i].set_noclamp_rgba(
            (r0 * iw + r1 * w + 32) >> 6,
            (g0 * iw + g1 * w + 32) >> 6,
            (b0 * iw + b1 * w + 32) >> 6,
            (a0 * iw + a1 * w + 32) >> 6);
    }

    // Each pixel's 4-bit selector picks its palette entry.
    pPixels[0] = vals[block.m_hi.m_s00];
    pPixels[1] = vals[block.m_hi.m_s10];
    pPixels[2] = vals[block.m_hi.m_s20];
    pPixels[3] = vals[block.m_hi.m_s30];

    pPixels[4] = vals[block.m_hi.m_s01];
    pPixels[5] = vals[block.m_hi.m_s11];
    pPixels[6] = vals[block.m_hi.m_s21];
    pPixels[7] = vals[block.m_hi.m_s31];

    pPixels[8] = vals[block.m_hi.m_s02];
    pPixels[9] = vals[block.m_hi.m_s12];
    pPixels[10] = vals[block.m_hi.m_s22];
    pPixels[11] = vals[block.m_hi.m_s32];

    pPixels[12] = vals[block.m_hi.m_s03];
    pPixels[13] = vals[block.m_hi.m_s13];
    pPixels[14] = vals[block.m_hi.m_s23];
    pPixels[15] = vals[block.m_hi.m_s33];

    return true;
}
  346. static inline uint32_t get_block_bits(const uint8_t* pBytes, uint32_t bit_ofs, uint32_t bits_wanted)
  347. {
  348. assert(bits_wanted < 32);
  349. uint32_t v = 0;
  350. uint32_t total_bits = 0;
  351. while (total_bits < bits_wanted)
  352. {
  353. uint32_t k = pBytes[bit_ofs >> 3];
  354. k >>= (bit_ofs & 7);
  355. uint32_t num_bits_in_byte = 8 - (bit_ofs & 7);
  356. v |= (k << total_bits);
  357. total_bits += num_bits_in_byte;
  358. bit_ofs += num_bits_in_byte;
  359. }
  360. return v & ((1 << bits_wanted) - 1);
  361. }
// BC7 mode 5 block layout (one subset, 2-bit rotation, 7-bit RGB endpoints,
// 8-bit alpha endpoints, separate 2-bit color and alpha selectors). The
// bitfields mirror the exact 128-bit block encoding, so this struct must not be
// reordered or repacked.
struct bc7_mode_5
{
    union
    {
        struct
        {
            uint64_t m_mode : 6;  // mode 5 == bit 5 set (value 0x20)
            uint64_t m_rot : 2;   // channel rotation: 0=none, 1=swap A/R, 2=swap A/G, 3=swap A/B
            uint64_t m_r0 : 7;
            uint64_t m_r1 : 7;
            uint64_t m_g0 : 7;
            uint64_t m_g1 : 7;
            uint64_t m_b0 : 7;
            uint64_t m_b1 : 7;
            uint64_t m_a0 : 8;
            uint64_t m_a1_0 : 6;  // low 6 bits of a1; the high 2 bits land in m_hi
        } m_lo;
        uint64_t m_lo_bits;
    };

    union
    {
        struct
        {
            uint64_t m_a1_1 : 2;  // high 2 bits of a1
            // bit 2
            // 4x4 color selector grid; anchor selector m_c00 is 1 bit (MSB implied 0).
            uint64_t m_c00 : 1;
            uint64_t m_c10 : 2;
            uint64_t m_c20 : 2;
            uint64_t m_c30 : 2;

            uint64_t m_c01 : 2;
            uint64_t m_c11 : 2;
            uint64_t m_c21 : 2;
            uint64_t m_c31 : 2;

            uint64_t m_c02 : 2;
            uint64_t m_c12 : 2;
            uint64_t m_c22 : 2;
            uint64_t m_c32 : 2;

            uint64_t m_c03 : 2;
            uint64_t m_c13 : 2;
            uint64_t m_c23 : 2;
            uint64_t m_c33 : 2;

            // bit 33
            // 4x4 alpha selector grid; anchor selector m_a00 is 1 bit.
            uint64_t m_a00 : 1;
            uint64_t m_a10 : 2;
            uint64_t m_a20 : 2;
            uint64_t m_a30 : 2;

            uint64_t m_a01 : 2;
            uint64_t m_a11 : 2;
            uint64_t m_a21 : 2;
            uint64_t m_a31 : 2;

            uint64_t m_a02 : 2;
            uint64_t m_a12 : 2;
            uint64_t m_a22 : 2;
            uint64_t m_a32 : 2;

            uint64_t m_a03 : 2;
            uint64_t m_a13 : 2;
            uint64_t m_a23 : 2;
            uint64_t m_a33 : 2;
        } m_hi;
        uint64_t m_hi_bits;
    };

    // Endpoint 0, with 7-bit RGB expanded to 8 bits by bit replication; alpha is stored as 8 bits.
    color_rgba get_low_color() const
    {
        return color_rgba(cNoClamp,
            (int)((m_lo.m_r0 << 1) | (m_lo.m_r0 >> 6)),
            (int)((m_lo.m_g0 << 1) | (m_lo.m_g0 >> 6)),
            (int)((m_lo.m_b0 << 1) | (m_lo.m_b0 >> 6)),
            m_lo.m_a0);
    }

    // Endpoint 1; its 8-bit alpha is reassembled from the 6+2 bit split across the two qwords.
    color_rgba get_high_color() const
    {
        return color_rgba(cNoClamp,
            (int)((m_lo.m_r1 << 1) | (m_lo.m_r1 >> 6)),
            (int)((m_lo.m_g1 << 1) | (m_lo.m_g1 >> 6)),
            (int)((m_lo.m_b1 << 1) | (m_lo.m_b1 >> 6)),
            (int)m_lo.m_a1_0 | ((int)m_hi.m_a1_1 << 6));
    }

    // Computes the 4-entry interpolated palette using the BC7 2-bit weights.
    void get_block_colors(color_rgba* pColors) const
    {
        const color_rgba low_color(get_low_color());
        const color_rgba high_color(get_high_color());

        for (uint32_t i = 0; i < 4; i++)
        {
            static const uint32_t s_bc7_weights2[4] = { 0, 21, 43, 64 };

            pColors[i].set_noclamp_rgba(
                (low_color.r * (64 - s_bc7_weights2[i]) + high_color.r * s_bc7_weights2[i] + 32) >> 6,
                (low_color.g * (64 - s_bc7_weights2[i]) + high_color.g * s_bc7_weights2[i] + 32) >> 6,
                (low_color.b * (64 - s_bc7_weights2[i]) + high_color.b * s_bc7_weights2[i] + 32) >> 6,
                (low_color.a * (64 - s_bc7_weights2[i]) + high_color.a * s_bc7_weights2[i] + 32) >> 6);
        }
    }

    // Returns the color (alpha == false) or alpha (alpha == true) selector for
    // pixel idx (raster order). Color selectors start at bit 66, alpha at bit 97;
    // the anchor selector (idx 0) is 1 bit, all others are 2 bits.
    uint32_t get_selector(uint32_t idx, bool alpha) const
    {
        const uint32_t size = (idx == 0) ? 1 : 2;

        uint32_t ofs = alpha ? 97 : 66;

        if (idx)
            ofs += 1 + 2 * (idx - 1);

        return get_block_bits(reinterpret_cast<const uint8_t*>(this), ofs, size);
    }
};
  462. bool unpack_bc7_mode5(const void* pBlock_bits, color_rgba* pPixels)
  463. {
  464. static_assert(sizeof(bc7_mode_5) == 16, "sizeof(bc7_mode_5) == 16");
  465. const bc7_mode_5& block = *static_cast<const bc7_mode_5*>(pBlock_bits);
  466. if (block.m_lo.m_mode != (1 << 5))
  467. return false;
  468. color_rgba block_colors[4];
  469. block.get_block_colors(block_colors);
  470. const uint32_t rot = block.m_lo.m_rot;
  471. for (uint32_t i = 0; i < 16; i++)
  472. {
  473. const uint32_t cs = block.get_selector(i, false);
  474. color_rgba c(block_colors[cs]);
  475. const uint32_t as = block.get_selector(i, true);
  476. c.a = block_colors[as].a;
  477. if (rot > 0)
  478. std::swap(c[3], c[rot - 1]);
  479. pPixels[i] = c;
  480. }
  481. return true;
  482. }
// Unpacks to RGBA, R, RG, or A
// Dispatches one compressed block to the per-format decoder. Returns false if
// the block is malformed for its format or the format is unsupported.
// Note: PVRTC1 is handled separately (blocks aren't independently decodable).
bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
{
    switch (fmt)
    {
    case cBC1:
    {
        unpack_bc1(pBlock, pPixels, true);
        break;
    }
    case cBC3:
    {
        // Propagates false on (unsupported) punchthrough color blocks.
        return unpack_bc3(pBlock, pPixels);
    }
    case cBC4:
    {
        // Unpack to R
        unpack_bc4(pBlock, &pPixels[0].r, sizeof(color_rgba));
        break;
    }
    case cBC5:
    {
        unpack_bc5(pBlock, pPixels);
        break;
    }
    case cBC7:
    {
        // We only support modes 5 and 6.
        if (!unpack_bc7_mode5(pBlock, pPixels))
        {
            if (!unpack_bc7_mode6(pBlock, pPixels))
                return false;
        }
        break;
    }
    // Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color)
    case cETC2_RGB:
    case cETC1:
    case cETC1S:
    {
        return unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels);
    }
    case cETC2_RGBA:
    {
        // Color is the second 8-byte block, EAC alpha the first.
        if (!unpack_etc1(static_cast<const etc_block*>(pBlock)[1], pPixels))
            return false;
        unpack_etc2_eac(pBlock, pPixels);
        break;
    }
    case cETC2_ALPHA:
    {
        // Unpack to A
        unpack_etc2_eac(pBlock, pPixels);
        break;
    }
    case cASTC4x4:
    {
        const bool astc_srgb = false;
        basisu_astc::astc::decompress(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);
        break;
    }
    case cATC_RGB:
    {
        unpack_atc(pBlock, pPixels);
        break;
    }
    case cATC_RGBA_INTERPOLATED_ALPHA:
    {
        // Color is the second 8-byte block; alpha is a BC4-style block in the first 8 bytes.
        unpack_atc(static_cast<const uint8_t*>(pBlock) + 8, pPixels);
        unpack_bc4(pBlock, &pPixels[0].a, sizeof(color_rgba));
        break;
    }
    default:
    {
        assert(0);
        // TODO
        return false;
    }
    }
    return true;
}
// Unpacks the whole compressed texture into 'img'. Returns false if any block
// failed to unpack (the image is still filled with whatever was decodable).
// pvrtc_wrap_addressing selects wrapped vs. clamped neighborhood addressing for
// PVRTC1 decoding.
bool gpu_image::unpack(image& img, bool pvrtc_wrap_addressing) const
{
    img.resize(get_pixel_width(), get_pixel_height());
    img.set_all(g_black_color);

    if (!img.get_width() || !img.get_height())
        return true;

    if ((m_fmt == cPVRTC1_4_RGB) || (m_fmt == cPVRTC1_4_RGBA))
    {
        // PVRTC1 blocks aren't independently decodable, so the whole image is
        // deswizzled and unpacked via pvrtc4_image instead of block-by-block.
        pvrtc4_image pi(m_width, m_height, pvrtc_wrap_addressing);

        if (get_total_blocks() != pi.get_total_blocks())
            return false;

        memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes());

        pi.deswizzle();

        pi.unpack_all_pixels(img);

        return true;
    }

    // Pre-fill the scratch block so formats that only write some channels
    // (e.g. BC4 -> R only) leave the rest black.
    color_rgba pixels[cMaxBlockSize * cMaxBlockSize];
    for (uint32_t i = 0; i < cMaxBlockSize * cMaxBlockSize; i++)
        pixels[i] = g_black_color;

    bool success = true;

    for (uint32_t by = 0; by < m_blocks_y; by++)
    {
        for (uint32_t bx = 0; bx < m_blocks_x; bx++)
        {
            const void* pBlock = get_block_ptr(bx, by);

            if (!unpack_block(m_fmt, pBlock, pixels))
                success = false;

            img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);
        } // bx
    } // by

    return success;
}
// KTX 1.1 file identifier magic ("«KTX 11»\r\n\x1A\n").
static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };

// KTX/GL enums
enum
{
    KTX_ENDIAN = 0x04030201,
    KTX_OPPOSITE_ENDIAN = 0x01020304,
    KTX_ETC1_RGB8_OES = 0x8D64,
    KTX_RED = 0x1903,
    KTX_RG = 0x8227,
    KTX_RGB = 0x1907,
    KTX_RGBA = 0x1908,
    KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0,
    KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3,
    KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB,
    KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD,
    KTX_COMPRESSED_RGB8_ETC2 = 0x9274,
    KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278,
    KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C,
    KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D,
    KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,
    KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,
    KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,
    KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0,
    KTX_ATC_RGB_AMD = 0x8C92,
    KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE
};
// KTX 1.1 file header. Field order and sizes match the on-disk format;
// packed_uint<4> keeps everything unaligned-safe and endian-explicit.
struct ktx_header
{
    uint8_t m_identifier[12];
    packed_uint<4> m_endianness;
    packed_uint<4> m_glType;
    packed_uint<4> m_glTypeSize;
    packed_uint<4> m_glFormat;
    packed_uint<4> m_glInternalFormat;
    packed_uint<4> m_glBaseInternalFormat;
    packed_uint<4> m_pixelWidth;
    packed_uint<4> m_pixelHeight;
    packed_uint<4> m_pixelDepth;
    packed_uint<4> m_numberOfArrayElements;
    packed_uint<4> m_numberOfFaces;
    packed_uint<4> m_numberOfMipmapLevels;
    packed_uint<4> m_bytesOfKeyValueData;

    void clear() { clear_obj(*this); }
};
// Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index]
// Serializes the images to a KTX 1.1 file in ktx_data. For cubemaps,
// gpu_images.size() must be a multiple of 6 (faces are consecutive per array
// element). Returns false on inconsistent inputs or unsupported formats.
bool create_ktx_texture_file(uint8_vec &ktx_data, const std::vector<gpu_image_vec>& gpu_images, bool cubemap_flag)
{
    if (!gpu_images.size())
    {
        assert(0);
        return false;
    }

    uint32_t width = 0, height = 0, total_levels = 0;
    basisu::texture_format fmt = cInvalidTextureFormat;

    if (cubemap_flag)
    {
        if ((gpu_images.size() % 6) != 0)
        {
            assert(0);
            return false;
        }
    }

    // Validate that all array entries agree on dimensions, mip count, format,
    // and that each mip chain halves correctly.
    for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
    {
        const gpu_image_vec &levels = gpu_images[array_index];

        if (!levels.size())
        {
            // Empty mip chain
            assert(0);
            return false;
        }

        if (!array_index)
        {
            // First entry establishes the reference dimensions/format.
            width = levels[0].get_pixel_width();
            height = levels[0].get_pixel_height();
            total_levels = (uint32_t)levels.size();
            fmt = levels[0].get_format();
        }
        else
        {
            if ((width != levels[0].get_pixel_width()) ||
                (height != levels[0].get_pixel_height()) ||
                (total_levels != levels.size()))
            {
                // All cubemap/texture array faces must be the same dimension
                assert(0);
                return false;
            }
        }

        for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
        {
            if (level_index)
            {
                if ( (levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) ||
                     (levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)) )
                {
                    // Malformed mipmap chain
                    assert(0);
                    return false;
                }
            }

            if (fmt != levels[level_index].get_format())
            {
                // All input textures must use the same GPU format
                assert(0);
                return false;
            }
        }
    }

    // Map the basisu texture format to the KTX glInternalFormat /
    // glBaseInternalFormat pair.
    uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB;

    switch (fmt)
    {
    case cBC1:
    {
        internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT;
        break;
    }
    case cBC3:
    {
        internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT;
        base_internal_fmt = KTX_RGBA;
        break;
    }
    case cBC4:
    {
        internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT;
        base_internal_fmt = KTX_RED;
        break;
    }
    case cBC5:
    {
        internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT;
        base_internal_fmt = KTX_RG;
        break;
    }
    case cETC1:
    case cETC1S:
    {
        internal_fmt = KTX_ETC1_RGB8_OES;
        break;
    }
    case cETC2_RGB:
    {
        internal_fmt = KTX_COMPRESSED_RGB8_ETC2;
        break;
    }
    case cETC2_RGBA:
    {
        internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC;
        base_internal_fmt = KTX_RGBA;
        break;
    }
    case cBC7:
    {
        internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM;
        base_internal_fmt = KTX_RGBA;
        break;
    }
    case cPVRTC1_4_RGB:
    {
        internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG;
        break;
    }
    case cPVRTC1_4_RGBA:
    {
        internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG;
        base_internal_fmt = KTX_RGBA;
        break;
    }
    case cASTC4x4:
    {
        internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR;
        base_internal_fmt = KTX_RGBA;
        break;
    }
    case cATC_RGB:
    {
        internal_fmt = KTX_ATC_RGB_AMD;
        break;
    }
    case cATC_RGBA_INTERPOLATED_ALPHA:
    {
        internal_fmt = KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD;
        base_internal_fmt = KTX_RGBA;
        break;
    }
    default:
    {
        // TODO
        assert(0);
        return false;
    }
    }

    ktx_header header;
    header.clear();
    memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id));
    header.m_endianness = KTX_ENDIAN;
    header.m_pixelWidth = width;
    header.m_pixelHeight = height;
    header.m_glInternalFormat = internal_fmt;
    header.m_glBaseInternalFormat = base_internal_fmt;
    // Per the KTX spec, numberOfArrayElements is 0 for non-array textures.
    header.m_numberOfArrayElements = (uint32_t)(cubemap_flag ? (gpu_images.size() / 6) : gpu_images.size());
    if (header.m_numberOfArrayElements == 1)
        header.m_numberOfArrayElements = 0;

    header.m_numberOfMipmapLevels = total_levels;
    header.m_numberOfFaces = cubemap_flag ? 6 : 1;

    append_vector(ktx_data, (uint8_t *)&header, sizeof(header));

    for (uint32_t level_index = 0; level_index < total_levels; level_index++)
    {
        uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes();

        // Per the KTX spec, imageSize for a non-array cubemap is the size of a
        // single face; otherwise it covers all faces/array elements of the level.
        if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1))
        {
            img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements);
        }

        assert(img_size && ((img_size & 3) == 0));

        packed_uint<4> packed_img_size(img_size);
        append_vector(ktx_data, (uint8_t *)&packed_img_size, sizeof(packed_img_size));

        uint32_t bytes_written = 0;  // NOTE(review): accumulated but currently unused

        for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++)
        {
            for (uint32_t face_index = 0; face_index < header.m_numberOfFaces; face_index++)
            {
                const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index];

                append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes());

                bytes_written += img.get_size_in_bytes();
            }

        } // array_index

    } // level_index

    return true;
}
  826. bool write_compressed_texture_file(const char* pFilename, const std::vector<gpu_image_vec>& g, bool cubemap_flag)
  827. {
  828. std::string extension(string_tolower(string_get_extension(pFilename)));
  829. uint8_vec filedata;
  830. if (extension == "ktx")
  831. {
  832. if (!create_ktx_texture_file(filedata, g, cubemap_flag))
  833. return false;
  834. }
  835. else if (extension == "pvr")
  836. {
  837. // TODO
  838. return false;
  839. }
  840. else if (extension == "dds")
  841. {
  842. // TODO
  843. return false;
  844. }
  845. else
  846. {
  847. // unsupported texture format
  848. assert(0);
  849. return false;
  850. }
  851. return basisu::write_vec_to_file(pFilename, filedata);
  852. }
  853. bool write_compressed_texture_file(const char* pFilename, const gpu_image& g)
  854. {
  855. std::vector<gpu_image_vec> v;
  856. enlarge_vector(v, 1)->push_back(g);
  857. return write_compressed_texture_file(pFilename, v, false);
  858. }
  859. } // basisu