// (extraction artifact removed: stray page header, file-size line, and line-number dump — not source code)
  1. // basisu_gpu_texture.cpp
  2. // Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. #include "basisu_gpu_texture.h"
  16. #include "basisu_enc.h"
  17. #include "basisu_pvrtc1_4.h"
  18. #include "3rdparty/android_astc_decomp.h"
  19. #include "basisu_bc7enc.h"
  20. #include "../transcoder/basisu_astc_hdr_core.h"
  21. #define BASISU_USE_GOOGLE_ASTC_DECODER (1)
  22. namespace basisu
  23. {
  24. //------------------------------------------------------------------------------------------------
  25. // ETC2 EAC
  26. void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels)
  27. {
  28. static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8");
  29. const eac_a8_block *pBlock = static_cast<const eac_a8_block *>(pBlock_bits);
  30. const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table];
  31. const uint64_t selector_bits = pBlock->get_selector_bits();
  32. const int32_t base = pBlock->m_base;
  33. const int32_t mul = pBlock->m_multiplier;
  34. pPixels[0].a = clamp255(base + pTable[pBlock->get_selector(0, 0, selector_bits)] * mul);
  35. pPixels[1].a = clamp255(base + pTable[pBlock->get_selector(1, 0, selector_bits)] * mul);
  36. pPixels[2].a = clamp255(base + pTable[pBlock->get_selector(2, 0, selector_bits)] * mul);
  37. pPixels[3].a = clamp255(base + pTable[pBlock->get_selector(3, 0, selector_bits)] * mul);
  38. pPixels[4].a = clamp255(base + pTable[pBlock->get_selector(0, 1, selector_bits)] * mul);
  39. pPixels[5].a = clamp255(base + pTable[pBlock->get_selector(1, 1, selector_bits)] * mul);
  40. pPixels[6].a = clamp255(base + pTable[pBlock->get_selector(2, 1, selector_bits)] * mul);
  41. pPixels[7].a = clamp255(base + pTable[pBlock->get_selector(3, 1, selector_bits)] * mul);
  42. pPixels[8].a = clamp255(base + pTable[pBlock->get_selector(0, 2, selector_bits)] * mul);
  43. pPixels[9].a = clamp255(base + pTable[pBlock->get_selector(1, 2, selector_bits)] * mul);
  44. pPixels[10].a = clamp255(base + pTable[pBlock->get_selector(2, 2, selector_bits)] * mul);
  45. pPixels[11].a = clamp255(base + pTable[pBlock->get_selector(3, 2, selector_bits)] * mul);
  46. pPixels[12].a = clamp255(base + pTable[pBlock->get_selector(0, 3, selector_bits)] * mul);
  47. pPixels[13].a = clamp255(base + pTable[pBlock->get_selector(1, 3, selector_bits)] * mul);
  48. pPixels[14].a = clamp255(base + pTable[pBlock->get_selector(2, 3, selector_bits)] * mul);
  49. pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul);
  50. }
  51. //------------------------------------------------------------------------------------------------
  52. // BC1
  53. struct bc1_block
  54. {
  55. enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
  56. uint8_t m_low_color[cTotalEndpointBytes];
  57. uint8_t m_high_color[cTotalEndpointBytes];
  58. uint8_t m_selectors[cTotalSelectorBytes];
  59. inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
  60. inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
  61. static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b)
  62. {
  63. r = (c >> 11) & 31;
  64. g = (c >> 5) & 63;
  65. b = c & 31;
  66. r = (r << 3) | (r >> 2);
  67. g = (g << 2) | (g >> 4);
  68. b = (b << 3) | (b >> 2);
  69. }
  70. inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * 2)) & 3; }
  71. };
  72. // Returns true if the block uses 3 color punchthrough alpha mode.
  73. bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
  74. {
  75. static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
  76. const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
  77. const uint32_t l = pBlock->get_low_color();
  78. const uint32_t h = pBlock->get_high_color();
  79. color_rgba c[4];
  80. uint32_t r0, g0, b0, r1, g1, b1;
  81. bc1_block::unpack_color(l, r0, g0, b0);
  82. bc1_block::unpack_color(h, r1, g1, b1);
  83. c[0].set_noclamp_rgba(r0, g0, b0, 255);
  84. c[1].set_noclamp_rgba(r1, g1, b1, 255);
  85. bool used_punchthrough = false;
  86. if (l > h)
  87. {
  88. c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
  89. c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
  90. }
  91. else
  92. {
  93. c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
  94. c[3].set_noclamp_rgba(0, 0, 0, 0);
  95. used_punchthrough = true;
  96. }
  97. if (set_alpha)
  98. {
  99. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  100. {
  101. pPixels[0] = c[pBlock->get_selector(0, y)];
  102. pPixels[1] = c[pBlock->get_selector(1, y)];
  103. pPixels[2] = c[pBlock->get_selector(2, y)];
  104. pPixels[3] = c[pBlock->get_selector(3, y)];
  105. }
  106. }
  107. else
  108. {
  109. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  110. {
  111. pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
  112. pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
  113. pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
  114. pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
  115. }
  116. }
  117. return used_punchthrough;
  118. }
// Decodes one BC1 block emulating the non-ideal rounding NVIDIA GPUs are believed to use:
// red/blue expand 5->8 via (3*v*22)/8 style math and the interpolated greens include a
// correction term derived from the green difference (gdiff). Output therefore differs
// slightly from the ideal unpack_bc1() above.
// Returns true if the block uses 3 color punchthrough alpha mode.
bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
{
    static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
    const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
    const uint32_t l = pBlock->get_low_color();
    const uint32_t h = pBlock->get_high_color();
    color_rgba c[4];
    // Raw 5:6:5 endpoint fields.
    int r0 = (l >> 11) & 31;
    int g0 = (l >> 5) & 63;
    int b0 = l & 31;
    int r1 = (h >> 11) & 31;
    int g1 = (h >> 5) & 63;
    int b1 = h & 31;
    // Endpoint 0: NV-style 5->8 expansion for r/b ((3*v*22)/8), bit replication for g.
    c[0].b = (uint8_t)((3 * b0 * 22) / 8);
    c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4));
    c[0].r = (uint8_t)((3 * r0 * 22) / 8);
    c[0].a = 0xFF;
    // Endpoint 1, same expansion.
    c[1].r = (uint8_t)((3 * r1 * 22) / 8);
    c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4));
    c[1].b = (uint8_t)((3 * b1 * 22) / 8);
    c[1].a = 0xFF;
    // Difference of the expanded greens, used by the interpolation correction below.
    int gdiff = c[1].g - c[0].g;
    bool used_punchthrough = false;
    if (l > h)
    {
        // 4-color mode: thirds for r/b via ((2*a+b)*22)/8; green uses the gdiff-corrected form.
        c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8);
        c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256));
        c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8);
        c[2].a = 0xFF;
        c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8);
        c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256);
        c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8);
        c[3].a = 0xFF;
    }
    else
    {
        // 3-color mode: midpoint for r/b via ((a+b)*33)/8, gdiff-corrected green,
        // and transparent black for the punchthrough entry.
        c[2].r = (uint8_t)(((r0 + r1) * 33) / 8);
        c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256);
        c[2].b = (uint8_t)(((b0 + b1) * 33) / 8);
        c[2].a = 0xFF;
        c[3].set_noclamp_rgba(0, 0, 0, 0);
        used_punchthrough = true;
    }
    if (set_alpha)
    {
        for (uint32_t y = 0; y < 4; y++, pPixels += 4)
        {
            pPixels[0] = c[pBlock->get_selector(0, y)];
            pPixels[1] = c[pBlock->get_selector(1, y)];
            pPixels[2] = c[pBlock->get_selector(2, y)];
            pPixels[3] = c[pBlock->get_selector(3, y)];
        }
    }
    else
    {
        // Preserve the destination's existing alpha bytes.
        for (uint32_t y = 0; y < 4; y++, pPixels += 4)
        {
            pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
            pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
            pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
            pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
        }
    }
    return used_punchthrough;
}
  184. static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; }
  185. static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; }
  186. bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
  187. {
  188. const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
  189. const uint32_t l = pBlock->get_low_color();
  190. const uint32_t h = pBlock->get_high_color();
  191. color_rgba c[4];
  192. uint32_t r0, g0, b0, r1, g1, b1;
  193. bc1_block::unpack_color(l, r0, g0, b0);
  194. bc1_block::unpack_color(h, r1, g1, b1);
  195. c[0].set_noclamp_rgba(r0, g0, b0, 255);
  196. c[1].set_noclamp_rgba(r1, g1, b1, 255);
  197. bool used_punchthrough = false;
  198. if (l > h)
  199. {
  200. c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
  201. c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
  202. }
  203. else
  204. {
  205. c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
  206. c[3].set_noclamp_rgba(0, 0, 0, 0);
  207. used_punchthrough = true;
  208. }
  209. if (set_alpha)
  210. {
  211. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  212. {
  213. pPixels[0] = c[pBlock->get_selector(0, y)];
  214. pPixels[1] = c[pBlock->get_selector(1, y)];
  215. pPixels[2] = c[pBlock->get_selector(2, y)];
  216. pPixels[3] = c[pBlock->get_selector(3, y)];
  217. }
  218. }
  219. else
  220. {
  221. for (uint32_t y = 0; y < 4; y++, pPixels += 4)
  222. {
  223. pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
  224. pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
  225. pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
  226. pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
  227. }
  228. }
  229. return used_punchthrough;
  230. }
  231. //------------------------------------------------------------------------------------------------
  232. // BC3-5
  233. struct bc4_block
  234. {
  235. enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
  236. uint8_t m_endpoints[2];
  237. uint8_t m_selectors[cTotalSelectorBytes];
  238. inline uint32_t get_low_alpha() const { return m_endpoints[0]; }
  239. inline uint32_t get_high_alpha() const { return m_endpoints[1]; }
  240. inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }
  241. inline uint64_t get_selector_bits() const
  242. {
  243. return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) |
  244. (((uint64_t)m_selectors[4]) << 32U) |
  245. (((uint64_t)m_selectors[5]) << 40U);
  246. }
  247. inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const
  248. {
  249. assert((x < 4U) && (y < 4U));
  250. return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1);
  251. }
  252. static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h)
  253. {
  254. pDst[0] = static_cast<uint8_t>(l);
  255. pDst[1] = static_cast<uint8_t>(h);
  256. pDst[2] = static_cast<uint8_t>((l * 4 + h) / 5);
  257. pDst[3] = static_cast<uint8_t>((l * 3 + h * 2) / 5);
  258. pDst[4] = static_cast<uint8_t>((l * 2 + h * 3) / 5);
  259. pDst[5] = static_cast<uint8_t>((l + h * 4) / 5);
  260. pDst[6] = 0;
  261. pDst[7] = 255;
  262. return 6;
  263. }
  264. static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h)
  265. {
  266. pDst[0] = static_cast<uint8_t>(l);
  267. pDst[1] = static_cast<uint8_t>(h);
  268. pDst[2] = static_cast<uint8_t>((l * 6 + h) / 7);
  269. pDst[3] = static_cast<uint8_t>((l * 5 + h * 2) / 7);
  270. pDst[4] = static_cast<uint8_t>((l * 4 + h * 3) / 7);
  271. pDst[5] = static_cast<uint8_t>((l * 3 + h * 4) / 7);
  272. pDst[6] = static_cast<uint8_t>((l * 2 + h * 5) / 7);
  273. pDst[7] = static_cast<uint8_t>((l + h * 6) / 7);
  274. return 8;
  275. }
  276. static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h)
  277. {
  278. if (l > h)
  279. return get_block_values8(pDst, l, h);
  280. else
  281. return get_block_values6(pDst, l, h);
  282. }
  283. };
  284. void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride)
  285. {
  286. static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8");
  287. const bc4_block *pBlock = static_cast<const bc4_block *>(pBlock_bits);
  288. uint8_t sel_values[8];
  289. bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha());
  290. const uint64_t selector_bits = pBlock->get_selector_bits();
  291. for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U))
  292. {
  293. pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)];
  294. pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)];
  295. pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)];
  296. pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)];
  297. }
  298. }
  299. // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3.
  300. bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels)
  301. {
  302. bool success = true;
  303. if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(bc4_block), pPixels, true))
  304. success = false;
  305. unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba));
  306. return success;
  307. }
  308. // writes RG
  309. void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels)
  310. {
  311. unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba));
  312. unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba));
  313. }
  314. //------------------------------------------------------------------------------------------------
  315. // ATC isn't officially documented, so I'm assuming these references:
  316. // http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf
  317. // https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c
  318. // The paper incorrectly says the ATC lerp factors are 1/3 and 2/3, but they are actually 3/8 and 5/8.
// Decodes one 8-byte ATC block into 16 RGBA pixels (see the format references above).
void unpack_atc(const void* pBlock_bits, color_rgba* pPixels)
{
    const uint8_t* pBytes = static_cast<const uint8_t*>(pBlock_bits);
    // Two little-endian 16-bit endpoint colors followed by 32 bits of 2-bit selectors.
    const uint16_t color0 = pBytes[0] | (pBytes[1] << 8U);
    const uint16_t color1 = pBytes[2] | (pBytes[3] << 8U);
    uint32_t sels = pBytes[4] | (pBytes[5] << 8U) | (pBytes[6] << 16U) | (pBytes[7] << 24U);
    // The high bit of color0 selects the alternate (black + differential) mode.
    const bool mode = (color0 & 0x8000) != 0;
    color_rgba c[4];
    // color0 is 1:5:5:5 (top bit is the mode flag); expand 5->8 by bit replication.
    c[0].set((color0 >> 10) & 31, (color0 >> 5) & 31, color0 & 31, 255);
    c[0].r = (c[0].r << 3) | (c[0].r >> 2);
    c[0].g = (c[0].g << 3) | (c[0].g >> 2);
    c[0].b = (c[0].b << 3) | (c[0].b >> 2);
    // color1 is a full 5:6:5 color.
    c[3].set((color1 >> 11) & 31, (color1 >> 5) & 63, color1 & 31, 255);
    c[3].r = (c[3].r << 3) | (c[3].r >> 2);
    c[3].g = (c[3].g << 2) | (c[3].g >> 4);
    c[3].b = (c[3].b << 3) | (c[3].b >> 2);
    if (mode)
    {
        // Alternate mode: c1 = saturating c0 - c3/4, c2 = original c0, c0 = black.
        c[1].set(basisu::maximum(0, c[0].r - (c[3].r >> 2)), basisu::maximum(0, c[0].g - (c[3].g >> 2)), basisu::maximum(0, c[0].b - (c[3].b >> 2)), 255);
        c[2] = c[0];
        c[0].set(0, 0, 0, 255);
    }
    else
    {
        // Standard mode: interpolated colors at 5/8 and 3/8 (not 1/3 and 2/3 — see the
        // note above this function).
        c[1].r = (c[0].r * 5 + c[3].r * 3) >> 3;
        c[1].g = (c[0].g * 5 + c[3].g * 3) >> 3;
        c[1].b = (c[0].b * 5 + c[3].b * 3) >> 3;
        c[2].r = (c[0].r * 3 + c[3].r * 5) >> 3;
        c[2].g = (c[0].g * 3 + c[3].g * 5) >> 3;
        c[2].b = (c[0].b * 3 + c[3].b * 5) >> 3;
    }
    // Emit texels in raster order, consuming 2 selector bits per texel (LSB-first).
    for (uint32_t i = 0; i < 16; i++)
    {
        const uint32_t s = sels & 3;
        pPixels[i] = c[s];
        sels >>= 2;
    }
}
  357. //------------------------------------------------------------------------------------------------
  358. // BC7 mode 0-7 decompression.
  359. // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines.
  360. static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; }
  361. static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; }
  362. static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; }
  363. static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; }
  364. static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + 32) >> 6; }
  365. static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits)
  366. {
  367. assert(l <= 255 && h <= 255);
  368. switch (bits)
  369. {
  370. case 2: return bc7_interp2(l, h, w);
  371. case 3: return bc7_interp3(l, h, w);
  372. case 4: return bc7_interp4(l, h, w);
  373. default:
  374. break;
  375. }
  376. return 0;
  377. }
// Unpacks a BC7 mode 0 or 2 block (both 3-subset, RGB-only).
// Mode 0: 4-bit partition id, 4-bit endpoints with one P-bit each, 3-bit weights.
// Mode 2: 6-bit partition id, 5-bit endpoints, no P-bits, 2-bit weights.
// Returns false if the block's mode field doesn't match the requested 'mode'.
bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
{
    //const uint32_t SUBSETS = 3;
    const uint32_t ENDPOINTS = 6;
    const uint32_t COMPS = 3;
    const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2;
    const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5;
    const uint32_t PBITS = (mode == 0) ? 6 : 0;
    const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
    uint32_t bit_offset = 0;
    const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
    // The mode field is a unary prefix: bit (1 << mode) set after 'mode' zero bits.
    if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
    const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6);
    color_rgba endpoints[ENDPOINTS];
    // Endpoints are stored component-major: all R fields, then G, then B.
    for (uint32_t c = 0; c < COMPS; c++)
    for (uint32_t e = 0; e < ENDPOINTS; e++)
    endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
    uint32_t pbits[6];
    for (uint32_t p = 0; p < PBITS; p++)
    pbits[p] = read_bits32(pBuf, bit_offset, 1);
    uint32_t weights[16];
    // Anchor texels (index 0 and the two per-partition anchors) drop their MSB.
    for (uint32_t i = 0; i < 16; i++)
    weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
    assert(bit_offset == 128);
    // Dequantize endpoints to 8 bits; alpha is always opaque in these modes.
    for (uint32_t e = 0; e < ENDPOINTS; e++)
    for (uint32_t c = 0; c < 4; c++)
    endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS)));
    // Precompute each subset's interpolated palette.
    color_rgba block_colors[3][8];
    for (uint32_t s = 0; s < 3; s++)
    for (uint32_t i = 0; i < WEIGHT_VALS; i++)
    {
    for (uint32_t c = 0; c < 3; c++)
    block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
    block_colors[s][i][3] = 255;
    }
    // Map each texel through its partition's subset and stored weight.
    for (uint32_t i = 0; i < 16; i++)
    pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]];
    return true;
}
// Unpacks a BC7 mode 1, 3 or 7 block (all 2-subset modes).
// Mode 1: 6-bit RGB endpoints, 2 shared P-bits, 3-bit weights.
// Mode 3: 7-bit RGB endpoints, 4 P-bits, 2-bit weights.
// Mode 7: 5-bit RGBA endpoints, 4 P-bits, 2-bit weights (only mode here with alpha).
// Returns false if the block's mode field doesn't match the requested 'mode'.
bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
{
    //const uint32_t SUBSETS = 2;
    const uint32_t ENDPOINTS = 4;
    const uint32_t COMPS = (mode == 7) ? 4 : 3;
    const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2;
    const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7);
    const uint32_t PBITS = (mode == 1) ? 2 : 4;
    // Mode 1 shares one P-bit per endpoint pair; modes 3/7 have one per endpoint.
    const uint32_t SHARED_PBITS = (mode == 1) ? true : false;
    const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
    uint32_t bit_offset = 0;
    const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
    // The mode field is a unary prefix: bit (1 << mode) set after 'mode' zero bits.
    if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
    const uint32_t part = read_bits32(pBuf, bit_offset, 6);
    color_rgba endpoints[ENDPOINTS];
    // Endpoints are stored component-major.
    for (uint32_t c = 0; c < COMPS; c++)
    for (uint32_t e = 0; e < ENDPOINTS; e++)
    endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
    uint32_t pbits[4];
    for (uint32_t p = 0; p < PBITS; p++)
    pbits[p] = read_bits32(pBuf, bit_offset, 1);
    uint32_t weights[16];
    // Anchor texels (index 0 and the partition's second-subset anchor) drop their MSB.
    for (uint32_t i = 0; i < 16; i++)
    weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
    assert(bit_offset == 128);
    // Dequantize; for RGB-only modes the alpha slot is forced to 255 (in mode 7 the
    // comparison index 4 is never hit, so all four components are dequantized).
    for (uint32_t e = 0; e < ENDPOINTS; e++)
    for (uint32_t c = 0; c < 4; c++)
    endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS));
    // Precompute each subset's interpolated palette.
    color_rgba block_colors[2][8];
    for (uint32_t s = 0; s < 2; s++)
    for (uint32_t i = 0; i < WEIGHT_VALS; i++)
    {
    for (uint32_t c = 0; c < COMPS; c++)
    block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
    block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3];
    }
    // Map each texel through its partition's subset and stored weight.
    for (uint32_t i = 0; i < 16; i++)
    pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]];
    return true;
}
// Unpacks a BC7 mode 4 or 5 block (single-subset, dual-plane: color and alpha have
// independent weight streams, with an optional component rotation).
// Mode 4: 5-bit RGB / 6-bit A endpoints, 2-bit + 3-bit weights, index-swap bit.
// Mode 5: 7-bit RGB / 8-bit A endpoints, 2-bit + 2-bit weights.
// Returns false if the block's mode field doesn't match the requested 'mode'.
bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
{
    const uint32_t ENDPOINTS = 2;
    const uint32_t COMPS = 4;
    const uint32_t WEIGHT_BITS = 2;
    const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2;
    const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7;
    const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8;
    //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
    //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS;
    uint32_t bit_offset = 0;
    const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
    // The mode field is a unary prefix: bit (1 << mode) set after 'mode' zero bits.
    if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
    // comp_rot selects which component swaps with alpha after decode (0 = none).
    const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2);
    // Mode 4 only: index_mode swaps which weight stream drives color vs. alpha.
    const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0;
    color_rgba endpoints[ENDPOINTS];
    // Endpoints are component-major; alpha uses its wider field size.
    for (uint32_t c = 0; c < COMPS; c++)
    for (uint32_t e = 0; e < ENDPOINTS; e++)
    endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
    const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS };
    uint32_t weights[16], a_weights[16];
    // First stream in the block, then the second; texel 0 is the anchor (drops its MSB).
    for (uint32_t i = 0; i < 16; i++)
    (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0));
    for (uint32_t i = 0; i < 16; i++)
    (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0));
    assert(bit_offset == 128);
    // Dequantize all four components to 8 bits.
    for (uint32_t e = 0; e < ENDPOINTS; e++)
    for (uint32_t c = 0; c < 4; c++)
    endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
    // Precompute the RGB palette and the alpha palette (separate weight widths).
    color_rgba block_colors[8];
    for (uint32_t i = 0; i < (1U << weight_bits[0]); i++)
    for (uint32_t c = 0; c < 3; c++)
    block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]);
    for (uint32_t i = 0; i < (1U << weight_bits[1]); i++)
    block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]);
    for (uint32_t i = 0; i < 16; i++)
    {
    pPixels[i] = block_colors[weights[i]];
    pPixels[i].a = block_colors[a_weights[i]].a;
    // Apply the component rotation: swap alpha with R (1), G (2) or B (3).
    if (comp_rot >= 1)
    std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]);
    }
    return true;
}
// Bit-exact layout of a 128-bit BC7 mode 6 block (single subset, 7777.1 endpoints,
// 4-bit indices). m_lo holds the low 64 bits, m_hi the high 64 bits.
struct bc7_mode_6
{
	struct
	{
		uint64_t m_mode : 7;	// mode prefix: 6 zero bits then a 1 (value 1 << 6)
		uint64_t m_r0 : 7;
		uint64_t m_r1 : 7;
		uint64_t m_g0 : 7;
		uint64_t m_g1 : 7;
		uint64_t m_b0 : 7;
		uint64_t m_b1 : 7;
		uint64_t m_a0 : 7;
		uint64_t m_a1 : 7;
		uint64_t m_p0 : 1;	// endpoint 0's shared P-bit (appended as each component's LSB)
	} m_lo;

	union
	{
		struct
		{
			uint64_t m_p1 : 1;	// endpoint 1's shared P-bit
			// 4x4 selector grid; m_s00 (texel 0, the anchor) has only 3 bits —
			// its MSB is implicitly zero.
			uint64_t m_s00 : 3;
			uint64_t m_s10 : 4;
			uint64_t m_s20 : 4;
			uint64_t m_s30 : 4;
			uint64_t m_s01 : 4;
			uint64_t m_s11 : 4;
			uint64_t m_s21 : 4;
			uint64_t m_s31 : 4;
			uint64_t m_s02 : 4;
			uint64_t m_s12 : 4;
			uint64_t m_s22 : 4;
			uint64_t m_s32 : 4;
			uint64_t m_s03 : 4;
			uint64_t m_s13 : 4;
			uint64_t m_s23 : 4;
			uint64_t m_s33 : 4;
		} m_hi;

		uint64_t m_hi_bits;
	};
};
// Unpacks a BC7 mode 6 block into 16 RGBA pixels using the bitfield layout above.
// Returns false if the mode prefix is not mode 6.
bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels)
{
	static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16");

	const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits);

	if (block.m_lo.m_mode != (1 << 6))
		return false;

	// Expand 7-bit endpoints to 8 bits by appending the per-endpoint P-bit as the LSB.
	const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0);
	const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0);
	const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0);
	const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0);
	const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1);
	const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1);
	const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1);
	const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1);

	// Build the 16-entry palette with 4-bit (1/64th) interpolation weights.
	color_rgba vals[16];
	for (uint32_t i = 0; i < 16; i++)
	{
		const uint32_t w = basist::g_bc7_weights4[i];
		const uint32_t iw = 64 - w;
		vals[i].set_noclamp_rgba(
			(r0 * iw + r1 * w + 32) >> 6,
			(g0 * iw + g1 * w + 32) >> 6,
			(b0 * iw + b1 * w + 32) >> 6,
			(a0 * iw + a1 * w + 32) >> 6);
	}

	// Selectors are unrolled because they live in individually-named bitfields.
	pPixels[0] = vals[block.m_hi.m_s00];
	pPixels[1] = vals[block.m_hi.m_s10];
	pPixels[2] = vals[block.m_hi.m_s20];
	pPixels[3] = vals[block.m_hi.m_s30];

	pPixels[4] = vals[block.m_hi.m_s01];
	pPixels[5] = vals[block.m_hi.m_s11];
	pPixels[6] = vals[block.m_hi.m_s21];
	pPixels[7] = vals[block.m_hi.m_s31];

	pPixels[8] = vals[block.m_hi.m_s02];
	pPixels[9] = vals[block.m_hi.m_s12];
	pPixels[10] = vals[block.m_hi.m_s22];
	pPixels[11] = vals[block.m_hi.m_s32];

	pPixels[12] = vals[block.m_hi.m_s03];
	pPixels[13] = vals[block.m_hi.m_s13];
	pPixels[14] = vals[block.m_hi.m_s23];
	pPixels[15] = vals[block.m_hi.m_s33];

	return true;
}
  584. bool unpack_bc7(const void *pBlock, color_rgba *pPixels)
  585. {
  586. const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0];
  587. for (uint32_t mode = 0; mode <= 7; mode++)
  588. {
  589. if (first_byte & (1U << mode))
  590. {
  591. switch (mode)
  592. {
  593. case 0:
  594. case 2:
  595. return unpack_bc7_mode0_2(mode, pBlock, pPixels);
  596. case 1:
  597. case 3:
  598. case 7:
  599. return unpack_bc7_mode1_3_7(mode, pBlock, pPixels);
  600. case 4:
  601. case 5:
  602. return unpack_bc7_mode4_5(mode, pBlock, pPixels);
  603. case 6:
  604. return unpack_bc7_mode6(pBlock, pPixels);
  605. default:
  606. break;
  607. }
  608. }
  609. }
  610. return false;
  611. }
  612. static inline int bc6h_sign_extend(int val, int bits)
  613. {
  614. assert((bits >= 1) && (bits < 32));
  615. assert((val >= 0) && (val < (1 << bits)));
  616. return (val << (32 - bits)) >> (32 - bits);
  617. }
  618. static inline int bc6h_apply_delta(int base, int delta, int num_bits, int is_signed)
  619. {
  620. int bitmask = ((1 << num_bits) - 1);
  621. int v = (base + delta) & bitmask;
  622. return is_signed ? bc6h_sign_extend(v, num_bits) : v;
  623. }
  624. static int bc6h_dequantize(int val, int bits, int is_signed)
  625. {
  626. int result;
  627. if (is_signed)
  628. {
  629. if (bits >= 16)
  630. result = val;
  631. else
  632. {
  633. int s_flag = 0;
  634. if (val < 0)
  635. {
  636. s_flag = 1;
  637. val = -val;
  638. }
  639. if (val == 0)
  640. result = 0;
  641. else if (val >= ((1 << (bits - 1)) - 1))
  642. result = 0x7FFF;
  643. else
  644. result = ((val << 15) + 0x4000) >> (bits - 1);
  645. if (s_flag)
  646. result = -result;
  647. }
  648. }
  649. else
  650. {
  651. if (bits >= 15)
  652. result = val;
  653. else if (!val)
  654. result = 0;
  655. else if (val == ((1 << bits) - 1))
  656. result = 0xFFFF;
  657. else
  658. result = ((val << 16) + 0x8000) >> bits;
  659. }
  660. return result;
  661. }
  662. static inline int bc6h_interpolate(int a, int b, const uint8_t* pWeights, int index)
  663. {
  664. return (a * (64 - (int)pWeights[index]) + b * (int)pWeights[index] + 32) >> 6;
  665. }
  666. static inline basist::half_float bc6h_convert_to_half(int val, int is_signed)
  667. {
  668. if (!is_signed)
  669. {
  670. // scale by 31/64
  671. return (basist::half_float)((val * 31) >> 6);
  672. }
  673. // scale by 31/32
  674. val = (val < 0) ? -(((-val) * 31) >> 5) : (val * 31) >> 5;
  675. int s = 0;
  676. if (val < 0)
  677. {
  678. s = 0x8000;
  679. val = -val;
  680. }
  681. return (basist::half_float)(s | val);
  682. }
  683. static inline uint32_t bc6h_get_bits(uint32_t num_bits, uint64_t& l, uint64_t& h, uint32_t& total_bits)
  684. {
  685. assert((num_bits) && (num_bits <= 63));
  686. uint32_t v = (uint32_t)(l & ((1U << num_bits) - 1U));
  687. l >>= num_bits;
  688. l |= (h << (64U - num_bits));
  689. h >>= num_bits;
  690. total_bits += num_bits;
  691. assert(total_bits <= 128);
  692. return v;
  693. }
  694. static inline uint32_t bc6h_reverse_bits(uint32_t v, uint32_t num_bits)
  695. {
  696. uint32_t res = 0;
  697. for (uint32_t i = 0; i < num_bits; i++)
  698. {
  699. uint32_t bit = (v & (1u << i)) != 0u;
  700. res |= (bit << (num_bits - 1u - i));
  701. }
  702. return res;
  703. }
  704. static inline uint64_t bc6h_read_le_qword(const void* p)
  705. {
  706. const uint8_t* pSrc = static_cast<const uint8_t*>(p);
  707. return ((uint64_t)read_le_dword(pSrc)) | (((uint64_t)read_le_dword(pSrc + sizeof(uint32_t))) << 32U);
  708. }
// Unpacks a 128-bit BC6H block into 4x4 RGB half-float texels.
// pDst_block receives 3 halfs per texel; dest_pitch_in_halfs is the row stride in
// halfs (must be at least 12). Returns false for reserved/invalid mode encodings.
bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs)
{
	assert(dest_pitch_in_halfs >= 4 * 3);

	const uint32_t MAX_SUBSETS = 2, MAX_COMPS = 3;

	const uint8_t* pSrc = static_cast<const uint8_t*>(pSrc_block);
	basist::half_float* pDst = static_cast<basist::half_float*>(pDst_block);

	// Treat the block as a 128-bit little-endian bit stream (blo = low 64 bits).
	uint64_t blo = bc6h_read_le_qword(pSrc), bhi = bc6h_read_le_qword(pSrc + sizeof(uint64_t));

	// Unpack mode
	const int mode = basist::g_bc6h_mode_lookup[blo & 31];
	if (mode < 0)
	{
		// Reserved mode: write zeros and fail.
		// NOTE(review): this only zeros 4 halfs per row, not the full 4*3 — verify
		// whether the whole row was intended to be cleared.
		for (int y = 0; y < 4; y++)
		{
			memset(pDst, 0, sizeof(basist::half_float) * 4);
			pDst += dest_pitch_in_halfs;
		}
		return false;
	}

	// Skip mode bits (modes 0/1 use a 2-bit code, all others 5 bits).
	uint32_t total_bits_read = 0;
	bc6h_get_bits((mode < 2) ? 2 : 5, blo, bhi, total_bits_read);

	assert(mode < (int)basist::NUM_BC6H_MODES);

	const uint32_t num_subsets = (mode >= 10) ? 1 : 2;
	const bool is_mode_9_or_10 = (mode == 9) || (mode == 10);

	// Unpack endpoint components using the per-mode, table-driven bit layout.
	// Fields may be split across multiple entries and/or stored bit-reversed.
	int comps[MAX_SUBSETS][MAX_COMPS][2] = { { { 0 } } }; // [subset][comp][l/h]

	int part_index = 0;

	uint32_t layout_index = 0;
	while (layout_index < basist::MAX_BC6H_LAYOUT_INDEX)
	{
		const basist::bc6h_bit_layout& layout = basist::g_bc6h_bit_layouts[mode][layout_index];
		if (layout.m_comp < 0)
			break;

		// m_comp == 3 denotes the 2-subset partition index, not an endpoint component.
		const int subset = layout.m_index >> 1, lh_index = layout.m_index & 1;
		assert((layout.m_comp == 3) || ((subset >= 0) && (subset < (int)MAX_SUBSETS)));

		const int last_bit = layout.m_last_bit, first_bit = layout.m_first_bit;
		assert(last_bit >= 0);

		int& res = (layout.m_comp == 3) ? part_index : comps[subset][layout.m_comp][lh_index];

		if (first_bit < 0)
		{
			// Single-bit field placed at position last_bit.
			res |= (bc6h_get_bits(1, blo, bhi, total_bits_read) << last_bit);
		}
		else
		{
			const int total_bits = iabs(last_bit - first_bit) + 1;
			const int bit_shift = basisu::minimum(first_bit, last_bit);

			int b = bc6h_get_bits(total_bits, blo, bhi, total_bits_read);

			// last_bit < first_bit marks a bit-reversed field.
			if (last_bit < first_bit)
				b = bc6h_reverse_bits(b, total_bits);

			res |= (b << bit_shift);
		}

		layout_index++;
	}
	assert(layout_index != basist::MAX_BC6H_LAYOUT_INDEX);

	// Sign extend/dequantize endpoints
	const int num_sig_bits = basist::g_bc6h_mode_sig_bits[mode][0];

	// Signed formats: the base (subset 0 low) endpoint is always sign-extended.
	if (is_signed)
	{
		for (uint32_t comp = 0; comp < 3; comp++)
			comps[0][comp][0] = bc6h_sign_extend(comps[0][comp][0], num_sig_bits);
	}

	// Delta-encoded endpoints (all modes except 9/10) are sign-extended at their
	// per-component delta widths; for signed non-delta modes the remaining
	// endpoints are sign-extended too.
	if (is_signed || !is_mode_9_or_10)
	{
		for (uint32_t subset = 0; subset < num_subsets; subset++)
			for (uint32_t comp = 0; comp < 3; comp++)
				for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++)
					comps[subset][comp][lh] = bc6h_sign_extend(comps[subset][comp][lh], basist::g_bc6h_mode_sig_bits[mode][1 + comp]);
	}

	// Apply deltas relative to the base endpoint (delta modes only).
	if (!is_mode_9_or_10)
	{
		for (uint32_t subset = 0; subset < num_subsets; subset++)
			for (uint32_t comp = 0; comp < 3; comp++)
				for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++)
					comps[subset][comp][lh] = bc6h_apply_delta(comps[0][comp][0], comps[subset][comp][lh], num_sig_bits, is_signed);
	}

	// Expand all endpoints to the full interpolation range.
	for (uint32_t subset = 0; subset < num_subsets; subset++)
		for (uint32_t comp = 0; comp < 3; comp++)
			for (uint32_t lh = 0; lh < 2; lh++)
				comps[subset][comp][lh] = bc6h_dequantize(comps[subset][comp][lh], num_sig_bits, is_signed);

	// Now unpack weights and output texels
	const int weight_bits = (mode >= 10) ? 4 : 3;
	const uint8_t* pWeights = (mode >= 10) ? basist::g_bc6h_weight4 : basist::g_bc6h_weight3;

	dest_pitch_in_halfs -= 4 * 3;

	for (uint32_t y = 0; y < 4; y++)
	{
		for (uint32_t x = 0; x < 4; x++)
		{
			// The 0x80 flag marks anchor texels, whose index is stored with one
			// fewer bit (texel 0 for 1-subset blocks, per-pattern otherwise).
			int subset = (num_subsets == 1) ? ((x | y) ? 0 : 0x80) : basist::g_bc6h_2subset_patterns[part_index][y][x];

			const int num_bits = weight_bits + ((subset & 0x80) ? -1 : 0);
			subset &= 1;

			const int weight_index = bc6h_get_bits(num_bits, blo, bhi, total_bits_read);

			pDst[0] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][0][0], comps[subset][0][1], pWeights, weight_index), is_signed);
			pDst[1] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][1][0], comps[subset][1][1], pWeights, weight_index), is_signed);
			pDst[2] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][2][0], comps[subset][2][1], pWeights, weight_index), is_signed);

			pDst += 3;
		}

		pDst += dest_pitch_in_halfs;
	}

	assert(total_bits_read == 128);

	return true;
}
  810. //------------------------------------------------------------------------------------------------
  811. // FXT1 (for fun, and because some modern Intel parts support it, and because a subset is like BC1)
// Bit-exact layout of a 128-bit FXT1 block (covers an 8x4 pixel region).
// The low qword holds 32 2-bit texel selectors; the high qword holds four
// 555 endpoint colors plus the alpha/glsb/mode control bits.
struct fxt1_block
{
	union
	{
		struct
		{
			// 2-bit selector per texel, t00..t15 = left 4x4 sub-block,
			// t16..t31 = right 4x4 sub-block.
			uint64_t m_t00 : 2;
			uint64_t m_t01 : 2;
			uint64_t m_t02 : 2;
			uint64_t m_t03 : 2;
			uint64_t m_t04 : 2;
			uint64_t m_t05 : 2;
			uint64_t m_t06 : 2;
			uint64_t m_t07 : 2;
			uint64_t m_t08 : 2;
			uint64_t m_t09 : 2;
			uint64_t m_t10 : 2;
			uint64_t m_t11 : 2;
			uint64_t m_t12 : 2;
			uint64_t m_t13 : 2;
			uint64_t m_t14 : 2;
			uint64_t m_t15 : 2;
			uint64_t m_t16 : 2;
			uint64_t m_t17 : 2;
			uint64_t m_t18 : 2;
			uint64_t m_t19 : 2;
			uint64_t m_t20 : 2;
			uint64_t m_t21 : 2;
			uint64_t m_t22 : 2;
			uint64_t m_t23 : 2;
			uint64_t m_t24 : 2;
			uint64_t m_t25 : 2;
			uint64_t m_t26 : 2;
			uint64_t m_t27 : 2;
			uint64_t m_t28 : 2;
			uint64_t m_t29 : 2;
			uint64_t m_t30 : 2;
			uint64_t m_t31 : 2;
		} m_lo;
		uint64_t m_lo_bits;
		uint8_t m_sels[8];	// same selectors viewed as bytes (4 per sub-block)
	};

	union
	{
		struct
		{
#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING
			// This is the format that 3DFX's DECOMP.EXE tool expects, which I'm assuming is what the actual 3DFX hardware wanted.
			// Unfortunately, color0/color1 and color2/color3 are flipped relative to the official OpenGL extension and Intel's documentation!
			uint64_t m_b1 : 5;
			uint64_t m_g1 : 5;
			uint64_t m_r1 : 5;
			uint64_t m_b0 : 5;
			uint64_t m_g0 : 5;
			uint64_t m_r0 : 5;
			uint64_t m_b3 : 5;
			uint64_t m_g3 : 5;
			uint64_t m_r3 : 5;
			uint64_t m_b2 : 5;
			uint64_t m_g2 : 5;
			uint64_t m_r2 : 5;
#else
			// Intel's encoding, and the encoding in the OpenGL FXT1 spec.
			uint64_t m_b0 : 5;
			uint64_t m_g0 : 5;
			uint64_t m_r0 : 5;
			uint64_t m_b1 : 5;
			uint64_t m_g1 : 5;
			uint64_t m_r1 : 5;
			uint64_t m_b2 : 5;
			uint64_t m_g2 : 5;
			uint64_t m_r2 : 5;
			uint64_t m_b3 : 5;
			uint64_t m_g3 : 5;
			uint64_t m_r3 : 5;
#endif
			uint64_t m_alpha : 1;	// 1 = alpha block (unsupported by unpack_fxt1)
			uint64_t m_glsb : 2;	// green LSBs for the two endpoint pairs
			uint64_t m_mode : 1;	// 1 = CC_MIXED (the only mode unpack_fxt1 handles)
		} m_hi;

		uint64_t m_hi_bits;
	};
};
  895. static color_rgba expand_565(const color_rgba& c)
  896. {
  897. return color_rgba((c.r << 3) | (c.r >> 2), (c.g << 2) | (c.g >> 4), (c.b << 3) | (c.b >> 2), 255);
  898. }
  899. // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment.
// Unpacks an FXT1 CC_MIXED, non-alpha block into an 8x4 pixel region
// (pPixels must have room for 32 pixels; rows have a stride of 8).
// Returns false for modes this decoder doesn't support.
bool unpack_fxt1(const void *p, color_rgba *pPixels)
{
	const fxt1_block* pBlock = static_cast<const fxt1_block*>(p);

	// Only CC_MIXED (m_mode = 1) blocks without alpha are handled.
	if (pBlock->m_hi.m_mode == 0)
		return false;
	if (pBlock->m_hi.m_alpha == 1)
		return false;

	// Reconstruct the four 565 endpoints. Green gets a 6th (LSB) bit from m_glsb;
	// for colors 0 and 2 that bit is XOR'd with the MSB of the first selector of
	// the corresponding sub-block (t00/t16).
	color_rgba colors[4];
	colors[0].r = pBlock->m_hi.m_r0;
	colors[0].g = (uint8_t)((pBlock->m_hi.m_g0 << 1) | ((pBlock->m_lo.m_t00 >> 1) ^ (pBlock->m_hi.m_glsb & 1)));
	colors[0].b = pBlock->m_hi.m_b0;
	colors[0].a = 255;

	colors[1].r = pBlock->m_hi.m_r1;
	colors[1].g = (uint8_t)((pBlock->m_hi.m_g1 << 1) | (pBlock->m_hi.m_glsb & 1));
	colors[1].b = pBlock->m_hi.m_b1;
	colors[1].a = 255;

	colors[2].r = pBlock->m_hi.m_r2;
	colors[2].g = (uint8_t)((pBlock->m_hi.m_g2 << 1) | ((pBlock->m_lo.m_t16 >> 1) ^ (pBlock->m_hi.m_glsb >> 1)));
	colors[2].b = pBlock->m_hi.m_b2;
	colors[2].a = 255;

	colors[3].r = pBlock->m_hi.m_r3;
	colors[3].g = (uint8_t)((pBlock->m_hi.m_g3 << 1) | (pBlock->m_hi.m_glsb >> 1));
	colors[3].b = pBlock->m_hi.m_b3;
	colors[3].a = 255;

	for (uint32_t i = 0; i < 4; i++)
		colors[i] = expand_565(colors[i]);

	// Left 4x4 sub-block: BC1-style palette from colors 0/1 (with 1/3, 2/3 blends).
	color_rgba block0_colors[4];
	block0_colors[0] = colors[0];
	block0_colors[1] = color_rgba((colors[0].r * 2 + colors[1].r + 1) / 3, (colors[0].g * 2 + colors[1].g + 1) / 3, (colors[0].b * 2 + colors[1].b + 1) / 3, 255);
	block0_colors[2] = color_rgba((colors[1].r * 2 + colors[0].r + 1) / 3, (colors[1].g * 2 + colors[0].g + 1) / 3, (colors[1].b * 2 + colors[0].b + 1) / 3, 255);
	block0_colors[3] = colors[1];

	for (uint32_t i = 0; i < 16; i++)
	{
		const uint32_t sel = (pBlock->m_sels[i >> 2] >> ((i & 3) * 2)) & 3;

		const uint32_t x = i & 3;
		const uint32_t y = i >> 2;
		pPixels[x + y * 8] = block0_colors[sel];
	}

	// Right 4x4 sub-block: palette from colors 2/3, written at x offset 4.
	color_rgba block1_colors[4];
	block1_colors[0] = colors[2];
	block1_colors[1] = color_rgba((colors[2].r * 2 + colors[3].r + 1) / 3, (colors[2].g * 2 + colors[3].g + 1) / 3, (colors[2].b * 2 + colors[3].b + 1) / 3, 255);
	block1_colors[2] = color_rgba((colors[3].r * 2 + colors[2].r + 1) / 3, (colors[3].g * 2 + colors[2].g + 1) / 3, (colors[3].b * 2 + colors[2].b + 1) / 3, 255);
	block1_colors[3] = colors[3];

	for (uint32_t i = 0; i < 16; i++)
	{
		const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3;

		const uint32_t x = i & 3;
		const uint32_t y = i >> 2;
		pPixels[4 + x + y * 8] = block1_colors[sel];
	}

	return true;
}
  952. //------------------------------------------------------------------------------------------------
  953. // PVRTC2 (non-interpolated, hard_flag=1 modulation=0 subset only!)
// Bit-exact layout of a 64-bit PVRTC2 block: 32 bits of 2-bit modulation values
// followed by 32 bits of color data, interpreted as either the opaque or the
// transparent encoding depending on m_opaque_flag.
struct pvrtc2_block
{
	uint8_t m_modulation[4];	// 2 bits per texel, 16 texels

	union
	{
		union
		{
			// Opaque mode: RGB colora=554 and colorb=555
			struct
			{
				uint32_t m_mod_flag : 1;
				uint32_t m_blue_a : 4;
				uint32_t m_green_a : 5;
				uint32_t m_red_a : 5;
				uint32_t m_hard_flag : 1;
				uint32_t m_blue_b : 5;
				uint32_t m_green_b : 5;
				uint32_t m_red_b : 5;
				uint32_t m_opaque_flag : 1;
			} m_opaque_color_data;

			// Transparent mode: RGBA colora=4433 and colorb=4443
			struct
			{
				uint32_t m_mod_flag : 1;
				uint32_t m_blue_a : 3;
				uint32_t m_green_a : 4;
				uint32_t m_red_a : 4;
				uint32_t m_alpha_a : 3;
				uint32_t m_hard_flag : 1;
				uint32_t m_blue_b : 4;
				uint32_t m_green_b : 4;
				uint32_t m_red_b : 4;
				uint32_t m_alpha_b : 3;
				uint32_t m_opaque_flag : 1;
			} m_trans_color_data;
		};

		uint32_t m_color_data_bits;
	};
};
  993. static color_rgba convert_rgb_555_to_888(const color_rgba& col)
  994. {
  995. return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), 255);
  996. }
  997. static color_rgba convert_rgba_5554_to_8888(const color_rgba& col)
  998. {
  999. return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]);
  1000. }
  1001. // PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC.
  1002. bool unpack_pvrtc2(const void *p, color_rgba *pPixels)
  1003. {
  1004. const pvrtc2_block* pBlock = static_cast<const pvrtc2_block*>(p);
  1005. if ((!pBlock->m_opaque_color_data.m_hard_flag) || (pBlock->m_opaque_color_data.m_mod_flag))
  1006. {
  1007. // This mode isn't supported by the transcoder, so we aren't bothering with it here.
  1008. return false;
  1009. }
  1010. color_rgba colors[4];
  1011. if (pBlock->m_opaque_color_data.m_opaque_flag)
  1012. {
  1013. // colora=554
  1014. color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255);
  1015. // colora=555
  1016. color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255);
  1017. colors[0] = convert_rgb_555_to_888(color_a);
  1018. colors[3] = convert_rgb_555_to_888(color_b);
  1019. colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, 255);
  1020. colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, 255);
  1021. }
  1022. else
  1023. {
  1024. // colora=4433
  1025. color_rgba color_a(
  1026. (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3),
  1027. (pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3),
  1028. (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1),
  1029. pBlock->m_trans_color_data.m_alpha_a << 1);
  1030. //colorb=4443
  1031. color_rgba color_b(
  1032. (pBlock->m_trans_color_data.m_red_b << 1) | (pBlock->m_trans_color_data.m_red_b >> 3),
  1033. (pBlock->m_trans_color_data.m_green_b << 1) | (pBlock->m_trans_color_data.m_green_b >> 3),
  1034. (pBlock->m_trans_color_data.m_blue_b << 1) | (pBlock->m_trans_color_data.m_blue_b >> 3),
  1035. (pBlock->m_trans_color_data.m_alpha_b << 1) | 1);
  1036. colors[0] = convert_rgba_5554_to_8888(color_a);
  1037. colors[3] = convert_rgba_5554_to_8888(color_b);
  1038. }
  1039. colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, (colors[0].a * 5 + colors[3].a * 3) / 8);
  1040. colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, (colors[0].a * 3 + colors[3].a * 5) / 8);
  1041. for (uint32_t i = 0; i < 16; i++)
  1042. {
  1043. const uint32_t sel = (pBlock->m_modulation[i >> 2] >> ((i & 3) * 2)) & 3;
  1044. pPixels[i] = colors[sel];
  1045. }
  1046. return true;
  1047. }
  1048. //------------------------------------------------------------------------------------------------
  1049. // ETC2 EAC R11 or RG11
  1050. struct etc2_eac_r11
  1051. {
  1052. uint64_t m_base : 8;
  1053. uint64_t m_table : 4;
  1054. uint64_t m_mul : 4;
  1055. uint64_t m_sels_0 : 8;
  1056. uint64_t m_sels_1 : 8;
  1057. uint64_t m_sels_2 : 8;
  1058. uint64_t m_sels_3 : 8;
  1059. uint64_t m_sels_4 : 8;
  1060. uint64_t m_sels_5 : 8;
  1061. uint64_t get_sels() const
  1062. {
  1063. return ((uint64_t)m_sels_0 << 40U) | ((uint64_t)m_sels_1 << 32U) | ((uint64_t)m_sels_2 << 24U) | ((uint64_t)m_sels_3 << 16U) | ((uint64_t)m_sels_4 << 8U) | m_sels_5;
  1064. }
  1065. void set_sels(uint64_t v)
  1066. {
  1067. m_sels_0 = (v >> 40U) & 0xFF;
  1068. m_sels_1 = (v >> 32U) & 0xFF;
  1069. m_sels_2 = (v >> 24U) & 0xFF;
  1070. m_sels_3 = (v >> 16U) & 0xFF;
  1071. m_sels_4 = (v >> 8U) & 0xFF;
  1072. m_sels_5 = v & 0xFF;
  1073. }
  1074. };
// An ETC2 EAC RG11 block is two independent R11 blocks: m_c[0]=red, m_c[1]=green.
struct etc2_eac_rg11
{
	etc2_eac_r11 m_c[2];
};
  1079. void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c)
  1080. {
  1081. const etc2_eac_r11* pBlock = static_cast<const etc2_eac_r11*>(p);
  1082. const uint64_t sels = pBlock->get_sels();
  1083. const int base = (int)pBlock->m_base * 8 + 4;
  1084. const int mul = pBlock->m_mul ? ((int)pBlock->m_mul * 8) : 1;
  1085. const int table = (int)pBlock->m_table;
  1086. for (uint32_t y = 0; y < 4; y++)
  1087. {
  1088. for (uint32_t x = 0; x < 4; x++)
  1089. {
  1090. const uint32_t shift = 45 - ((y + x * 4) * 3);
  1091. const uint32_t sel = (uint32_t)((sels >> shift) & 7);
  1092. int val = base + g_etc2_eac_tables[table][sel] * mul;
  1093. val = clamp<int>(val, 0, 2047);
  1094. // Convert to 8-bits with rounding
  1095. //pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1024) / 2047);
  1096. pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1023) / 2047);
  1097. } // x
  1098. } // y
  1099. }
  1100. void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels)
  1101. {
  1102. for (uint32_t c = 0; c < 2; c++)
  1103. {
  1104. const etc2_eac_r11* pBlock = &static_cast<const etc2_eac_rg11*>(p)->m_c[c];
  1105. unpack_etc2_eac_r(pBlock, pPixels, c);
  1106. }
  1107. }
  1108. //------------------------------------------------------------------------------------------------
  1109. // UASTC
  1110. void unpack_uastc(const void* p, color_rgba* pPixels)
  1111. {
  1112. basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false);
  1113. }
  1114. // Unpacks to RGBA, R, RG, or A. LDR GPU texture formats only.
// Unpacks to RGBA, R, RG, or A. LDR GPU texture formats only.
// Dispatches to the per-format block decoder; pPixels receives one block's worth
// of 32bpp pixels. Returns false on decode failure or unsupported/HDR formats.
bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
{
	switch (fmt)
	{
	case texture_format::cBC1:
	{
		unpack_bc1(pBlock, pPixels, true);
		break;
	}
	case texture_format::cBC1_NV:
	{
		unpack_bc1_nv(pBlock, pPixels, true);
		break;
	}
	case texture_format::cBC1_AMD:
	{
		unpack_bc1_amd(pBlock, pPixels, true);
		break;
	}
	case texture_format::cBC3:
	{
		return unpack_bc3(pBlock, pPixels);
	}
	case texture_format::cBC4:
	{
		// Unpack to R
		unpack_bc4(pBlock, &pPixels[0].r, sizeof(color_rgba));
		break;
	}
	case texture_format::cBC5:
	{
		unpack_bc5(pBlock, pPixels);
		break;
	}
	case texture_format::cBC7:
	{
		return unpack_bc7(pBlock, pPixels);
	}
	// Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color)
	case texture_format::cETC2_RGB:
	case texture_format::cETC1:
	case texture_format::cETC1S:
	{
		return unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels);
	}
	case texture_format::cETC2_RGBA:
	{
		// Color lives in the second 8-byte block; alpha (EAC) in the first.
		if (!unpack_etc1(static_cast<const etc_block*>(pBlock)[1], pPixels))
			return false;
		unpack_etc2_eac(pBlock, pPixels);
		break;
	}
	case texture_format::cETC2_ALPHA:
	{
		// Unpack to A
		unpack_etc2_eac(pBlock, pPixels);
		break;
	}
	case texture_format::cBC6HSigned:
	case texture_format::cBC6HUnsigned:
	case texture_format::cASTC_HDR_4x4:
	case texture_format::cUASTC_HDR_4x4:
	case texture_format::cASTC_HDR_6x6:
	{
		// Can't unpack HDR blocks in unpack_block() because it returns 32bpp pixel data.
		assert(0);
		return false;
	}
	case texture_format::cASTC_LDR_4x4:
	{
		const bool astc_srgb = false;
		bool status = basisu_astc::astc::decompress_ldr(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);
		assert(status);
		if (!status)
			return false;
		break;
	}
	case texture_format::cATC_RGB:
	{
		unpack_atc(pBlock, pPixels);
		break;
	}
	case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
	{
		// Color is in the second 8 bytes; BC4-style alpha in the first 8.
		unpack_atc(static_cast<const uint8_t*>(pBlock) + 8, pPixels);
		unpack_bc4(pBlock, &pPixels[0].a, sizeof(color_rgba));
		break;
	}
	case texture_format::cFXT1_RGB:
	{
		unpack_fxt1(pBlock, pPixels);
		break;
	}
	case texture_format::cPVRTC2_4_RGBA:
	{
		unpack_pvrtc2(pBlock, pPixels);
		break;
	}
	case texture_format::cETC2_R11_EAC:
	{
		unpack_etc2_eac_r(static_cast<const etc2_eac_r11 *>(pBlock), pPixels, 0);
		break;
	}
	case texture_format::cETC2_RG11_EAC:
	{
		unpack_etc2_eac_rg(pBlock, pPixels);
		break;
	}
	case texture_format::cUASTC4x4:
	{
		unpack_uastc(pBlock, pPixels);
		break;
	}
	default:
	{
		assert(0);
		// TODO
		return false;
	}
	}
	return true;
}
// Unpacks a single HDR block (ASTC HDR 4x4/6x6, UASTC HDR, or BC6H) to float
// RGBA pixels. pPixels must hold block_width*block_height vec4F's. Returns
// false on decode failure or for non-HDR formats.
bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels)
{
	switch (fmt)
	{
	case texture_format::cASTC_HDR_6x6:
	{
#if BASISU_USE_GOOGLE_ASTC_DECODER
		bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 6, 6);
		assert(status);
		if (!status)
			return false;
#else
		// Use our decoder
		basist::half_float half_block[6 * 6][4];

		astc_helpers::log_astc_block log_blk;
		if (!astc_helpers::unpack_block(pBlock, log_blk, 6, 6))
			return false;
		if (!astc_helpers::decode_block(log_blk, half_block, 6, 6, astc_helpers::cDecodeModeHDR16))
			return false;

		// Widen half floats to 32-bit floats.
		for (uint32_t p = 0; p < (6 * 6); p++)
		{
			pPixels[p][0] = basist::half_to_float(half_block[p][0]);
			pPixels[p][1] = basist::half_to_float(half_block[p][1]);
			pPixels[p][2] = basist::half_to_float(half_block[p][2]);
			pPixels[p][3] = basist::half_to_float(half_block[p][3]);
		}
#endif
		return true;
	}
	case texture_format::cASTC_HDR_4x4:
	case texture_format::cUASTC_HDR_4x4:
	{
#if BASISU_USE_GOOGLE_ASTC_DECODER
		// Use Google's decoder
		bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 4, 4);
		assert(status);
		if (!status)
			return false;
#else
		// Use our decoder
		basist::half_float half_block[16][4];

		astc_helpers::log_astc_block log_blk;
		if (!astc_helpers::unpack_block(pBlock, log_blk, 4, 4))
			return false;
		if (!astc_helpers::decode_block(log_blk, half_block, 4, 4, astc_helpers::cDecodeModeHDR16))
			return false;

		for (uint32_t p = 0; p < 16; p++)
		{
			pPixels[p][0] = basist::half_to_float(half_block[p][0]);
			pPixels[p][1] = basist::half_to_float(half_block[p][1]);
			pPixels[p][2] = basist::half_to_float(half_block[p][2]);
			pPixels[p][3] = basist::half_to_float(half_block[p][3]);
		}
		//memset(pPixels, 0, sizeof(vec4F) * 16);
#endif
		return true;
	}
	case texture_format::cBC6HSigned:
	case texture_format::cBC6HUnsigned:
	{
		// BC6H is RGB only; alpha is forced to 1.0.
		basist::half_float half_block[16][3];

		unpack_bc6h(pBlock, half_block, fmt == texture_format::cBC6HSigned);

		for (uint32_t p = 0; p < 16; p++)
		{
			pPixels[p][0] = basist::half_to_float(half_block[p][0]);
			pPixels[p][1] = basist::half_to_float(half_block[p][1]);
			pPixels[p][2] = basist::half_to_float(half_block[p][2]);
			pPixels[p][3] = 1.0f;
		}

		return true;
	}
	default:
	{
		break;
	}
	}

	assert(0);
	return false;
}
// Unpacks the entire GPU image into a 32bpp LDR image (resized to match).
// Returns false if any block fails to decode; all decodable blocks are still
// written, with failed blocks left as the last-written/black pixels.
bool gpu_image::unpack(image& img) const
{
	img.resize(get_pixel_width(), get_pixel_height());
	img.set_all(g_black_color);

	if (!img.get_width() || !img.get_height())
		return true;

	// PVRTC1 is handled whole-image (not per-block) via the pvrtc4_image helper,
	// which deswizzles before unpacking.
	if ((m_fmt == texture_format::cPVRTC1_4_RGB) || (m_fmt == texture_format::cPVRTC1_4_RGBA))
	{
		pvrtc4_image pi(m_width, m_height);

		if (get_total_blocks() != pi.get_total_blocks())
			return false;

		memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes());

		pi.deswizzle();

		pi.unpack_all_pixels(img);

		return true;
	}

	assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));

	// Scratch block, pre-filled with black so partially-decoded blocks stay sane.
	color_rgba pixels[cMaxBlockSize * cMaxBlockSize];
	for (uint32_t i = 0; i < cMaxBlockSize * cMaxBlockSize; i++)
		pixels[i] = g_black_color;

	bool success = true;

	for (uint32_t by = 0; by < m_blocks_y; by++)
	{
		for (uint32_t bx = 0; bx < m_blocks_x; bx++)
		{
			const void* pBlock = get_block_ptr(bx, by);

			if (!unpack_block(m_fmt, pBlock, pixels))
				success = false;

			// Clipped write handles images whose size isn't a block multiple.
			img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);
		} // bx
	} // by

	return success;
}
  1349. bool gpu_image::unpack_hdr(imagef& img) const
  1350. {
  1351. if ((m_fmt != texture_format::cASTC_HDR_4x4) && (m_fmt != texture_format::cUASTC_HDR_4x4) && (m_fmt != texture_format::cASTC_HDR_6x6) &&
  1352. (m_fmt != texture_format::cBC6HUnsigned) && (m_fmt != texture_format::cBC6HSigned))
  1353. {
  1354. // Can't call on LDR images, at least currently. (Could unpack the LDR data and convert to float.)
  1355. assert(0);
  1356. return false;
  1357. }
  1358. img.resize(get_pixel_width(), get_pixel_height());
  1359. img.set_all(vec4F(0.0f));
  1360. if (!img.get_width() || !img.get_height())
  1361. return true;
  1362. assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));
  1363. vec4F pixels[cMaxBlockSize * cMaxBlockSize];
  1364. clear_obj(pixels);
  1365. bool success = true;
  1366. for (uint32_t by = 0; by < m_blocks_y; by++)
  1367. {
  1368. for (uint32_t bx = 0; bx < m_blocks_x; bx++)
  1369. {
  1370. const void* pBlock = get_block_ptr(bx, by);
  1371. if (!unpack_block_hdr(m_fmt, pBlock, pixels))
  1372. success = false;
  1373. img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);
  1374. } // bx
  1375. } // by
  1376. return success;
  1377. }
// KTX1 texture file writing

// 12-byte KTX 1.1 file identifier magic, copied verbatim into ktx_header::m_identifier.
static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };

// KTX/GL enums
// GL enum values written into the KTX header; only the ones this writer can emit.
enum
{
	// Endianness sentinel stored in ktx_header::m_endianness by the file creator.
	KTX_ENDIAN = 0x04030201,
	KTX_OPPOSITE_ENDIAN = 0x01020304,

	// Compressed internal format: ETC1.
	KTX_ETC1_RGB8_OES = 0x8D64,

	// Base internal formats (component layout), used for m_glBaseInternalFormat.
	KTX_RED = 0x1903,
	KTX_RG = 0x8227,
	KTX_RGB = 0x1907,
	KTX_RGBA = 0x1908,

	// Compressed internal formats: BC1/BC3 (S3TC) and BC4/BC5 (RGTC).
	KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0,
	KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3,
	KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB,
	KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD,

	// Compressed internal formats: ETC2.
	KTX_COMPRESSED_RGB8_ETC2 = 0x9274,
	KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278,

	// Compressed internal formats: BC7 (BPTC) and BC6H (BPTC float).
	KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C,
	KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D,
	KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E,
	KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F,

	// Compressed internal formats: PVRTC1 4bpp.
	KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,
	KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,

	// Compressed internal formats: ASTC LDR, all block footprints.
	KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,
	KTX_COMPRESSED_RGBA_ASTC_5x4_KHR = 0x93B1,
	KTX_COMPRESSED_RGBA_ASTC_5x5_KHR = 0x93B2,
	KTX_COMPRESSED_RGBA_ASTC_6x5_KHR = 0x93B3,
	KTX_COMPRESSED_RGBA_ASTC_6x6_KHR = 0x93B4,
	KTX_COMPRESSED_RGBA_ASTC_8x5_KHR = 0x93B5,
	KTX_COMPRESSED_RGBA_ASTC_8x6_KHR = 0x93B6,
	KTX_COMPRESSED_RGBA_ASTC_8x8_KHR = 0x93B7,
	KTX_COMPRESSED_RGBA_ASTC_10x5_KHR = 0x93B8,
	KTX_COMPRESSED_RGBA_ASTC_10x6_KHR = 0x93B9,
	KTX_COMPRESSED_RGBA_ASTC_10x8_KHR = 0x93BA,
	KTX_COMPRESSED_RGBA_ASTC_10x10_KHR = 0x93BB,
	KTX_COMPRESSED_RGBA_ASTC_12x10_KHR = 0x93BC,
	KTX_COMPRESSED_RGBA_ASTC_12x12_KHR = 0x93BD,

	// Compressed internal formats: ASTC sRGB variants.
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR = 0x93D1,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR = 0x93D2,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR = 0x93D3,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR = 0x93D4,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR = 0x93D5,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR = 0x93D6,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR = 0x93D7,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR = 0x93D8,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR = 0x93D9,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR = 0x93DA,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR = 0x93DB,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR = 0x93DC,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR = 0x93DD,

	// Non-standard: placeholder enum for raw UASTC 4x4 payloads.
	KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value!

	// Compressed internal formats: ATC (AMD), FXT1 (3DFX), PVRTC2, EAC.
	KTX_ATC_RGB_AMD = 0x8C92,
	KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE,
	KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0,
	KTX_COMPRESSED_RGBA_FXT1_3DFX = 0x86B1,
	KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG = 0x9138,
	KTX_COMPRESSED_R11_EAC = 0x9270,
	KTX_COMPRESSED_RG11_EAC = 0x9272
};
// KTX1 file header. Field order/sizes match the on-disk layout; multi-byte
// fields go through packed_uint<4> so the struct can be written verbatim.
struct ktx_header
{
	uint8_t m_identifier[12];               // must equal g_ktx_file_id
	packed_uint<4> m_endianness;            // KTX_ENDIAN as written by the creator
	packed_uint<4> m_glType;                // left 0 by this writer (compressed data)
	packed_uint<4> m_glTypeSize;            // set to 1 by this writer
	packed_uint<4> m_glFormat;              // left 0 by this writer (compressed data)
	packed_uint<4> m_glInternalFormat;      // compressed GL internal format enum
	packed_uint<4> m_glBaseInternalFormat;  // e.g. KTX_RGB / KTX_RGBA / KTX_RED / KTX_RG
	packed_uint<4> m_pixelWidth;
	packed_uint<4> m_pixelHeight;
	packed_uint<4> m_pixelDepth;            // left 0 (2D textures only)
	packed_uint<4> m_numberOfArrayElements; // 0 when not an array texture
	packed_uint<4> m_numberOfFaces;         // 1, or 6 for cubemaps
	packed_uint<4> m_numberOfMipmapLevels;
	packed_uint<4> m_bytesOfKeyValueData;   // left 0 (no key/value metadata emitted)

	// Zeroes the entire header (including the identifier, which is filled later).
	void clear() { clear_obj(*this); }
};
// Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index]
// Serializes the input into an in-memory KTX1 file, appending to ktx_data.
// For cubemaps, gpu_images.size() must be a multiple of 6 (6 faces per array element).
// Returns false (after asserting) on malformed input or an unsupported format.
bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag)
{
	if (!gpu_images.size())
	{
		assert(0);
		return false;
	}

	uint32_t width = 0, height = 0, total_levels = 0;
	basisu::texture_format fmt = texture_format::cInvalidTextureFormat;

	// Sanity check the input
	if (cubemap_flag)
	{
		// Cubemap (arrays) require 6 faces per array element.
		if ((gpu_images.size() % 6) != 0)
		{
			assert(0);
			return false;
		}
	}

	for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
	{
		const gpu_image_vec &levels = gpu_images[array_index];

		if (!levels.size())
		{
			// Empty mip chain
			assert(0);
			return false;
		}

		if (!array_index)
		{
			// The first face/layer establishes the dimensions, level count, and format.
			width = levels[0].get_pixel_width();
			height = levels[0].get_pixel_height();
			total_levels = (uint32_t)levels.size();
			fmt = levels[0].get_format();
		}
		else
		{
			if ((width != levels[0].get_pixel_width()) ||
				(height != levels[0].get_pixel_height()) ||
				(total_levels != levels.size()))
			{
				// All cubemap/texture array faces must be the same dimension
				assert(0);
				return false;
			}
		}

		for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
		{
			if (level_index)
			{
				// Each level must be exactly half the base (clamped to 1 pixel).
				if ( (levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) ||
					(levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)) )
				{
					// Malformed mipmap chain
					assert(0);
					return false;
				}
			}

			if (fmt != levels[level_index].get_format())
			{
				// All input textures must use the same GPU format
				assert(0);
				return false;
			}
		}
	}

	// Translate our texture_format into the GL internal/base-internal format pair
	// that gets written into the KTX header.
	uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB;

	switch (fmt)
	{
	case texture_format::cBC1:
	case texture_format::cBC1_NV:
	case texture_format::cBC1_AMD:
	{
		internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT;
		break;
	}
	case texture_format::cBC3:
	{
		internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	case texture_format::cBC4:
	{
		internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT;
		base_internal_fmt = KTX_RED;
		break;
	}
	case texture_format::cBC5:
	{
		internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT;
		base_internal_fmt = KTX_RG;
		break;
	}
	case texture_format::cETC1:
	case texture_format::cETC1S:
	{
		internal_fmt = KTX_ETC1_RGB8_OES;
		break;
	}
	case texture_format::cETC2_RGB:
	{
		internal_fmt = KTX_COMPRESSED_RGB8_ETC2;
		break;
	}
	case texture_format::cETC2_RGBA:
	{
		internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	case texture_format::cBC6HSigned:
	{
		internal_fmt = KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	case texture_format::cBC6HUnsigned:
	{
		internal_fmt = KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	case texture_format::cBC7:
	{
		internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	case texture_format::cPVRTC1_4_RGB:
	{
		internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG;
		break;
	}
	case texture_format::cPVRTC1_4_RGBA:
	{
		internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	case texture_format::cASTC_HDR_6x6:
	{
		internal_fmt = KTX_COMPRESSED_RGBA_ASTC_6x6_KHR;
		// TODO: should we write RGB? We don't support generating HDR 6x6 with alpha.
		base_internal_fmt = KTX_RGBA;
		break;
	}
	// We use different enums for HDR vs. LDR ASTC, but internally they are both just ASTC.
	case texture_format::cASTC_LDR_4x4:
	case texture_format::cASTC_HDR_4x4:
	case texture_format::cUASTC_HDR_4x4: // UASTC_HDR is just HDR-only ASTC
	{
		internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	case texture_format::cATC_RGB:
	{
		internal_fmt = KTX_ATC_RGB_AMD;
		break;
	}
	case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
	{
		internal_fmt = KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	case texture_format::cETC2_R11_EAC:
	{
		internal_fmt = KTX_COMPRESSED_R11_EAC;
		base_internal_fmt = KTX_RED;
		break;
	}
	case texture_format::cETC2_RG11_EAC:
	{
		internal_fmt = KTX_COMPRESSED_RG11_EAC;
		base_internal_fmt = KTX_RG;
		break;
	}
	case texture_format::cUASTC4x4:
	{
		internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	case texture_format::cFXT1_RGB:
	{
		internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX;
		break;
	}
	case texture_format::cPVRTC2_4_RGBA:
	{
		internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG;
		base_internal_fmt = KTX_RGBA;
		break;
	}
	default:
	{
		// TODO
		assert(0);
		return false;
	}
	}

	// Fill out the header. clear() zeroes glType/glFormat/pixelDepth/
	// bytesOfKeyValueData, which is what KTX1 expects for compressed 2D data.
	ktx_header header;
	header.clear();

	memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id));
	header.m_endianness = KTX_ENDIAN;
	header.m_pixelWidth = width;
	header.m_pixelHeight = height;
	header.m_glTypeSize = 1;
	header.m_glInternalFormat = internal_fmt;
	header.m_glBaseInternalFormat = base_internal_fmt;

	// KTX1 uses 0 (not 1) in numberOfArrayElements for non-array textures.
	header.m_numberOfArrayElements = (uint32_t)(cubemap_flag ? (gpu_images.size() / 6) : gpu_images.size());
	if (header.m_numberOfArrayElements == 1)
		header.m_numberOfArrayElements = 0;

	header.m_numberOfMipmapLevels = total_levels;
	header.m_numberOfFaces = cubemap_flag ? 6 : 1;

	append_vector(ktx_data, (uint8_t*)&header, sizeof(header));

	for (uint32_t level_index = 0; level_index < total_levels; level_index++)
	{
		uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes();

		// Per the KTX1 spec, imageSize for a non-array cubemap is the size of ONE
		// face; for all other layouts it covers every face and array element of
		// the level. (NOTE(review): condition matches spec intent — verify against
		// the KTX 1.1 "imageSize" definition.)
		if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1))
		{
			img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements);
		}

		// KTX1 requires 4-byte alignment; all supported block formats qualify.
		assert(img_size && ((img_size & 3) == 0));

		packed_uint<4> packed_img_size(img_size);
		append_vector(ktx_data, (uint8_t*)&packed_img_size, sizeof(packed_img_size));

		uint32_t bytes_written = 0;
		(void)bytes_written;

		// Emit the level's payload: for each array element, all of its faces.
		for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++)
		{
			for (uint32_t face_index = 0; face_index < header.m_numberOfFaces; face_index++)
			{
				const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index];

				append_vector(ktx_data, (uint8_t*)img.get_ptr(), img.get_size_in_bytes());

				bytes_written += img.get_size_in_bytes();
			}
		} // array_index
	} // level_index

	return true;
}
  1699. bool does_dds_support_format(texture_format fmt)
  1700. {
  1701. switch (fmt)
  1702. {
  1703. case texture_format::cBC1_NV:
  1704. case texture_format::cBC1_AMD:
  1705. case texture_format::cBC1:
  1706. case texture_format::cBC3:
  1707. case texture_format::cBC4:
  1708. case texture_format::cBC5:
  1709. case texture_format::cBC6HSigned:
  1710. case texture_format::cBC6HUnsigned:
  1711. case texture_format::cBC7:
  1712. return true;
  1713. default:
  1714. break;
  1715. }
  1716. return false;
  1717. }
  1718. // Only supports the basic DirectX BC texture formats.
  1719. // gpu_images array is: [face/layer][mipmap level]
  1720. // For cubemap arrays, # of face/layers must be a multiple of 6.
  1721. // Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped
  1722. // and cubemap, cubemap mipmapped, and cubemap array mipmapped.
  1723. bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
  1724. {
  1725. return false;
  1726. }
  1727. bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
  1728. {
  1729. uint8_vec dds_data;
  1730. if (!write_dds_file(dds_data, gpu_images, cubemap_flag, use_srgb_format))
  1731. return false;
  1732. if (!write_vec_to_file(pFilename, dds_data))
  1733. {
  1734. fprintf(stderr, "write_dds_file: Failed writing DDS file data\n");
  1735. return false;
  1736. }
  1737. return true;
  1738. }
  1739. bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips, basisu::vector<imagef>& hdr_mips)
  1740. {
  1741. return false;
  1742. }
  1743. bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag, bool use_srgb_format)
  1744. {
  1745. std::string extension(string_tolower(string_get_extension(pFilename)));
  1746. uint8_vec filedata;
  1747. if (extension == "ktx")
  1748. {
  1749. if (!create_ktx_texture_file(filedata, g, cubemap_flag))
  1750. return false;
  1751. }
  1752. else if (extension == "pvr")
  1753. {
  1754. // TODO
  1755. return false;
  1756. }
  1757. else if (extension == "dds")
  1758. {
  1759. if (!write_dds_file(filedata, g, cubemap_flag, use_srgb_format))
  1760. return false;
  1761. }
  1762. else
  1763. {
  1764. // unsupported texture format
  1765. assert(0);
  1766. return false;
  1767. }
  1768. return basisu::write_vec_to_file(pFilename, filedata);
  1769. }
  1770. bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format)
  1771. {
  1772. basisu::vector<gpu_image_vec> a;
  1773. a.push_back(g);
  1774. return write_compressed_texture_file(pFilename, a, false, use_srgb_format);
  1775. }
  1776. bool write_compressed_texture_file(const char* pFilename, const gpu_image& g, bool use_srgb_format)
  1777. {
  1778. basisu::vector<gpu_image_vec> v;
  1779. enlarge_vector(v, 1)->push_back(g);
  1780. return write_compressed_texture_file(pFilename, v, false, use_srgb_format);
  1781. }
//const uint32_t OUT_FILE_MAGIC = 'TEXC';
// Header of a 3DFX .OUT texture file, written verbatim by write_3dfx_out_file.
struct out_file_header
{
	packed_uint<4> m_magic;  // bytes 'C','X','E','T' (reads as 'TEXC' little-endian)
	packed_uint<4> m_pad;    // always written as 0
	packed_uint<4> m_width;  // padded width in pixels (FXT1 blocks are 8 wide)
	packed_uint<4> m_height; // padded height in pixels (FXT1 blocks are 4 tall)
};
  1790. // As no modern tool supports FXT1 format .KTX files, let's write .OUT files and make sure 3DFX's original tools shipped in 1999 can decode our encoded output.
  1791. bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi)
  1792. {
  1793. out_file_header hdr;
  1794. //hdr.m_magic = OUT_FILE_MAGIC;
  1795. hdr.m_magic.m_bytes[0] = 67;
  1796. hdr.m_magic.m_bytes[1] = 88;
  1797. hdr.m_magic.m_bytes[2] = 69;
  1798. hdr.m_magic.m_bytes[3] = 84;
  1799. hdr.m_pad = 0;
  1800. hdr.m_width = gi.get_blocks_x() * 8;
  1801. hdr.m_height = gi.get_blocks_y() * 4;
  1802. FILE* pFile = nullptr;
  1803. #ifdef _WIN32
  1804. fopen_s(&pFile, pFilename, "wb");
  1805. #else
  1806. pFile = fopen(pFilename, "wb");
  1807. #endif
  1808. if (!pFile)
  1809. return false;
  1810. fwrite(&hdr, sizeof(hdr), 1, pFile);
  1811. fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile);
  1812. return fclose(pFile) != EOF;
  1813. }
  1814. // The .astc texture format is readable using ARM's astcenc, AMD Compressonator, and other engines/tools. It oddly doesn't support mipmaps, limiting
  1815. // its usefulness/relevance.
  1816. // https://github.com/ARM-software/astc-encoder/blob/main/Docs/FileFormat.md
  1817. bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y)
  1818. {
  1819. assert(pBlocks && (block_width >= 4) && (block_height >= 4) && (dim_x > 0) && (dim_y > 0));
  1820. uint8_vec file_data;
  1821. file_data.push_back(0x13);
  1822. file_data.push_back(0xAB);
  1823. file_data.push_back(0xA1);
  1824. file_data.push_back(0x5C);
  1825. file_data.push_back((uint8_t)block_width);
  1826. file_data.push_back((uint8_t)block_height);
  1827. file_data.push_back(1);
  1828. file_data.push_back((uint8_t)dim_x);
  1829. file_data.push_back((uint8_t)(dim_x >> 8));
  1830. file_data.push_back((uint8_t)(dim_x >> 16));
  1831. file_data.push_back((uint8_t)dim_y);
  1832. file_data.push_back((uint8_t)(dim_y >> 8));
  1833. file_data.push_back((uint8_t)(dim_y >> 16));
  1834. file_data.push_back((uint8_t)1);
  1835. file_data.push_back((uint8_t)0);
  1836. file_data.push_back((uint8_t)0);
  1837. const uint32_t num_blocks_x = (dim_x + block_width - 1) / block_width;
  1838. const uint32_t num_blocks_y = (dim_y + block_height - 1) / block_height;
  1839. const uint32_t total_bytes = num_blocks_x * num_blocks_y * 16;
  1840. const size_t cur_size = file_data.size();
  1841. file_data.resize(cur_size + total_bytes);
  1842. memcpy(&file_data[cur_size], pBlocks, total_bytes);
  1843. return write_vec_to_file(pFilename, file_data);
  1844. }
  1845. } // basisu