bptc.c 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522
  1. /*
  2. Copyright (c) 2015 Harm Hanemaaijer <[email protected]>
  3. Permission to use, copy, modify, and/or distribute this software for any
  4. purpose with or without fee is hereby granted, provided that the above
  5. copyright notice and this permission notice appear in all copies.
  6. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  7. WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  8. MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  9. ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  10. WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  11. ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  12. OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  13. */
  14. enum
  15. {
  16. BPTC_MODE_ALLOWED_ALL =0xFF,
  17. MODES_ALLOWED_OPAQUE_ONLY =0x100,
  18. MODES_ALLOWED_NON_OPAQUE_ONLY =0x200,
  19. MODES_ALLOWED_PUNCHTHROUGH_ONLY =0x400,
  20. BPTC_FLOAT_MODE_ALLOWED_ALL =0x3FFF,
  21. TWO_COLORS =0x20000,
  22. };
  23. static unsigned int pack_r(int r) {return (unsigned int)r;}
  24. static unsigned int pack_g(int g) {return (unsigned int)g << 8;}
  25. static unsigned int pack_b(int b) {return (unsigned int)b << 16;}
  26. static unsigned int pack_a(int a) {return (unsigned int)a << 24;}
  27. #ifndef TGP_SHARED_FUNCTIONS // needed for Android compilation
  28. #define TGP_SHARED_FUNCTIONS
  29. static unsigned int pack_rgba(int r, int g, int b, int a) {return (unsigned int)r | ((unsigned int)g << 8) | ((unsigned int)b << 16) | ((unsigned int)a << 24);}
  30. #endif
  31. static uint64_t pack_r16(unsigned int r16) {return r16;}
  32. static uint64_t pack_g16(unsigned int g16) {return g16 << 16;}
  33. static uint64_t pack_b16(unsigned int b16) {return (uint64_t)b16 << 32;}
  34. static uint64_t pack_a16(unsigned int a16) {return (uint64_t)a16 << 48;}
  35. static uint64_t pack_rgb16(uint16_t r16, uint16_t g16, uint16_t b16) {return (uint64_t)r16 | ((uint64_t)g16 << 16) | ((uint64_t)b16 << 32);}
  36. static uint64_t pack_rgba16(uint16_t r16, uint16_t g16, uint16_t b16, uint16_t a16) {return (uint64_t)r16 | ((uint64_t)g16 << 16) | ((uint64_t)b16 << 32) | ((uint64_t)a16 << 48);}
  37. static int pixel_get_r(unsigned int pixel) {return pixel & 0xFF;}
  38. static int pixel_get_g(unsigned int pixel) {return (pixel & 0xFF00) >> 8;}
  39. static int pixel_get_b(unsigned int pixel) {return (pixel & 0xFF0000) >> 16;}
  40. static int pixel_get_a(unsigned int pixel) {return (pixel & 0xFF000000) >> 24;}
  41. /*#include <stdlib.h>
  42. #include <stdio.h>
  43. #include <stdint.h>
  44. #include "texgenpack.h"
  45. #include "decode.h"
  46. #include "packing.h"*/
  47. // Functions for BPTC/BC7/BC6H decompression.
  48. static const unsigned char table_P2[64 * 16] = {
  49. 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,
  50. 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,
  51. 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,
  52. 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,
  53. 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,
  54. 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,
  55. 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,
  56. 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,
  57. 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
  58. 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,
  59. 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1,
  60. 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,
  61. 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,
  62. 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,
  63. 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
  64. 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
  65. 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1,
  66. 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,
  67. 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,
  68. 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,
  69. 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,
  70. 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0,
  71. 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,
  72. 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,
  73. 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,
  74. 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,
  75. 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,
  76. 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,
  77. 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0,
  78. 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
  79. 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,
  80. 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,
  81. 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,
  82. 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,
  83. 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0,
  84. 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,
  85. 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,
  86. 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,
  87. 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,
  88. 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,
  89. 0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,
  90. 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,
  91. 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0,
  92. 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,
  93. 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
  94. 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1,
  95. 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1,
  96. 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,
  97. 0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,
  98. 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,
  99. 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,
  100. 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,
  101. 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1,
  102. 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,
  103. 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0,
  104. 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,
  105. 0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,
  106. 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1,
  107. 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,
  108. 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1,
  109. 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1,
  110. 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0,
  111. 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0,
  112. 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1
  113. };
  114. static const unsigned char table_P3[64 * 16] = {
  115. 0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2,
  116. 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1,
  117. 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1,
  118. 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1,
  119. 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2,
  120. 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2,
  121. 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1,
  122. 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1,
  123. 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,
  124. 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,
  125. 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,
  126. 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,
  127. 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2,
  128. 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2,
  129. 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2,
  130. 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0,
  131. 0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2,
  132. 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0,
  133. 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2,
  134. 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1,
  135. 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2,
  136. 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1,
  137. 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2,
  138. 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0,
  139. 0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0,
  140. 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2,
  141. 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0,
  142. 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1,
  143. 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2,
  144. 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2,
  145. 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1,
  146. 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1,
  147. 0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2,
  148. 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1,
  149. 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2,
  150. 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0,
  151. 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0,
  152. 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,
  153. 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0,
  154. 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1,
  155. 0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1,
  156. 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2,
  157. 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1,
  158. 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2,
  159. 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1,
  160. 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1,
  161. 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1,
  162. 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1,
  163. 0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2,
  164. 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1,
  165. 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2,
  166. 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2,
  167. 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2,
  168. 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2,
  169. 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2,
  170. 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2,
  171. 0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2,
  172. 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2,
  173. 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2,
  174. 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2,
  175. 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1,
  176. 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2,
  177. 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2,
  178. 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0,
  179. };
  180. typedef struct {
  181. uint64_t data0;
  182. uint64_t data1;
  183. int index;
  184. } Block;
  185. static uint32_t block_extract_bits(Block *block, int nu_bits) {
  186. uint32_t value = 0;
  187. for (int i = 0; i < nu_bits; i++) {
  188. if (block->index < 64) {
  189. int shift = block->index - i;
  190. if (shift < 0)
  191. value |= (block->data0 & ((uint64_t)1 << block->index)) << (- shift);
  192. else
  193. value |= (block->data0 & ((uint64_t)1 << block->index)) >> shift;
  194. }
  195. else {
  196. int shift = ((block->index - 64) - i);
  197. if (shift < 0)
  198. value |= (block->data1 & ((uint64_t)1 << (block->index - 64))) << (- shift);
  199. else
  200. value |= (block->data1 & ((uint64_t)1 << (block->index - 64))) >> shift;
  201. }
  202. block->index++;
  203. }
  204. // if (block->index > 128)
  205. // printf("Block overflow (%d)\n", block->index);
  206. return value;
  207. }
  208. static inline uint32_t get_bits_uint64(uint64_t data, int bit0, int bit1) {return (data >> bit0) & ((1 << (bit1 - bit0 + 1)) - 1);} // ESENTHEL CHANGED
  209. static const unsigned char color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 };
  210. // Note: precision includes P-bits!
  211. static const unsigned char color_precision_plus_pbit_table[8] = { 5, 7, 5, 8, 5, 7, 8, 6 };
  212. static char color_component_precision(int mode) {
  213. return color_precision_table[mode];
  214. }
  215. static char color_component_precision_plus_pbit(int mode) {
  216. return color_precision_plus_pbit_table[mode];
  217. }
  218. static const unsigned char alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 };
  219. // Note: precision include P-bits!
  220. static const unsigned char alpha_precision_plus_pbit_table[8] = { 0, 0, 0, 0, 6, 8, 8, 6 };
  221. static char alpha_component_precision(int mode) {
  222. return alpha_precision_table[mode];
  223. }
  224. static char alpha_component_precision_plus_pbit(int mode) {
  225. return alpha_precision_plus_pbit_table[mode];
  226. }
  227. // #subsets = { 3, 2, 3, 2, 1, 1, 1, 2 };
  228. // partition bits = { 4, 6, 6, 6, 0, 0, 0, 6 };
  229. // rotation bits = { 0, 0, 0, 0, 2, 2, 0, 0 };
  230. // mode 4 has one index selection bit.
  231. //
  232. // #subsets color alpha part. index before color index after color index after alpha
  233. // Mode 0 3 4 0 1 + 4 = 5 5 + 6 * 3 * 4 = 77 77
  234. // Mode 1 Handled elsewhere.
  235. // Mode 2 3 5 0 3 + 6 = 9 9 + 6 * 3 * 5 = 99 99
  236. // Mode 3 2 7 0 4 + 6 = 10 10 + 4 * 3 * 7 = 94 94
// Mode 4 1 5 6 5 + 2 + 1 = 8 8 + 2 * 3 * 5 = 38 38 + 2 * 6 = 50
  238. // Mode 5 1 7 8 6 + 2 = 8 8 + 2 * 3 * 7 = 50 50 + 2 * 8 = 66
  239. // Mode 6 1 7 7 7 7 + 2 * 3 * 7 = 49 49 + 2 * 7 = 63
  240. // Mode 7 2 5 5 8 + 6 = 14 14 + 4 * 3 * 5 = 74 74 + 4 * 5 = 94
  241. static const signed char components_in_qword0_table[8] = { 2, -1, 1, 1, 3, 3, 3, 2 };
  242. static void extract_endpoints(int mode, int nu_subsets, Block *block, uint8_t *endpoint_array) {
  243. #if 1
  244. // Optimized version avoiding the use of block_extract_bits()
  245. int components_in_qword0 = components_in_qword0_table[mode];
  246. uint64_t data = block->data0 >> block->index;
  247. uint8_t precision = color_component_precision(mode);
  248. uint8_t mask = (1 << precision) - 1;
  249. int total_bits_per_component = nu_subsets * 2 * precision;
  250. for (int i = 0; i < components_in_qword0; i++) // For each color component.
  251. for (int j = 0; j < nu_subsets; j++) // For each subset.
  252. for (int k = 0; k < 2; k++) { // For each endpoint.
  253. endpoint_array[j * 8 + k * 4 + i] = data & mask;
  254. data >>= precision;
  255. }
  256. block->index += components_in_qword0 * total_bits_per_component;
  257. if (components_in_qword0 < 3) {
  258. // Handle the color component that crosses the boundary between data0 and data1
  259. data = block->data0 >> block->index;
  260. data |= block->data1 << (64 - block->index);
  261. int i = components_in_qword0;
  262. for (int j = 0; j < nu_subsets; j++) // For each subset.
  263. for (int k = 0; k < 2; k++) { // For each endpoint.
  264. endpoint_array[j * 8 + k * 4 + i] = data & mask;
  265. data >>= precision;
  266. }
  267. block->index += total_bits_per_component;
  268. }
  269. if (components_in_qword0 < 2) {
  270. // Handle the color component that is wholly in data1.
  271. data = block->data1 >> (block->index - 64);
  272. int i = 2;
  273. for (int j = 0; j < nu_subsets; j++) // For each subset.
  274. for (int k = 0; k < 2; k++) { // For each endpoint.
  275. endpoint_array[j * 8 + k * 4 + i] = data & mask;
  276. data >>= precision;
  277. }
  278. block->index += total_bits_per_component;
  279. }
  280. // Alpha component.
  281. if (alpha_component_precision(mode) > 0) {
  282. // For mode 7, the alpha data is wholly in data1.
  283. // For modes 4 and 6, the alpha data is wholly in data0.
  284. // For mode 5, the alpha data is in data0 and data1.
  285. if (mode == 7)
  286. data = block->data1 >> (block->index - 64);
  287. else if (mode == 5)
  288. data = (block->data0 >> block->index) | ((block->data1 & 0x3) << 14);
  289. else
  290. data = block->data0 >> block->index;
  291. uint8_t alpha_precision = alpha_component_precision(mode);
  292. uint8_t mask = (1 << alpha_precision) - 1;
  293. for (int j = 0; j < nu_subsets; j++)
  294. for (int k = 0; k < 2; k++) { // For each endpoint.
  295. endpoint_array[j * 8 + k * 4 + 3] = data & mask;
  296. data >>= alpha_precision;
  297. }
  298. block->index += nu_subsets * 2 * alpha_precision;
  299. }
  300. #else
  301. // Color components.
  302. for (int i = 0; i < 3; i++) // For each color component.
  303. for (int j = 0; j < nu_subsets; j++) // For each subset.
  304. for (int k = 0; k < 2; k++) // For each endpoint.
  305. endpoint_array[j * 8 + k * 4 + i] =
  306. block_extract_bits(block, color_component_precision(mode));
  307. // Alpha component.
  308. if (alpha_component_precision(mode) > 0) {
  309. for (int j = 0; j < nu_subsets; j++)
  310. for (int k = 0; k < 2; k++) // For each endpoint.
  311. endpoint_array[j * 8 + k * 4 + 3] =
  312. block_extract_bits(block, alpha_component_precision(mode));
  313. }
  314. #endif
  315. }
  316. static const unsigned char mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 };
  317. void fully_decode_endpoints(uint8_t *endpoint_array, int nu_subsets, int mode, Block *block) {
  318. if (mode_has_p_bits[mode]) {
  319. // Mode 1 handled elsewhere.
  320. // Extract end-point pbits.
  321. uint32_t bits;
  322. if (block->index < 64)
  323. {
  324. bits = block->data0 >> block->index;
  325. bits|= block->data1 << (64 - block->index); // ESENTHEL CHANGED
  326. }
  327. else
  328. bits = block->data1 >> (block->index - 64);
  329. for (int i = 0; i < nu_subsets * 2; i++) {
  330. endpoint_array[i * 4 + 0] <<= 1;
  331. endpoint_array[i * 4 + 1] <<= 1;
  332. endpoint_array[i * 4 + 2] <<= 1;
  333. endpoint_array[i * 4 + 3] <<= 1;
  334. endpoint_array[i * 4 + 0] |= (bits & 1);
  335. endpoint_array[i * 4 + 1] |= (bits & 1);
  336. endpoint_array[i * 4 + 2] |= (bits & 1);
  337. endpoint_array[i * 4 + 3] |= (bits & 1);
  338. bits >>= 1;
  339. }
  340. block->index += nu_subsets * 2;
  341. }
  342. int color_prec = color_component_precision_plus_pbit(mode);
  343. int alpha_prec = alpha_component_precision_plus_pbit(mode);
  344. for (int i = 0; i < nu_subsets * 2; i++) {
  345. // Color_component_precision & alpha_component_precision includes pbit
  346. // left shift endpoint components so that their MSB lies in bit 7
  347. endpoint_array[i * 4 + 0] <<= (8 - color_prec);
  348. endpoint_array[i * 4 + 1] <<= (8 - color_prec);
  349. endpoint_array[i * 4 + 2] <<= (8 - color_prec);
  350. endpoint_array[i * 4 + 3] <<= (8 - alpha_prec);
  351. // Replicate each component's MSB into the LSBs revealed by the left-shift operation above.
  352. endpoint_array[i * 4 + 0] |= (endpoint_array[i * 4 + 0] >> color_prec);
  353. endpoint_array[i * 4 + 1] |= (endpoint_array[i * 4 + 1] >> color_prec);
  354. endpoint_array[i * 4 + 2] |= (endpoint_array[i * 4 + 2] >> color_prec);
  355. endpoint_array[i * 4 + 3] |= (endpoint_array[i * 4 + 3] >> alpha_prec);
  356. }
  357. if (mode <= 3) {
  358. for (int i = 0; i < nu_subsets * 2; i++)
  359. endpoint_array[i * 4 + 3] = 0xFF;
  360. }
  361. }
  362. static const uint8_t aWeight2[4] = { 0, 21, 43, 64 };
  363. static const uint8_t aWeight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
  364. static const uint8_t aWeight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
  365. static uint8_t interpolate(uint8_t e0, uint8_t e1, uint8_t index, uint8_t indexprecision) {
  366. if (indexprecision == 2)
  367. return (uint8_t) (((64 - aWeight2[index]) * (uint16_t)e0 + aWeight2[index] * (uint16_t)e1 + 32) >> 6);
  368. else
  369. if (indexprecision == 3)
  370. return (uint8_t) (((64 - aWeight3[index]) * (uint16_t)e0 + aWeight3[index] * (uint16_t)e1 + 32) >> 6);
  371. else // indexprecision == 4
  372. return (uint8_t) (((64 - aWeight4[index]) * (uint16_t)e0 + aWeight4[index] * (uint16_t)e1 + 32) >> 6);
  373. }
  374. static const unsigned char bptc_color_index_bitcount[8] = { 3, 3, 2, 2, 2, 2, 4, 2 };
  375. static int get_color_index_bitcount(int mode, int index_selection_bit) {
  376. // If the index selection bit is set for mode 4, return 3, otherwise 2.
  377. return bptc_color_index_bitcount[mode] + index_selection_bit;
  378. }
  379. static const unsigned char bptc_alpha_index_bitcount[8] = { 3, 3, 2, 2, 3, 2, 4, 2};
  380. static int get_alpha_index_bitcount(int mode, int index_selection_bit) {
  381. // If the index selection bit is set for mode 4, return 2, otherwise 3.
  382. return bptc_alpha_index_bitcount[mode] - index_selection_bit;
  383. }
  384. static int extract_mode(Block *block) {
  385. for (int i = 0; i < 8; i++)
  386. if (block->data0 & ((uint64_t)1 << i)) {
  387. block->index = i + 1;
  388. return i;
  389. }
  390. // Illegal.
  391. return - 1;
  392. }
  393. static const unsigned char bptc_NS[8] = { 3, 2, 3, 2, 1, 1, 1, 2 };
  394. static int get_nu_subsets(int mode) {
  395. return bptc_NS[mode];
  396. }
  397. static const unsigned char PB[8] = { 4, 6, 6, 6, 0, 0, 0, 6 };
  398. static int extract_partition_set_id(Block *block, int mode) {
  399. return block_extract_bits(block, PB[mode]);
  400. }
  401. static int get_partition_index(int nu_subsets, int partition_set_id, int i) {
  402. if (nu_subsets == 1)
  403. return 0;
  404. if (nu_subsets == 2)
  405. return table_P2[partition_set_id * 16 + i];
  406. return table_P3[partition_set_id * 16 + i];
  407. }
  408. static const unsigned char RB[8] = { 0, 0, 0, 0, 2, 2, 0, 0 };
  409. static int extract_rot_bits(Block *block, int mode) {
  410. return block_extract_bits(block, RB[mode]);
  411. }
  412. static const unsigned char anchor_index_second_subset[64] = {
  413. 15,15,15,15,15,15,15,15,
  414. 15,15,15,15,15,15,15,15,
  415. 15, 2, 8, 2, 2, 8, 8,15,
  416. 2, 8, 2, 2, 8, 8, 2, 2,
  417. 15,15, 6, 8, 2, 8,15,15,
  418. 2, 8, 2, 2, 2,15,15, 6,
  419. 6, 2, 6, 8,15,15, 2, 2,
  420. 15,15,15,15,15, 2, 2,15
  421. };
  422. static const unsigned char anchor_index_second_subset_of_three[64] = {
  423. 3, 3,15,15, 8, 3,15,15,
  424. 8, 8, 6, 6, 6, 5, 3, 3,
  425. 3, 3, 8,15, 3, 3, 6,10,
  426. 5, 8, 8, 6, 8, 5,15,15,
  427. 8,15, 3, 5, 6,10, 8,15,
  428. 15, 3,15, 5,15,15,15,15,
  429. 3,15, 5, 5, 5, 8, 5,10,
  430. 5,10, 8,13,15,12, 3, 3
  431. };
  432. static const unsigned char anchor_index_third_subset[64] = {
  433. 15, 8, 8, 3,15,15, 3, 8,
  434. 15,15,15,15,15,15,15, 8,
  435. 15, 8,15, 3,15, 8,15, 8,
  436. 3,15, 6,10,15,15,10, 8,
  437. 15, 3,15,10,10, 8, 9,10,
  438. 6,15, 8,15, 3, 6, 6, 8,
  439. 15, 3,15,15,15,15,15,15,
  440. 15,15,15,15, 3,15,15, 8
  441. };
  442. static int get_anchor_index(int partition_set_id, int partition, int nu_subsets) {
  443. if (partition == 0)
  444. return 0;
  445. if (nu_subsets == 2)
  446. return anchor_index_second_subset[partition_set_id];
  447. if (partition == 1)
  448. return anchor_index_second_subset_of_three[partition_set_id];
  449. return anchor_index_third_subset[partition_set_id];
  450. }
  451. static const unsigned char IB[8] = { 3, 3, 2, 2, 2, 2, 4, 2 };
  452. static const unsigned char IB2[8] = { 0, 0, 0, 0, 3, 2, 0, 0 };
  453. static const unsigned char mode_has_partition_bits[8] = { 1, 1, 1, 1, 0, 0, 0, 1 };
  454. static int draw_bptc_mode_1(Block *block, unsigned int *image_buffer);
  455. int block4x4_bptc_get_mode(const unsigned char *bitstring) {
  456. Block block;
  457. block.data0 = *(uint64_t *)&bitstring[0];
  458. block.data1 = *(uint64_t *)&bitstring[8];
  459. block.index = 0;
  460. int mode = extract_mode(&block);
  461. return mode;
  462. }
  463. void block4x4_bptc_set_mode(unsigned char *bitstring, int flags) {
  464. // Mode 0 starts with 1
  465. // Mode 1 starts with 01
  466. // ...
  467. // Mode 7 starts with 00000001
  468. int mode_flags = flags & BPTC_MODE_ALLOWED_ALL;
  469. int bit = 0x1;
  470. for (int i = 0; i < 8; i++) {
  471. if (mode_flags == (1 << i)) {
  472. bitstring[0] &= ~(bit - 1);
  473. bitstring[0] |= bit;
  474. return;
  475. }
  476. bit <<= 1;
  477. }
  478. }
  479. static uint64_t clear_bits_uint64(uint64_t data, int bit0, int bit1) {
  480. uint64_t mask = ~(((uint64_t)1 << (bit1 + 1)) - 1);
  481. mask |= ((uint64_t)1 << bit0) - 1;
  482. return data & mask;
  483. }
  484. static uint64_t set_bits_uint64(uint64_t data, int bit0, int bit1, uint64_t val) {
  485. uint64_t d = clear_bits_uint64(data, bit0, bit1);
  486. d |= val << bit0;
  487. return d;
  488. }
  489. void bptc_set_block_colors(unsigned char *bitstring, int flags, unsigned int *colors) {
  490. if ((flags & TWO_COLORS) == 0)
  491. return;
  492. uint64_t data0 = *(uint64_t *)&bitstring[0];
  493. uint64_t data1 = *(uint64_t *)&bitstring[8];
  494. if ((flags & BPTC_MODE_ALLOWED_ALL) == (1 << 3)) {
  495. // Mode 3, 7 color bits.
  496. // Color bits at index: 10
  497. // Color bits end before index: 10 + 4 * 3 * 7 = 94
  498. int r0 = pixel_get_r(colors[0]);
  499. int g0 = pixel_get_g(colors[0]);
  500. int b0 = pixel_get_b(colors[0]);
  501. int r1 = pixel_get_r(colors[1]);
  502. int g1 = pixel_get_g(colors[1]);
  503. int b1 = pixel_get_b(colors[1]);
  504. data0 = set_bits_uint64(data0, 10, 16, r0 >> 1);
  505. data0 = set_bits_uint64(data0, 17, 23, r0 >> 1);
  506. data0 = set_bits_uint64(data0, 24, 30, r1 >> 1);
  507. data0 = set_bits_uint64(data0, 31, 37, r1 >> 1);
  508. data0 = set_bits_uint64(data0, 38, 44, g0 >> 1);
  509. data0 = set_bits_uint64(data0, 45, 51, g0 >> 1);
  510. data0 = set_bits_uint64(data0, 52, 58, g1 >> 1);
  511. data0 = set_bits_uint64(data0, 59, 63, (g1 >> 1) & 0x1F);
  512. data1 = set_bits_uint64(data1, 0, 1, ((g1 >> 1) & 0x60) >> 5);
  513. data1 = set_bits_uint64(data1, 2, 8, b0 >> 1);
  514. data1 = set_bits_uint64(data1, 9, 15, b0 >> 1);
  515. data1 = set_bits_uint64(data1, 16, 22, b1 >> 1);
  516. data1 = set_bits_uint64(data1, 23, 29, b1 >> 1);
  517. *(uint64_t *)&bitstring[0] = data0;
  518. *(uint64_t *)&bitstring[8] = data1;
  519. // printf("bptc_set_block_colors: Colors set for mode 3.\n");
  520. }
  521. else if ((flags & BPTC_MODE_ALLOWED_ALL) == (1 << 5)) {
  522. // Mode 5, 7 color bits, 8 alpha bits.
  523. // Color bits at index: 6 + 2 = 8
  524. // Alpha bits at index: 8 + 2 * 3 * 7 = 50
  525. // Alpha bits end before index: 50 + 2 * 8 = 66
  526. int r0 = pixel_get_r(colors[0]);
  527. int g0 = pixel_get_g(colors[0]);
  528. int b0 = pixel_get_b(colors[0]);
  529. int r1 = pixel_get_r(colors[1]);
  530. int g1 = pixel_get_g(colors[1]);
  531. int b1 = pixel_get_b(colors[1]);
  532. data0 = set_bits_uint64(data0, 8, 14, r0 >> 1);
  533. data0 = set_bits_uint64(data0, 15, 21, r1 >> 1);
  534. data0 = set_bits_uint64(data0, 22, 28, g0 >> 1);
  535. data0 = set_bits_uint64(data0, 29, 35, g0 >> 1);
  536. data0 = set_bits_uint64(data0, 36, 42, b0 >> 1);
  537. data0 = set_bits_uint64(data0, 43, 49, b1 >> 1);
  538. if (flags & (MODES_ALLOWED_PUNCHTHROUGH_ONLY)) {
  539. data0 = set_bits_uint64(data0, 50, 57, 0x00);
  540. data0 = set_bits_uint64(data0, 58, 63, 0x3F);
  541. data1 = set_bits_uint64(data1, 0, 1, 0x3);
  542. }
  543. *(uint64_t *)&bitstring[0] = data0;
  544. *(uint64_t *)&bitstring[8] = data1;
  545. // printf("bptc_set_block_colors: Colors set for mode 5.\n");
  546. }
  547. else if ((flags & BPTC_MODE_ALLOWED_ALL) == (1 << 6)) {
  548. // Mode 5, 7 color bits, 7 alpha bits.
  549. // Color bits at index 7.
  550. // Alpha bits at index: 7 + 2 * 3 * 7 = 49
  551. // Alpha bits end before index: 49 + 2 * 7 = 63
  552. int r0 = pixel_get_r(colors[0]);
  553. int g0 = pixel_get_g(colors[0]);
  554. int b0 = pixel_get_b(colors[0]);
  555. int r1 = pixel_get_r(colors[1]);
  556. int g1 = pixel_get_g(colors[1]);
  557. int b1 = pixel_get_b(colors[1]);
  558. data0 = set_bits_uint64(data0, 7, 13, r0 >> 1);
  559. data0 = set_bits_uint64(data0, 14, 20, r1 >> 1);
  560. data0 = set_bits_uint64(data0, 21, 27, g0 >> 1);
  561. data0 = set_bits_uint64(data0, 28, 34, g1 >> 1);
  562. data0 = set_bits_uint64(data0, 35, 41, b0 >> 1);
  563. data0 = set_bits_uint64(data0, 42, 48, b1 >> 1);
  564. if (flags & (MODES_ALLOWED_PUNCHTHROUGH_ONLY)) {
  565. data0 = set_bits_uint64(data0, 49, 55, 0x00);
  566. data0 = set_bits_uint64(data0, 56, 62, 0x7F);
  567. }
  568. *(uint64_t *)&bitstring[0] = data0;
  569. // printf("bptc_set_block_colors: Colors set for mode 6.\n");
  570. }
  571. }
  572. // Draw a 4x4 pixel block using the BPTC/BC7 texture compression data in bitstring.
  573. int draw_block4x4_bptc(const unsigned char *bitstring, unsigned int *image_buffer/*, int flags*/) {
  574. Block block;
  575. block.data0 = *(uint64_t *)&bitstring[0];
  576. block.data1 = *(uint64_t *)&bitstring[8];
  577. block.index = 0;
  578. int mode = extract_mode(&block);
  579. if (mode == - 1)
  580. return 0;
  581. // Allow compression tied to specific modes (according to flags).
  582. /*if (!(flags & ((int)1 << mode)))
  583. return 0;
  584. if (mode >= 4 && (flags & MODES_ALLOWED_OPAQUE_ONLY))
  585. return 0;
  586. if (mode < 4 && (flags & MODES_ALLOWED_NON_OPAQUE_ONLY))
  587. return 0;*/
  588. if (mode == 1)
  589. return draw_bptc_mode_1(&block, image_buffer);
  590. int nu_subsets = 1;
  591. int partition_set_id = 0;
  592. if (mode_has_partition_bits[mode]) {
  593. nu_subsets = get_nu_subsets(mode);
  594. partition_set_id = extract_partition_set_id(&block, mode);
  595. }
  596. int rotation = extract_rot_bits(&block, mode);
  597. int index_selection_bit = 0;
  598. if (mode == 4)
  599. index_selection_bit = block_extract_bits(&block, 1);
  600. int alpha_index_bitcount = get_alpha_index_bitcount(mode, index_selection_bit);
  601. int color_index_bitcount = get_color_index_bitcount(mode, index_selection_bit);
  602. uint8_t endpoint_array[3 * 2 * 4]; // Max. 3 subsets.
  603. extract_endpoints(mode, nu_subsets, &block, endpoint_array);
  604. fully_decode_endpoints(endpoint_array, nu_subsets, mode, &block);
  605. uint8_t subset_index[16];
  606. for (int i = 0; i < 16; i++)
  607. // subset_index[i] is a number from 0 to 2, or 0 to 1, or 0 depending on the number of subsets.
  608. subset_index[i] = get_partition_index(nu_subsets, partition_set_id, i);
  609. uint8_t anchor_index[4]; // Only need max. 3 elements.
  610. for (int i = 0; i < nu_subsets; i++)
  611. anchor_index[i] = get_anchor_index(partition_set_id, i, nu_subsets);
  612. uint8_t color_index[16];
  613. uint8_t alpha_index[16];
  614. // Extract primary index bits.
  615. uint64_t data1;
  616. if (block.index >= 64) {
  617. // Because the index bits are all in the second 64-bit word, there is no need to use
  618. // block_extract_bits().
  619. // This implies the mode is not 4.
  620. data1 = block.data1 >> (block.index - 64);
  621. uint8_t mask1 = (1 << IB[mode]) - 1;
  622. uint8_t mask2 = (1 << (IB[mode] - 1)) - 1;
  623. for (int i = 0; i < 16; i++)
  624. if (i == anchor_index[subset_index[i]]) {
  625. // Highest bit is zero.
  626. color_index[i] = data1 & mask2;
  627. data1 >>= IB[mode] - 1;
  628. alpha_index[i] = color_index[i];
  629. }
  630. else {
  631. color_index[i] = data1 & mask1;
  632. data1 >>= IB[mode];
  633. alpha_index[i] = color_index[i];
  634. }
  635. }
  636. else { // Implies mode 4.
  637. // Because the bits cross the 64-bit word boundary, we have to be careful.
  638. // Block index is 50 at this point.
  639. uint64_t data = block.data0 >> 50;
  640. data |= block.data1 << 14;
  641. for (int i = 0; i < 16; i++)
  642. if (i == anchor_index[subset_index[i]]) {
  643. // Highest bit is zero.
  644. if (index_selection_bit) { // Implies mode == 4.
  645. alpha_index[i] = data & 0x1;
  646. data >>= 1;
  647. }
  648. else {
  649. color_index[i] = data & 0x1;
  650. data >>= 1;
  651. }
  652. }
  653. else {
  654. if (index_selection_bit) { // Implies mode == 4.
  655. alpha_index[i] = data & 0x3;
  656. data >>= 2;
  657. }
  658. else {
  659. color_index[i] = data & 0x3;
  660. data >>= 2;
  661. }
  662. }
  663. // Block index is 81 at this point.
  664. data1 = block.data1 >> (81 - 64);
  665. }
  666. // Extract secondary index bits.
  667. if (IB2[mode] > 0) {
  668. uint8_t mask1 = (1 << IB2[mode]) - 1;
  669. uint8_t mask2 = (1 << (IB2[mode] - 1)) - 1;
  670. for (int i = 0; i < 16; i++)
  671. if (i == anchor_index[subset_index[i]]) {
  672. // Highest bit is zero.
  673. if (index_selection_bit) {
  674. color_index[i] = data1 & 0x3;
  675. data1 >>= 2;
  676. }
  677. else {
  678. // alpha_index[i] = block_extract_bits(&block, IB2[mode] - 1);
  679. alpha_index[i] = data1 & mask2;
  680. data1 >>= IB2[mode] - 1;
  681. }
  682. }
  683. else {
  684. if (index_selection_bit) {
  685. color_index[i] = data1 & 0x7;
  686. data1 >>= 3;
  687. }
  688. else {
  689. // alpha_index[i] = block_extract_bits(&block, IB2[mode]);
  690. alpha_index[i] = data1 & mask1;
  691. data1 >>= IB2[mode];
  692. }
  693. }
  694. }
  695. for (int i = 0; i < 16; i++) {
  696. uint8_t endpoint_start[4];
  697. uint8_t endpoint_end[4];
  698. for (int j = 0; j < 4; j++) {
  699. endpoint_start[j] = endpoint_array[2 * subset_index[i] * 4 + j];
  700. endpoint_end[j] = endpoint_array[(2 * subset_index[i] + 1) * 4 + j];
  701. }
  702. uint32_t output = 0;
  703. output = pack_r(interpolate(endpoint_start[0], endpoint_end[0], color_index[i], color_index_bitcount));
  704. output |= pack_g(interpolate(endpoint_start[1], endpoint_end[1], color_index[i], color_index_bitcount));
  705. output |= pack_b(interpolate(endpoint_start[2], endpoint_end[2], color_index[i], color_index_bitcount));
  706. output |= pack_a(interpolate(endpoint_start[3], endpoint_end[3], alpha_index[i], alpha_index_bitcount));
  707. if (rotation > 0) {
  708. if (rotation == 1)
  709. output = pack_rgba(pixel_get_a(output), pixel_get_g(output), pixel_get_b(output),
  710. pixel_get_r(output));
  711. else
  712. if (rotation == 2)
  713. output = pack_rgba(pixel_get_r(output), pixel_get_a(output), pixel_get_b(output),
  714. pixel_get_g(output));
  715. else // rotation == 3
  716. output = pack_rgba(pixel_get_r(output), pixel_get_g(output), pixel_get_a(output),
  717. pixel_get_b(output));
  718. }
  719. image_buffer[i] = output;
  720. }
  721. return 1;
  722. }
  // Return the bit field data[bit1..bit0] with its bit order reversed: data bit bit0
  // becomes bit 0 of the result, data bit bit0 - 1 becomes bit 1, and so on until data
  // bit bit1 ends up in bit (bit0 - bit1). bit0 is the higher bit position (callers
  // pass bit0 > bit1). Only the low 32 bits of the reversed field are returned.
  static uint32_t get_reversed_bits_uint64(uint64_t data, int bit0, int bit1) {
      const int width = bit0 - bit1 + 1;
      uint64_t reversed = 0;
      int src = bit0;
      for (int dst = 0; dst < width; dst++, src--) {
          // Move the single bit at position src down (or up) to position dst.
          reversed |= ((data >> src) & 1) << dst;
      }
      return (uint32_t)reversed;
  }
  736. // Optimized version of BPTC decode for mode 1, the most common mode.
  737. static int draw_bptc_mode_1(Block *block, unsigned int *image_buffer) {
  738. uint64_t data0 = block->data0;
  739. uint64_t data1 = block->data1;
  740. int partition_set_id = get_bits_uint64(data0, 2, 7);
  741. uint8_t endpoint[2 * 2 * 3]; // 2 subsets.
  742. endpoint[0] = get_bits_uint64(data0, 8, 13); // red, subset 0, endpoint 0
  743. endpoint[3] = get_bits_uint64(data0, 14, 19); // red, subset 0, endpoint 1
  744. endpoint[6] = get_bits_uint64(data0, 20, 25); // red, subset 1, endpoint 0
  745. endpoint[9] = get_bits_uint64(data0, 26, 31); // red, subset 1, endpoint 1
  746. endpoint[1] = get_bits_uint64(data0, 32, 37); // green, subset 0, endpoint 0
  747. endpoint[4] = get_bits_uint64(data0, 38, 43); // green, subset 0, endpoint 1
  748. endpoint[7] = get_bits_uint64(data0, 44, 49); // green, subset 1, endpoint 0
  749. endpoint[10] = get_bits_uint64(data0, 50, 55); // green, subset 1, endpoint 1
  750. endpoint[2] = get_bits_uint64(data0, 56, 61); // blue, subset 0, endpoint 0
  751. endpoint[5] = get_bits_uint64(data0, 62, 63) // blue, subset 0, endpoint 1
  752. | (get_bits_uint64(data1, 0, 3) << 2);
  753. endpoint[8] = get_bits_uint64(data1, 4, 9); // blue, subset 1, endpoint 0
  754. endpoint[11] = get_bits_uint64(data1, 10, 15); // blue, subset 1, endpoint 1
  755. // Decode endpoints.
  756. for (int i = 0; i < 2 * 2; i++) {
  757. //component-wise left-shift
  758. endpoint[i * 3 + 0] <<= 2;
  759. endpoint[i * 3 + 1] <<= 2;
  760. endpoint[i * 3 + 2] <<= 2;
  761. }
  762. // P-bit is shared.
  763. uint8_t pbit_zero = get_bits_uint64(data1, 16, 16) << 1;
  764. uint8_t pbit_one = get_bits_uint64(data1, 17, 17) << 1;
  765. // RGB only pbits for mode 1, one for each subset.
  766. for (int j = 0; j < 3; j++) {
  767. endpoint[0 * 3 + j] |= pbit_zero;
  768. endpoint[1 * 3 + j] |= pbit_zero;
  769. endpoint[2 * 3 + j] |= pbit_one;
  770. endpoint[3 * 3 + j] |= pbit_one;
  771. }
  772. for (int i = 0; i < 2 * 2; i++) {
  773. // Replicate each component's MSB into the LSB.
  774. endpoint[i * 3 + 0] |= endpoint[i * 3 + 0] >> 7;
  775. endpoint[i * 3 + 1] |= endpoint[i * 3 + 1] >> 7;
  776. endpoint[i * 3 + 2] |= endpoint[i * 3 + 2] >> 7;
  777. }
  778. uint8_t subset_index[16];
  779. for (int i = 0; i < 16; i++)
  780. // subset_index[i] is a number from 0 to 1.
  781. subset_index[i] = table_P2[partition_set_id * 16 + i];
  782. uint8_t anchor_index[2];
  783. anchor_index[0] = 0;
  784. anchor_index[1] = anchor_index_second_subset[partition_set_id];
  785. uint8_t color_index[16];
  786. // Extract primary index bits.
  787. data1 >>= 18;
  788. for (int i = 0; i < 16; i++)
  789. if (i == anchor_index[subset_index[i]]) {
  790. // Highest bit is zero.
  791. color_index[i] = data1 & 3; // Get two bits.
  792. data1 >>= 2;
  793. }
  794. else {
  795. color_index[i] = data1 & 7; // Get three bits.
  796. data1 >>= 3;
  797. }
  798. for (int i = 0; i < 16; i++) {
  799. uint8_t endpoint_start[3];
  800. uint8_t endpoint_end[3];
  801. for (int j = 0; j < 3; j++) {
  802. endpoint_start[j] = endpoint[2 * subset_index[i] * 3 + j];
  803. endpoint_end[j] = endpoint[(2 * subset_index[i] + 1) * 3 + j];
  804. }
  805. uint32_t output;
  806. output = pack_r(interpolate(endpoint_start[0], endpoint_end[0], color_index[i], 3));
  807. output |= pack_g(interpolate(endpoint_start[1], endpoint_end[1], color_index[i], 3));
  808. output |= pack_b(interpolate(endpoint_start[2], endpoint_end[2], color_index[i], 3));
  809. output |= pack_a(0xFF);
  810. image_buffer[i] = output;
  811. }
  812. return 1;
  813. }
  // BPTC float (BC6H) decoding.

  // Lookup from the 5-bit mode field of a block to the internal mode number used by
  // the decoder (0..13). Entries that have no internal mode are -1.
  static const signed char map_mode_table[32] = {
       0,  1,  2, 10,	// field values  0.. 3
      -1, -1,  3, 11,	// field values  4.. 7
      -1, -1,  4, 12,	// field values  8..11
      -1, -1,  5, 13,	// field values 12..15
      -1, -1,  6, -1,	// field values 16..19
      -1, -1,  7, -1,	// field values 20..23
      -1, -1,  8, -1,	// field values 24..27
      -1, -1,  9, -1	// field values 28..31
  };
  818. static int extract_bptc_float_mode(Block *block) {
  819. int mode = block_extract_bits(block, 2);
  820. if (mode < 2)
  821. return mode;
  822. return map_mode_table[mode | (block_extract_bits(block, 3) << 2)];
  823. }
  824. static int bptc_float_get_partition_index(int nu_subsets, int partition_set_id, int i) {
  825. if (nu_subsets == 1)
  826. return 0;
  827. // nu_subset == 2
  828. return table_P2[partition_set_id * 16 + i];
  829. }
  // Bit width of a quantized endpoint component, indexed by internal mode number.
  static const unsigned char bptc_float_EPB[14] = {
      10,  7, 11, 11, 11,  9,  8,  8,  8,  6,	// modes 0..9
      10, 11, 12, 16				// modes 10..13
  };

  // Expand an unsigned quantized endpoint component x of the given mode to the 16-bit
  // range. Mode 13 values are passed through, 0 stays 0, the all-ones value maps to
  // 0xFFFF, and everything else is scaled with rounding.
  static uint32_t unquantize(uint16_t x, int mode) {
      if (mode == 13)
          return x;
      if (x == 0)
          return 0;
      const int bits = bptc_float_EPB[mode];
      if (x == (((int32_t)1 << bits) - 1))
          return 0xFFFF;
      return (((int32_t)x << 15) + 0x4000) >> (bits - 1);
  }
  844. static int32_t unquantize_signed(int16_t x, int mode) {
  845. int s = 0;
  846. int32_t unq;
  847. if (bptc_float_EPB[mode] >= 16)
  848. unq = x;
  849. else {
  850. if (x < 0) {
  851. s = 1;
  852. x = -x;
  853. }
  854. if (x == 0)
  855. unq = 0;
  856. else
  857. if (x >= (((int32_t)1 << (bptc_float_EPB[mode] - 1)) - 1))
  858. unq = 0x7FFF;
  859. else
  860. unq = (((int32_t)x << 15) + 0x4000) >> (bptc_float_EPB[mode] - 1);
  861. if (s)
  862. unq = -unq;
  863. }
  864. return unq;
  865. }
  // Sign-extend a two's complement field of source_nu_bits bits, stored in the low
  // bits of value, to target_nu_bits bits.
  //
  // If bit (source_nu_bits - 1) of value is clear, value is returned unchanged.
  // Otherwise bits source_nu_bits .. target_nu_bits - 1 are set in the result; any
  // bits of value outside that range are kept as they are. With target_nu_bits >= 32
  // the extension covers the whole 32-bit word, so the result is negative.
  //
  // All bit arithmetic is done on unsigned values so that no signed shift can
  // overflow. A source width outside 1..31 leaves nothing to extend and returns value
  // unchanged instead of shifting by an invalid amount.
  static int sign_extend(int value, int source_nu_bits, int target_nu_bits) {
      if (source_nu_bits <= 0 || source_nu_bits >= 32)
          return value;
      const uint32_t bits = (uint32_t)value;
      const uint32_t sign_mask = (uint32_t)1 << (source_nu_bits - 1);
      if (!(bits & sign_mask))
          return value;
      // Every bit above the source field ...
      uint32_t fill = ~(((uint32_t)1 << source_nu_bits) - 1);
      // ... limited to the target width when that is narrower than 32 bits.
      if (target_nu_bits <= 0)
          fill = 0;
      else if (target_nu_bits < 32)
          fill &= ((uint32_t)1 << target_nu_bits) - 1;
      // Converting a value above INT_MAX back to int yields the matching negative
      // number on two's complement implementations, as the callers expect.
      return (int)(bits | fill);
  }
  874. static int32_t interpolate_float(int32_t e0, int32_t e1, int16_t index, uint8_t indexprecision) {
  875. if (indexprecision == 2)
  876. return (((64 - aWeight2[index]) * (int32_t)e0 + aWeight2[index] * (int32_t)e1 + 32) >> 6);
  877. else
  878. if (indexprecision == 3)
  879. return (((64 - aWeight3[index]) * (int32_t)e0 + aWeight3[index] * (int32_t)e1 + 32) >> 6);
  880. else // indexprecision == 4
  881. return (((64 - aWeight4[index]) * (int32_t)e0 + aWeight4[index] * (int32_t)e1 + 32) >> 6);
  882. }
  883. int block4x4_bptc_float_get_mode(const unsigned char *bitstring) {
  884. Block block;
  885. block.data0 = *(uint64_t *)&bitstring[0];
  886. block.data1 = *(uint64_t *)&bitstring[8];
  887. block.index = 0;
  888. uint32_t mode = extract_bptc_float_mode(&block);
  889. return mode;
  890. }
  // Mode field value written into the first byte of a block for each internal mode
  // number 0..13.
  static const uint8_t bptc_float_set_mode_table[14] = {
      [0] = 0,
      [1] = 1,
      [2] = 2,
      [3] = 6,
      [4] = 10,
      [5] = 14,
      [6] = 18,
      [7] = 22,
      [8] = 26,
      [9] = 30,
      [10] = 3,
      [11] = 7,
      [12] = 11,
      [13] = 15
  };
  894. void block4x4_bptc_float_set_mode(unsigned char *bitstring, int flags) {
  895. int mode_flags = flags & BPTC_FLOAT_MODE_ALLOWED_ALL;
  896. if (mode_flags & 0x3) {
  897. // Set mode 0 or 1.
  898. bitstring[0] = (bitstring[0] & 0xFC) | ((mode_flags & 0x2) >> 1);
  899. return;
  900. }
  901. uint8_t byte0 = bitstring[0];
  902. byte0 &= 0xE0;
  903. for (int i = 2; i < 14; i++)
  904. if (flags & (1 << i)) {
  905. byte0 |= bptc_float_set_mode_table[i];
  906. bitstring[0] = byte0;
  907. return;
  908. }
  909. }
  910. int draw_block4x4_bptc_float_shared(const unsigned char *bitstring, unsigned int *image_buffer, int signed_flag, int flags) {
  911. Block block;
  912. block.data0 = *(uint64_t *)&bitstring[0];
  913. block.data1 = *(uint64_t *)&bitstring[8];
  914. block.index = 0;
  915. uint32_t mode = extract_bptc_float_mode(&block);
  916. if (mode == - 1)
  917. return 0;
  918. // Allow compression tied to specific modes (according to flags).
  919. if (!(flags & ((int)1 << mode)))
  920. return 0;
  921. int32_t r[4], g[4], b[4];
  922. int partition_set_id = 0;
  923. int delta_bits_r, delta_bits_g, delta_bits_b;
  924. uint64_t data0 = block.data0;
  925. uint64_t data1 = block.data1;
  926. switch (mode) {
  927. case 0 :
  928. // m[1:0],g2[4],b2[4],b3[4],r0[9:0],g0[9:0],b0[9:0],r1[4:0],g3[4],g2[3:0],
  929. // g1[4:0],b3[0],g3[3:0],b1[4:0],b3[1],b2[3:0],r2[4:0],b3[2],r3[4:0],b3[3]
  930. g[2] = get_bits_uint64(data0, 2, 2) << 4;
  931. b[2] = get_bits_uint64(data0, 3, 3) << 4;
  932. b[3] = get_bits_uint64(data0, 4, 4) << 4;
  933. r[0] = get_bits_uint64(data0, 5, 14);
  934. g[0] = get_bits_uint64(data0, 15, 24);
  935. b[0] = get_bits_uint64(data0, 25, 34);
  936. r[1] = get_bits_uint64(data0, 35, 39);
  937. g[3] = get_bits_uint64(data0, 40, 40) << 4;
  938. g[2] |= get_bits_uint64(data0, 41, 44);
  939. g[1] = get_bits_uint64(data0, 45, 49);
  940. b[3] |= get_bits_uint64(data0, 50, 50);
  941. g[3] |= get_bits_uint64(data0, 51, 54);
  942. b[1] = get_bits_uint64(data0, 55, 59);
  943. b[3] |= get_bits_uint64(data0, 60, 60) << 1;
  944. b[2] |= get_bits_uint64(data0, 61, 63);
  945. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  946. r[2] = get_bits_uint64(data1, 1, 5);
  947. b[3] |= get_bits_uint64(data1, 6, 6) << 2;
  948. r[3] = get_bits_uint64(data1, 7, 11);
  949. b[3] |= get_bits_uint64(data1, 12, 12) << 3;
  950. partition_set_id = get_bits_uint64(data1, 13, 17);
  951. block.index = 64 + 18;
  952. delta_bits_r = delta_bits_g = delta_bits_b = 5;
  953. break;
  954. case 1 :
  955. // m[1:0],g2[5],g3[4],g3[5],r0[6:0],b3[0],b3[1],b2[4],g0[6:0],b2[5],b3[2],
  956. // g2[4],b0[6:0],b3[3],b3[5],b3[4],r1[5:0],g2[3:0],g1[5:0],g3[3:0],b1[5:0],
  957. // b2[3:0],r2[5:0],r3[5:0]
  958. g[2] = get_bits_uint64(data0, 2, 2) << 5;
  959. g[3] = get_bits_uint64(data0, 3, 3) << 4;
  960. g[3] |= get_bits_uint64(data0, 4, 4) << 5;
  961. r[0] = get_bits_uint64(data0, 5, 11);
  962. b[3] = get_bits_uint64(data0, 12, 12);
  963. b[3] |= get_bits_uint64(data0, 13, 13) << 1;
  964. b[2] = get_bits_uint64(data0, 14, 14) << 4;
  965. g[0] = get_bits_uint64(data0, 15, 21);
  966. b[2] |= get_bits_uint64(data0, 22, 22) << 5;
  967. b[3] |= get_bits_uint64(data0, 23, 23) << 2;
  968. g[2] |= get_bits_uint64(data0, 24, 24) << 4;
  969. b[0] = get_bits_uint64(data0, 25, 31);
  970. b[3] |= get_bits_uint64(data0, 32, 32) << 3;
  971. b[3] |= get_bits_uint64(data0, 33, 33) << 5;
  972. b[3] |= get_bits_uint64(data0, 34, 34) << 4;
  973. r[1] = get_bits_uint64(data0, 35, 40);
  974. g[2] |= get_bits_uint64(data0, 41, 44);
  975. g[1] = get_bits_uint64(data0, 45, 50);
  976. g[3] |= get_bits_uint64(data0, 51, 54);
  977. b[1] = get_bits_uint64(data0, 55, 60);
  978. b[2] |= get_bits_uint64(data0, 61, 63);
  979. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  980. r[2] = get_bits_uint64(data1, 1, 6);
  981. r[3] = get_bits_uint64(data1, 7, 12);
  982. partition_set_id = get_bits_uint64(data1, 13, 17);
  983. block.index = 64 + 18;
  984. delta_bits_r = delta_bits_g = delta_bits_b = 6;
  985. break;
  986. case 2 :
  987. // m[4:0],r0[9:0],g0[9:0],b0[9:0],r1[4:0],r0[10],g2[3:0],g1[3:0],g0[10],
  988. // b3[0],g3[3:0],b1[3:0],b0[10],b3[1],b2[3:0],r2[4:0],b3[2],r3[4:0],b3[3]
  989. r[0] = get_bits_uint64(data0, 5, 14);
  990. g[0] = get_bits_uint64(data0, 15, 24);
  991. b[0] = get_bits_uint64(data0, 25, 34);
  992. r[1] = get_bits_uint64(data0, 35, 39);
  993. r[0] |= get_bits_uint64(data0, 40, 40) << 10;
  994. g[2] = get_bits_uint64(data0, 41, 44);
  995. g[1] = get_bits_uint64(data0, 45, 48);
  996. g[0] |= get_bits_uint64(data0, 49, 49) << 10;
  997. b[3] = get_bits_uint64(data0, 50, 50);
  998. g[3] = get_bits_uint64(data0, 51, 54);
  999. b[1] = get_bits_uint64(data0, 55, 58);
  1000. b[0] |= get_bits_uint64(data0, 59, 59) << 10;
  1001. b[3] |= get_bits_uint64(data0, 60, 60) << 1;
  1002. b[2] = get_bits_uint64(data0, 61, 63);
  1003. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  1004. r[2] = get_bits_uint64(data1, 1, 5);
  1005. b[3] |= get_bits_uint64(data1, 6, 6) << 2;
  1006. r[3] = get_bits_uint64(data1, 7, 11);
  1007. b[3] |= get_bits_uint64(data1, 12, 12) << 3;
  1008. partition_set_id = get_bits_uint64(data1, 13, 17);
  1009. block.index = 64 + 18;
  1010. delta_bits_r = 5;
  1011. delta_bits_g = delta_bits_b = 4;
  1012. break;
  1013. case 3 : // Original mode 6.
  1014. // m[4:0],r0[9:0],g0[9:0],b0[9:0],r1[3:0],r0[10],g3[4],g2[3:0],g1[4:0],
  1015. // g0[10],g3[3:0],b1[3:0],b0[10],b3[1],b2[3:0],r2[3:0],b3[0],b3[2],r3[3:0],
  1016. // g2[4],b3[3]
  1017. r[0] = get_bits_uint64(data0, 5, 14);
  1018. g[0] = get_bits_uint64(data0, 15, 24);
  1019. b[0] = get_bits_uint64(data0, 25, 34);
  1020. r[1] = get_bits_uint64(data0, 35, 38);
  1021. r[0] |= get_bits_uint64(data0, 39, 39) << 10;
  1022. g[3] = get_bits_uint64(data0, 40, 40) << 4;
  1023. g[2] = get_bits_uint64(data0, 41, 44);
  1024. g[1] = get_bits_uint64(data0, 45, 49);
  1025. g[0] |= get_bits_uint64(data0, 50, 50) << 10;
  1026. g[3] |= get_bits_uint64(data0, 51, 54);
  1027. b[1] = get_bits_uint64(data0, 55, 58);
  1028. b[0] |= get_bits_uint64(data0, 59, 59) << 10;
  1029. b[3] = get_bits_uint64(data0, 60, 60) << 1;
  1030. b[2] = get_bits_uint64(data0, 61, 63);
  1031. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  1032. r[2] = get_bits_uint64(data1, 1, 4);
  1033. b[3] |= get_bits_uint64(data1, 5, 5);
  1034. b[3] |= get_bits_uint64(data1, 6, 6) << 2;
  1035. r[3] = get_bits_uint64(data1, 7, 10);
  1036. g[2] |= get_bits_uint64(data1, 11, 11) << 4;
  1037. b[3] |= get_bits_uint64(data1, 12, 12) << 3;
  1038. partition_set_id = get_bits_uint64(data1, 13, 17);
  1039. block.index = 64 + 18;
  1040. delta_bits_r = delta_bits_b = 4;
  1041. delta_bits_g = 5;
  1042. break;
  1043. case 4 : // Original mode 10.
  1044. // m[4:0],r0[9:0],g0[9:0],b0[9:0],r1[3:0],r0[10],b2[4],g2[3:0],g1[3:0],
  1045. // g0[10],b3[0],g3[3:0],b1[4:0],b0[10],b2[3:0],r2[3:0],b3[1],b3[2],r3[3:0],
  1046. // b3[4],b3[3]
  1047. r[0] = get_bits_uint64(data0, 5, 14);
  1048. g[0] = get_bits_uint64(data0, 15, 24);
  1049. b[0] = get_bits_uint64(data0, 25, 34);
  1050. r[1] = get_bits_uint64(data0, 35, 38);
  1051. r[0] |= get_bits_uint64(data0, 39, 39) << 10;
  1052. b[2] = get_bits_uint64(data0, 40, 40) << 4;
  1053. g[2] = get_bits_uint64(data0, 41, 44);
  1054. g[1] = get_bits_uint64(data0, 45, 48);
  1055. g[0] |= get_bits_uint64(data0, 49, 49) << 10;
  1056. b[3] = get_bits_uint64(data0, 50, 50);
  1057. g[3] = get_bits_uint64(data0, 51, 54);
  1058. b[1] = get_bits_uint64(data0, 55, 59);
  1059. b[0] |= get_bits_uint64(data0, 60, 60) << 10;
  1060. b[2] |= get_bits_uint64(data0, 61, 63);
  1061. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  1062. r[2] = get_bits_uint64(data1, 1, 4);
  1063. b[3] |= get_bits_uint64(data1, 5, 5) << 1;
  1064. b[3] |= get_bits_uint64(data1, 6, 6) << 2;
  1065. r[3] = get_bits_uint64(data1, 7, 10);
  1066. b[3] |= get_bits_uint64(data1, 11, 11) << 4;
  1067. b[3] |= get_bits_uint64(data1, 12, 12) << 3;
  1068. partition_set_id = get_bits_uint64(data1, 13, 17);
  1069. block.index = 64 + 18;
  1070. delta_bits_r = delta_bits_g = 4;
  1071. delta_bits_b = 5;
  1072. break;
  1073. case 5 : // Original mode 14
  1074. // m[4:0],r0[8:0],b2[4],g0[8:0],g2[4],b0[8:0],b3[4],r1[4:0],g3[4],g2[3:0],
  1075. // g1[4:0],b3[0],g3[3:0],b1[4:0],b3[1],b2[3:0],r2[4:0],b3[2],r3[4:0],b3[3]
  1076. r[0] = get_bits_uint64(data0, 5, 13);
  1077. b[2] = get_bits_uint64(data0, 14, 14) << 4;
  1078. g[0] = get_bits_uint64(data0, 15, 23);
  1079. g[2] = get_bits_uint64(data0, 24, 24) << 4;
  1080. b[0] = get_bits_uint64(data0, 25, 33);
  1081. b[3] = get_bits_uint64(data0, 34, 34) << 4;
  1082. r[1] = get_bits_uint64(data0, 35, 39);
  1083. g[3] = get_bits_uint64(data0, 40, 40) << 4;
  1084. g[2] |= get_bits_uint64(data0, 41, 44);
  1085. g[1] = get_bits_uint64(data0, 45, 49);
  1086. b[3] |= get_bits_uint64(data0, 50, 50);
  1087. g[3] |= get_bits_uint64(data0, 51, 54);
  1088. b[1] = get_bits_uint64(data0, 55, 59);
  1089. b[3] |= get_bits_uint64(data0, 60, 60) << 1;
  1090. b[2] |= get_bits_uint64(data0, 61, 63);
  1091. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  1092. r[2] = get_bits_uint64(data1, 1, 5);
  1093. b[3] |= get_bits_uint64(data1, 6, 6) << 2;
  1094. r[3] = get_bits_uint64(data1, 7, 11);
  1095. b[3] |= get_bits_uint64(data1, 12, 12) << 3;
  1096. partition_set_id = get_bits_uint64(data1, 13, 17);
  1097. block.index = 64 + 18;
  1098. delta_bits_r = delta_bits_g = delta_bits_b = 5;
  1099. break;
  1100. case 6 : // Original mode 18
  1101. // m[4:0],r0[7:0],g3[4],b2[4],g0[7:0],b3[2],g2[4],b0[7:0],b3[3],b3[4],
  1102. // r1[5:0],g2[3:0],g1[4:0],b3[0],g3[3:0],b1[4:0],b3[1],b2[3:0],r2[5:0],r3[5:0]
  1103. r[0] = get_bits_uint64(data0, 5, 12);
  1104. g[3] = get_bits_uint64(data0, 13, 13) << 4;
  1105. b[2] = get_bits_uint64(data0, 14, 14) << 4;
  1106. g[0] = get_bits_uint64(data0, 15, 22);
  1107. b[3] = get_bits_uint64(data0, 23, 23) << 2;
  1108. g[2] = get_bits_uint64(data0, 24, 24) << 4;
  1109. b[0] = get_bits_uint64(data0, 25, 32);
  1110. b[3] |= get_bits_uint64(data0, 33, 33) << 3;
  1111. b[3] |= get_bits_uint64(data0, 34, 34) << 4;
  1112. r[1] = get_bits_uint64(data0, 35, 40);
  1113. g[2] |= get_bits_uint64(data0, 41, 44);
  1114. g[1] = get_bits_uint64(data0, 45, 49);
  1115. b[3] |= get_bits_uint64(data0, 50, 50);
  1116. g[3] |= get_bits_uint64(data0, 51, 54);
  1117. b[1] = get_bits_uint64(data0, 55, 59);
  1118. b[3] |= get_bits_uint64(data0, 60, 60) << 1;
  1119. b[2] |= get_bits_uint64(data0, 61, 63);
  1120. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  1121. r[2] = get_bits_uint64(data1, 1, 6);
  1122. r[3] = get_bits_uint64(data1, 7, 12);
  1123. partition_set_id = get_bits_uint64(data1, 13, 17);
  1124. block.index = 64 + 18;
  1125. delta_bits_r = 6;
  1126. delta_bits_g = delta_bits_b = 5;
  1127. break;
  1128. case 7 : // Original mode 22
  1129. // m[4:0],r0[7:0],b3[0],b2[4],g0[7:0],g2[5],g2[4],b0[7:0],g3[5],b3[4],
  1130. // r1[4:0],g3[4],g2[3:0],g1[5:0],g3[3:0],b1[4:0],b3[1],b2[3:0],r2[4:0],
  1131. // b3[2],r3[4:0],b3[3]
  1132. r[0] = get_bits_uint64(data0, 5, 12);
  1133. b[3] = get_bits_uint64(data0, 13, 13);
  1134. b[2] = get_bits_uint64(data0, 14, 14) << 4;
  1135. g[0] = get_bits_uint64(data0, 15, 22);
  1136. g[2] = get_bits_uint64(data0, 23, 23) << 5;
  1137. g[2] |= get_bits_uint64(data0, 24, 24) << 4;
  1138. b[0] = get_bits_uint64(data0, 25, 32);
  1139. g[3] = get_bits_uint64(data0, 33, 33) << 5;
  1140. b[3] |= get_bits_uint64(data0, 34, 34) << 4;
  1141. r[1] = get_bits_uint64(data0, 35, 39);
  1142. g[3] |= get_bits_uint64(data0, 40, 40) << 4;
  1143. g[2] |= get_bits_uint64(data0, 41, 44);
  1144. g[1] = get_bits_uint64(data0, 45, 50);
  1145. g[3] |= get_bits_uint64(data0, 51, 54);
  1146. b[1] = get_bits_uint64(data0, 55, 59);
  1147. b[3] |= get_bits_uint64(data0, 60, 60) << 1;
  1148. b[2] |= get_bits_uint64(data0, 61, 63);
  1149. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  1150. r[2] = get_bits_uint64(data1, 1, 5);
  1151. b[3] |= get_bits_uint64(data1, 6, 6) << 2;
  1152. r[3] = get_bits_uint64(data1, 7, 11);
  1153. b[3] |= get_bits_uint64(data1, 12, 12) << 3;
  1154. partition_set_id = get_bits_uint64(data1, 13, 17);
  1155. block.index = 64 + 18;
  1156. delta_bits_r = delta_bits_b = 5;
  1157. delta_bits_g = 6;
  1158. break;
  1159. case 8 : // Original mode 26
  1160. // m[4:0],r0[7:0],b3[1],b2[4],g0[7:0],b2[5],g2[4],b0[7:0],b3[5],b3[4],
  1161. // r1[4:0],g3[4],g2[3:0],g1[4:0],b3[0],g3[3:0],b1[5:0],b2[3:0],r2[4:0],
  1162. // b3[2],r3[4:0],b3[3]
  1163. r[0] = get_bits_uint64(data0, 5, 12);
  1164. b[3] = get_bits_uint64(data0, 13, 13) << 1;
  1165. b[2] = get_bits_uint64(data0, 14, 14) << 4;
  1166. g[0] = get_bits_uint64(data0, 15, 22);
  1167. b[2] |= get_bits_uint64(data0, 23, 23) << 5;
  1168. g[2] = get_bits_uint64(data0, 24, 24) << 4;
  1169. b[0] = get_bits_uint64(data0, 25, 32);
  1170. b[3] |= get_bits_uint64(data0, 33, 33) << 5;
  1171. b[3] |= get_bits_uint64(data0, 34, 34) << 4;
  1172. r[1] = get_bits_uint64(data0, 35, 39);
  1173. g[3] = get_bits_uint64(data0, 40, 40) << 4;
  1174. g[2] |= get_bits_uint64(data0, 41, 44);
  1175. g[1] = get_bits_uint64(data0, 45, 49);
  1176. b[3] |= get_bits_uint64(data0, 50, 50);
  1177. g[3] |= get_bits_uint64(data0, 51, 54);
  1178. b[1] = get_bits_uint64(data0, 55, 60);
  1179. b[2] |= get_bits_uint64(data0, 61, 63);
  1180. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  1181. r[2] = get_bits_uint64(data1, 1, 5);
  1182. b[3] |= get_bits_uint64(data1, 6, 6) << 2;
  1183. r[3] = get_bits_uint64(data1, 7, 11);
  1184. b[3] |= get_bits_uint64(data1, 12, 12) << 3;
  1185. partition_set_id = get_bits_uint64(data1, 13, 17);
  1186. block.index = 64 + 18;
  1187. delta_bits_r = delta_bits_g = 5;
  1188. delta_bits_b = 6;
  1189. break;
  1190. case 9 : // Original mode 30
  1191. // m[4:0],r0[5:0],g3[4],b3[0],b3[1],b2[4],g0[5:0],g2[5],b2[5],b3[2],
  1192. // g2[4],b0[5:0],g3[5],b3[3],b3[5],b3[4],r1[5:0],g2[3:0],g1[5:0],g3[3:0],
  1193. // b1[5:0],b2[3:0],r2[5:0],r3[5:0]
  1194. r[0] = get_bits_uint64(data0, 5, 10);
  1195. g[3] = get_bits_uint64(data0, 11, 11) << 4;
  1196. b[3] = get_bits_uint64(data0, 12, 13);
  1197. b[2] = get_bits_uint64(data0, 14, 14) << 4;
  1198. g[0] = get_bits_uint64(data0, 15, 20);
  1199. g[2] = get_bits_uint64(data0, 21, 21) << 5;
  1200. b[2] |= get_bits_uint64(data0, 22, 22) << 5;
  1201. b[3] |= get_bits_uint64(data0, 23, 23) << 2;
  1202. g[2] |= get_bits_uint64(data0, 24, 24) << 4;
  1203. b[0] = get_bits_uint64(data0, 25, 30);
  1204. g[3] |= get_bits_uint64(data0, 31, 31) << 5;
  1205. b[3] |= get_bits_uint64(data0, 32, 32) << 3;
  1206. b[3] |= get_bits_uint64(data0, 33, 33) << 5;
  1207. b[3] |= get_bits_uint64(data0, 34, 34) << 4;
  1208. r[1] = get_bits_uint64(data0, 35, 40);
  1209. g[2] |= get_bits_uint64(data0, 41, 44);
  1210. g[1] = get_bits_uint64(data0, 45, 50);
  1211. g[3] |= get_bits_uint64(data0, 51, 54);
  1212. b[1] = get_bits_uint64(data0, 55, 60);
  1213. b[2] |= get_bits_uint64(data0, 61, 63);
  1214. b[2] |= get_bits_uint64(data1, 0, 0) << 3;
  1215. r[2] = get_bits_uint64(data1, 1, 6);
  1216. r[3] = get_bits_uint64(data1, 7, 12);
  1217. partition_set_id = get_bits_uint64(data1, 13, 17);
  1218. block.index = 64 + 18;
  1219. // delta_bits_r = delta_bits_g = delta_bits_b = 6;
  1220. break;
  1221. case 10 : // Original mode 3
  1222. // m[4:0],r0[9:0],g0[9:0],b0[9:0],r1[9:0],g1[9:0],b1[9:0]
  1223. r[0] = get_bits_uint64(data0, 5, 14);
  1224. g[0] = get_bits_uint64(data0, 15, 24);
  1225. b[0] = get_bits_uint64(data0, 25, 34);
  1226. r[1] = get_bits_uint64(data0, 35, 44);
  1227. g[1] = get_bits_uint64(data0, 45, 54);
  1228. b[1] = get_bits_uint64(data0, 55, 63);
  1229. b[1] |= get_bits_uint64(data1, 0, 0) << 9;
  1230. partition_set_id = 0;
  1231. block.index = 65;
  1232. // delta_bits_r = delta_bits_g = delta_bits_b = 10;
  1233. break;
  1234. case 11 : // Original mode 7
  1235. // m[4:0],r0[9:0],g0[9:0],b0[9:0],r1[8:0],r0[10],g1[8:0],g0[10],b1[8:0],b0[10]
  1236. r[0] = get_bits_uint64(data0, 5, 14);
  1237. g[0] = get_bits_uint64(data0, 15, 24);
  1238. b[0] = get_bits_uint64(data0, 25, 34);
  1239. r[1] = get_bits_uint64(data0, 35, 43);
  1240. r[0] |= get_bits_uint64(data0, 44, 44) << 10;
  1241. g[1] = get_bits_uint64(data0, 45, 53);
  1242. g[0] |= get_bits_uint64(data0, 54, 54) << 10;
  1243. b[1] = get_bits_uint64(data0, 55, 63);
  1244. b[0] |= get_bits_uint64(data1, 0, 0) << 10;
  1245. partition_set_id = 0;
  1246. block.index = 65;
  1247. delta_bits_r = delta_bits_g = delta_bits_b = 9;
  1248. break;
  1249. case 12 : // Original mode 11
  1250. // m[4:0],r0[9:0],g0[9:0],b0[9:0],r1[7:0],r0[10:11],g1[7:0],g0[10:11],
  1251. // b1[7:0],b0[10:11]
  1252. r[0] = get_bits_uint64(data0, 5, 14);
  1253. g[0] = get_bits_uint64(data0, 15, 24);
  1254. b[0] = get_bits_uint64(data0, 25, 34);
  1255. r[1] = get_bits_uint64(data0, 35, 42);
  1256. r[0] |= get_reversed_bits_uint64(data0, 44, 43) << 10; // Reversed.
  1257. g[1] = get_bits_uint64(data0, 45, 52);
  1258. g[0] |= get_reversed_bits_uint64(data0, 54, 53) << 10; // Reversed.
  1259. b[1] = get_bits_uint64(data0, 55, 62);
  1260. b[0] |= get_bits_uint64(data0, 63, 63) << 11; // MSB
  1261. b[0] |= get_bits_uint64(data1, 0, 0) << 10; // LSB
  1262. partition_set_id = 0;
  1263. block.index = 65;
  1264. delta_bits_r = delta_bits_g = delta_bits_b = 8;
  1265. break;
  1266. case 13 : // Original mode 15
  1267. // m[4:0],r0[9:0],g0[9:0],b0[9:0],r1[3:0],r0[10:15],g1[3:0],g0[10:15],
  1268. // b1[3:0],b0[10:15]
  1269. r[0] = get_bits_uint64(data0, 5, 14);
  1270. g[0] = get_bits_uint64(data0, 15, 24);
  1271. b[0] = get_bits_uint64(data0, 25, 34);
  1272. r[1] = get_bits_uint64(data0, 35, 38);
  1273. r[0] |= get_reversed_bits_uint64(data0, 44, 39) << 10; // Reversed.
  1274. g[1] = get_bits_uint64(data0, 45, 48);
  1275. g[0] |= get_reversed_bits_uint64(data0, 54, 49) << 10; // Reversed.
  1276. b[1] = get_bits_uint64(data0, 55, 58);
  1277. b[0] |= get_reversed_bits_uint64(data0, 63, 59) << 11; // Reversed.
  1278. b[0] |= get_bits_uint64(data1, 0, 0) << 10;
  1279. partition_set_id = 0;
  1280. block.index = 65;
  1281. delta_bits_r = delta_bits_g = delta_bits_b = 4;
  1282. break;
  1283. }
  1284. int nu_subsets;
  1285. if (mode >= 10)
  1286. nu_subsets = 1;
  1287. else
  1288. nu_subsets = 2;
  1289. if (signed_flag) {
  1290. r[0] = sign_extend(r[0], bptc_float_EPB[mode], 32);
  1291. g[0] = sign_extend(g[0], bptc_float_EPB[mode], 32);
  1292. b[0] = sign_extend(b[0], bptc_float_EPB[mode], 32);
  1293. }
  1294. if (mode != 9 && mode != 10) {
  1295. // Transformed endpoints.
  1296. for (int i = 1; i < nu_subsets * 2; i++) {
  1297. r[i] = sign_extend(r[i], delta_bits_r, 32);
  1298. r[i] = (r[0] + r[i]) & (((uint32_t)1 << bptc_float_EPB[mode]) - 1);
  1299. g[i] = sign_extend(g[i], delta_bits_g, 32);
  1300. g[i] = (g[0] + g[i]) & (((uint32_t)1 << bptc_float_EPB[mode]) - 1);
  1301. b[i] = sign_extend(b[i], delta_bits_b, 32);
  1302. b[i] = (b[0] + b[i]) & (((uint32_t)1 << bptc_float_EPB[mode]) - 1);
  1303. if (signed_flag) {
  1304. r[i] = sign_extend(r[i], bptc_float_EPB[mode], 32);
  1305. g[i] = sign_extend(g[i], bptc_float_EPB[mode], 32);
  1306. b[i] = sign_extend(b[i], bptc_float_EPB[mode], 32);
  1307. }
  1308. }
  1309. }
  1310. else // Mode 9 or 10, no transformed endpoints.
  1311. if (signed_flag)
  1312. for (int i = 1; i < nu_subsets * 2; i++) {
  1313. r[i] = sign_extend(r[i], bptc_float_EPB[mode], 32);
  1314. g[i] = sign_extend(g[i], bptc_float_EPB[mode], 32);
  1315. b[i] = sign_extend(b[i], bptc_float_EPB[mode], 32);
  1316. }
  1317. // Unquantize endpoints.
  1318. if (signed_flag)
  1319. for (int i = 0; i < 2 * nu_subsets; i++) {
  1320. r[i] = unquantize_signed(r[i], mode);
  1321. g[i] = unquantize_signed(g[i], mode);
  1322. b[i] = unquantize_signed(b[i], mode);
  1323. }
  1324. else
  1325. for (int i = 0; i < 2 * nu_subsets; i++) {
  1326. r[i] = unquantize(r[i], mode);
  1327. g[i] = unquantize(g[i], mode);
  1328. b[i] = unquantize(b[i], mode);
  1329. }
  1330. uint8_t subset_index[16];
  1331. for (int i = 0; i < 16; i++) {
  1332. // subset_index[i] is a number from 0 to 1, depending on the number of subsets.
  1333. subset_index[i] = bptc_float_get_partition_index(nu_subsets, partition_set_id, i);
  1334. }
  1335. uint8_t anchor_index[4]; // Only need max. 2 elements
  1336. for (int i = 0; i < nu_subsets; i++)
  1337. anchor_index[i] = get_anchor_index(partition_set_id, i, nu_subsets);
  1338. uint8_t color_index[16];
  1339. // Extract index bits.
  1340. int color_index_bit_count = 3;
  1341. if ((bitstring[0] & 3) == 3) // This defines original modes 3, 7, 11, 15
  1342. color_index_bit_count = 4;
  1343. // Because the index bits are all in the second 64-bit word, there is no need to use
  1344. // block_extract_bits().
  1345. data1 >>= (block.index - 64);
  1346. uint8_t mask1 = (1 << color_index_bit_count) - 1;
  1347. uint8_t mask2 = (1 << (color_index_bit_count - 1)) - 1;
  1348. for (int i = 0; i < 16; i++) {
  1349. if (i == anchor_index[subset_index[i]]) {
  1350. // Highest bit is zero.
  1351. // color_index[i] = block_extract_bits(&block, color_index_bit_count - 1);
  1352. color_index[i] = data1 & mask2;
  1353. data1 >>= color_index_bit_count - 1;
  1354. }
  1355. else {
  1356. // color_index[i] = block_extract_bits(&block, color_index_bit_count);
  1357. color_index[i] = data1 & mask1;
  1358. data1 >>= color_index_bit_count;
  1359. }
  1360. }
  1361. for (int i = 0; i < 16; i++) {
  1362. int32_t endpoint_start_r, endpoint_start_g, endpoint_start_b;
  1363. int32_t endpoint_end_r, endpoint_end_g, endpoint_end_b;
  1364. endpoint_start_r = r[2 * subset_index[i]];
  1365. endpoint_end_r = r[2 * subset_index[i] + 1];
  1366. endpoint_start_g = g[2 * subset_index[i]];
  1367. endpoint_end_g = g[2 * subset_index[i] + 1];
  1368. endpoint_start_b = b[2 * subset_index[i]];
  1369. endpoint_end_b = b[2 * subset_index[i] + 1];
  1370. uint64_t output;
  1371. if (signed_flag) {
  1372. int32_t r16 = interpolate_float(endpoint_start_r, endpoint_end_r, color_index[i],
  1373. color_index_bit_count);
  1374. if (r16 < 0)
  1375. r16 = - (((- r16) * 31) >> 5);
  1376. else
  1377. r16 = (r16 * 31) >> 5;
  1378. int s = 0;
  1379. if (r16 < 0) {
  1380. s = 0x8000;
  1381. r16 = - r16;
  1382. }
  1383. r16 |= s;
  1384. int32_t g16 = interpolate_float(endpoint_start_g, endpoint_end_g, color_index[i],
  1385. color_index_bit_count);
  1386. if (g16 < 0)
  1387. g16 = - (((- g16) * 31) >> 5);
  1388. else
  1389. g16 = (g16 * 31) >> 5;
  1390. s = 0;
  1391. if (g16 < 0) {
  1392. s = 0x8000;
  1393. g16 = - g16;
  1394. }
  1395. g16 |= s;
  1396. int32_t b16 = interpolate_float(endpoint_start_b, endpoint_end_b, color_index[i],
  1397. color_index_bit_count);
  1398. if (b16 < 0)
  1399. b16 = - (((- b16) * 31) >> 5);
  1400. else
  1401. b16 = (b16 * 31) >> 5;
  1402. s = 0;
  1403. if (b16 < 0) {
  1404. s = 0x8000;
  1405. b16 = - b16;
  1406. }
  1407. b16 |= s;
  1408. output = pack_rgb16(r16, g16, b16);
  1409. }
  1410. else {
  1411. output = pack_r16(interpolate_float(endpoint_start_r, endpoint_end_r, color_index[i],
  1412. color_index_bit_count) * 31 / 64);
  1413. output |= pack_g16(interpolate_float(endpoint_start_g, endpoint_end_g, color_index[i],
  1414. color_index_bit_count) * 31 / 64);
  1415. output |= pack_b16(interpolate_float(endpoint_start_b, endpoint_end_b, color_index[i],
  1416. color_index_bit_count) * 31 / 64);
  1417. }
  1418. *(uint64_t *)&image_buffer[i * 2] = output;
  1419. }
  1420. return 1;
  1421. }
  /*
   * Entry point for the non-signed BPTC float block format.
   *
   * Forwards bitstring, image_buffer and flags unchanged to
   * draw_block4x4_bptc_float_shared(), passing 0 as the third argument
   * (presumably the shared routine's signed flag -- confirm against its
   * definition). The shared routine's return value is handed back as-is.
   */
  int draw_block4x4_bptc_float(const unsigned char *bitstring, unsigned int *image_buffer, int flags) {
      const int signed_variant = 0;

      return draw_block4x4_bptc_float_shared(bitstring, image_buffer, signed_variant, flags);
  }
  /*
   * Entry point for the signed BPTC float block format.
   *
   * Forwards bitstring, image_buffer and flags unchanged to
   * draw_block4x4_bptc_float_shared(), passing 1 as the third argument
   * (presumably the shared routine's signed flag -- confirm against its
   * definition). The shared routine's return value is handed back as-is.
   */
  int draw_block4x4_bptc_signed_float(const unsigned char *bitstring, unsigned int *image_buffer, int flags) {
      const int signed_variant = 1;

      return draw_block4x4_bptc_float_shared(bitstring, image_buffer, signed_variant, flags);
  }