2
0

BC6HDecode.hlsl 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
// CHECK: groupId
// CHECK: flattenedThreadIdInGroup
// CHECK: bufferStore
//--------------------------------------------------------------------------------------
// File: BC6HDecode.hlsl
//
// The Compute Shader for BC6 Decoder
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//--------------------------------------------------------------------------------------
  // Define REF_DEVICE to compile in the GroupMemoryBarrierWithGroupSync() calls in main().
  //#define REF_DEVICE

  #define UINTLENGTH   32
  #define NCHANNELS    3

  // Values of g_format that the decoder distinguishes.
  // NOTE(review): 96 / 95 look like DXGI_FORMAT_BC6H_SF16 / DXGI_FORMAT_BC6H_UF16 - confirm with the host code.
  #define SIGNED_F16   96
  #define UNSIGNED_F16 95
  // Per-dispatch constants read by main().
  cbuffer cbCS : register( b0 )
  {
      uint g_tex_width;       // row pitch (in texels) used when computing the output address
      uint g_num_block_x;     // divisor that splits a block id into block_x / block_y
      uint g_format;          // compared against SIGNED_F16 to enable sign extension
      uint g_tex_size;        // output addresses >= this value are not written
      uint g_start_block_id;  // offset added to the block index derived from the group id
  };
  // Description of the encoding mode of one block, filled in by main().
  struct Mode
  {
      uint  type;         // mode_index + 1; the value compared against candidateModeFlag[]
      bool  transformed;  // false only for mode indices 9 and 10
      uint4 prec;         // x: precision of the first endpoint (also passed to unquantize);
                          // yzw: per-channel precision used when sign-extending the other endpoints
  };
  // Mode.type value for each of the 14 mode indices (index + 1).
  static const uint candidateModeFlag[14] =
  {
      1, 2, 3, 4, 5, 6, 7,
      8, 9, 10, 11, 12, 13, 14
  };

  // Endpoint precisions per mode index; copied into Mode.prec by main().
  static const uint4 candidateModePrec[14] =
  {
      uint4(10,5,5,5),      // index 0
      uint4(7,6,6,6),       // index 1
      uint4(11,5,4,4),      // index 2
      uint4(11,4,5,4),      // index 3
      uint4(11,4,4,5),      // index 4
      uint4(9,5,5,5),       // index 5
      uint4(8,6,5,5),       // index 6
      uint4(8,5,6,5),       // index 7
      uint4(8,5,5,6),       // index 8
      uint4(6,6,6,6),       // index 9
      uint4(10,10,10,10),   // index 10
      uint4(11,9,9,9),      // index 11
      uint4(12,8,8,8),      // index 12
      uint4(16,4,4,4)       // index 13
  };

  // One 16-bit mask per partition index. main() tests bit (y * 4 + x) to decide
  // whether a texel uses the second endpoint pair.
  static const uint candidateSectionBit[32] =
  {
      0xCCCC, 0x8888, 0xEEEE, 0xECC8,
      0xC880, 0xFEEC, 0xFEC8, 0xEC80,
      0xC800, 0xFFEC, 0xFE80, 0xE800,
      0xFFE8, 0xFF00, 0xFFF0, 0xF000,
      0xF710, 0x008E, 0x7100, 0x08CE,
      0x008C, 0x7310, 0x3100, 0x8CCE,
      0x088C, 0x3110, 0x6666, 0x366C,
      0x17E8, 0x0FF0, 0x718E, 0x399C
  };
  // ---- Forward declarations of the helpers used by main() ----

  // Mode detection.
  int  extract_mode_index( uint4 block );

  // Endpoint extraction for mode types above 10 (two endpoints, read by threads 0 and 1).
  void extract_compressed_endpoints10( out int3 endPoint, uint mode_type, uint4 block );
  void extract_compressed_endpoints11( out int3 endPoint, uint mode_type, uint4 block );

  // Sign extension and index extraction for mode types above 10.
  void SIGN_EXTEND( uint3 prec, inout int3 color );
  uint extract_index_ONE( uint x, uint y, uint4 block );

  // Endpoint extraction for mode types 1..10 (four endpoints, read by threads 0..3).
  void extract_compressed_endpoints20( out int3 endPoint, uint mode_type, uint4 block );
  void extract_compressed_endpoints21( out int3 endPoint, uint mode_type, uint4 block );
  void extract_compressed_endpoints22( out int3 endPoint, uint mode_type, uint4 block );
  void extract_compressed_endpoints23( out int3 endPoint, uint mode_type, uint4 block );
  uint extract_index_TWO( uint x, uint y, uint partition_index, uint4 block );

  // Unquantization and palette helpers.
  void  unquantize( inout int3 color, uint prec );
  void  generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, uint prec, int i );
  void  generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, uint prec, int i );
  uint3 finish_unquantize( int3 color );
  // Input: one uint4 (128 bits) per compressed block. Output: one uint4 per decoded texel.
  StructuredBuffer<uint4>   g_InBuff  : register( t0 );
  RWStructuredBuffer<uint4> g_OutBuff : register( u0 );

  // Thread-group layout: 64 threads per group, 16 threads (4 x 4 texels) per block,
  // hence 4 blocks per group.
  #define THREAD_GROUP_SIZE 64
  #define BLOCK_SIZE_Y      4
  #define BLOCK_SIZE_X      4
  #define BLOCK_SIZE        (BLOCK_SIZE_Y * BLOCK_SIZE_X)
  #define BLOCK_IN_GROUP    (THREAD_GROUP_SIZE / BLOCK_SIZE)

  // Scratch storage, one int4 slot per thread. Within a block's 16 slots, slot 4
  // holds the raw block bits and slots 0..3 hold the decoded endpoints.
  groupshared int4 shared_temp[THREAD_GROUP_SIZE];
  // Entry point. Every run of BLOCK_SIZE consecutive threads decodes one block:
  //   1. thread 4 loads the raw block into shared_temp,
  //   2. threads 0..3 extract, sign-extend and unquantize the endpoints into shared_temp,
  //   3. every thread interpolates its own texel and stores it to g_OutBuff.
  // The group barriers between the phases are compiled in only when REF_DEVICE is defined.
  // NOTE(review): without REF_DEVICE the phases presumably rely on the 16 threads of a block
  // executing in lockstep - confirm this holds on the targeted hardware.
  [numthreads( THREAD_GROUP_SIZE, 1, 1 )]
  void main( uint3 groupID : SV_GroupID, uint GI : SV_GroupIndex )
  {
      const uint blockInGroup  = GI / BLOCK_SIZE;
      const uint threadInBlock = GI - blockInGroup * BLOCK_SIZE;
      const uint blockID       = g_start_block_id + groupID.x * BLOCK_IN_GROUP + blockInGroup;
      const uint slotBase      = blockInGroup * BLOCK_SIZE;   // first shared_temp slot of this block

      // Phase 1: one thread per block fetches the compressed data.
      if ( threadInBlock == 4 )
      {
          shared_temp[GI] = asint( g_InBuff[blockID] );
      }
  #ifdef REF_DEVICE
      GroupMemoryBarrierWithGroupSync();
  #endif

      const uint4 bc_data    = asuint( shared_temp[slotBase + 4] );
      const int   mode_index = extract_mode_index( bc_data );

      Mode mode;
      mode.type        = mode_index + 1;
      mode.prec        = candidateModePrec[mode_index];
      mode.transformed = ( mode_index != 9 ) && ( mode_index != 10 );

      const bool twoRegion    = ( mode.type <= 10 );           // four endpoints instead of two
      const bool signedFormat = ( g_format == SIGNED_F16 );

      // Phase 2a: threads 0..3 pull their endpoint out of the block bits.
      int3 ep = 0;
      if ( threadInBlock == 0 )
      {
          if ( twoRegion )
          {
              extract_compressed_endpoints20( ep, mode.type, bc_data );
          }
          else
          {
              extract_compressed_endpoints10( ep, mode.type, bc_data );
          }

          if ( signedFormat )
          {
              SIGN_EXTEND( mode.prec.x, ep );
          }

          // Published before unquantization so the other endpoints can add it as their base.
          shared_temp[GI] = ep.xyzz;
      }
      else if ( threadInBlock == 1 )
      {
          if ( twoRegion )
          {
              extract_compressed_endpoints21( ep, mode.type, bc_data );
          }
          else
          {
              extract_compressed_endpoints11( ep, mode.type, bc_data );
          }
      }
      else if ( twoRegion && ( threadInBlock < 4 ) )
      {
          if ( threadInBlock == 2 )
          {
              extract_compressed_endpoints22( ep, mode.type, bc_data );
          }
          else
          {
              extract_compressed_endpoints23( ep, mode.type, bc_data );
          }
      }
  #ifdef REF_DEVICE
      GroupMemoryBarrierWithGroupSync();
  #endif

      // Phase 2b: finish the endpoints and store them in slots 0..3.
      if ( threadInBlock < 4 )
      {
          const bool secondaryEndpoint =
              ( threadInBlock == 1 ) || ( twoRegion && ( threadInBlock > 1 ) );

          if ( secondaryEndpoint )
          {
              if ( signedFormat || mode.transformed )
              {
                  SIGN_EXTEND( mode.prec.yzw, ep );
              }
              if ( mode.transformed )
              {
                  ep += shared_temp[slotBase + 0].xyz;
              }
          }

          unquantize( ep, mode.prec.x );
          shared_temp[GI] = ep.xyzz;
      }
  #ifdef REF_DEVICE
      GroupMemoryBarrierWithGroupSync();
  #endif

      // Phase 3: locate this thread's texel in the output and decode it.
      const uint y       = threadInBlock / BLOCK_SIZE_X;
      const uint x       = threadInBlock - y * BLOCK_SIZE_X;
      const uint block_y = blockID / g_num_block_x;
      const uint block_x = blockID - block_y * g_num_block_x;
      const uint addr    = ( block_y * BLOCK_SIZE_Y + y ) * g_tex_width + block_x * BLOCK_SIZE_X + x;

      if ( addr >= g_tex_size )
      {
          return;   // texel lies outside the output buffer
      }

      // Interpolation weights for 3-bit and 4-bit palette indices.
      static const int kWeight3[] = {0, 9, 18, 27, 37, 46, 55, 64};
      static const int kWeight4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};

      uint ep_index = slotBase;
      int  weight;
      if ( twoRegion )
      {
          const uint partition_index = ( bc_data.z & 0x0003E000 ) >> 13;
          const uint subset = ( candidateSectionBit[partition_index] >> ( y * 4 + x ) ) & 1;
          ep_index += subset * 2;   // second endpoint pair lives in slots 2 and 3

          const uint index = extract_index_TWO( x, y, partition_index, bc_data );
          weight = kWeight3[index];
      }
      else
      {
          const uint index = extract_index_ONE( x, y, bc_data );
          weight = kWeight4[index];
      }

      const int3 low  = shared_temp[ep_index + 0].xyz;
      const int3 high = shared_temp[ep_index + 1].xyz;

      // Fixed-point blend with 6 fractional bits, rounded to nearest.
      const uint3 texel = finish_unquantize( ( ( low << 6 ) + ( high - low ) * weight + 32 ) >> 6 );

      // 0x3C00 is the half-precision bit pattern of 1.0.
      g_OutBuff[addr] = uint4( texel, 0x3C00 );
  }
  // Masks selecting the 2-bit and the 5-bit mode field from the low bits of block.r.
  static const uint candidateModeMask[2] = { 0x03, 0x1f };

  // Mode-field value of each mode index: entries 0 and 1 are compared against the
  // 2-bit field, entries 2..13 against the 5-bit field (see extract_mode_index).
  static const uint candidateModeMemory[14] =
  {
      0x00, 0x01,
      0x02, 0x06, 0x0A, 0x0E,
      0x12, 0x16, 0x1A, 0x1E,
      0x03, 0x07, 0x0B, 0x0F
  };
  // Returns the mode index (0..13) encoded in the low bits of block.r.
  //
  // Indices 0 and 1 are identified by the 2-bit field, indices 2..13 by the 5-bit
  // field (candidateModeMask / candidateModeMemory).
  //
  // The previous implementation returned an uninitialized value when the 5-bit field
  // matched no table entry, and main() then used that value to index the 14-entry
  // candidateModePrec table. Such encodings now yield index 0 so the result is always
  // a valid table index.
  // NOTE(review): unmatched encodings are presumably the reserved BC6H modes; decoding
  // them as mode index 0 is only a safe fallback. Producing a dedicated output for them
  // would need a separate check in the caller.
  int extract_mode_index( uint4 block )
  {
      int mode_index = 0;

      const uint type2 = block.r & candidateModeMask[0];
      if ( type2 == candidateModeMemory[0] )
      {
          mode_index = 0;
      }
      else if ( type2 == candidateModeMemory[1] )
      {
          mode_index = 1;
      }
      else
      {
          // Table entries are distinct, so at most one iteration matches.
          const uint type5 = block.r & candidateModeMask[1];
          for ( int i = 2; i < 14; ++i )
          {
              if ( type5 == candidateModeMemory[i] )
              {
                  mode_index = i;
              }
          }
      }

      return mode_index;
  }
  // Sign-extends each channel of `color` from a `prec`-bit two's-complement value:
  // channels whose bit (prec - 1) is set have that bit's weight subtracted,
  // the others are left unchanged.
  void SIGN_EXTEND( uint3 prec, inout int3 color )
  {
      const uint3 signBit  = 1 << (prec - 1);
      const uint3 extended = (color & (signBit - 1)) - signBit;
      color = (color & signBit) ? extended : color;
  }
  // Sign-extends the two endpoints of a single endpoint pair.
  // The first endpoint is extended only for the signed format; the second one
  // also when the mode is transformed.
  void sign_extend( Mode mode, inout int2x3 endPoint[1] )
  {
      const bool isSigned = ( g_format == SIGNED_F16 );

      if ( isSigned )
      {
          SIGN_EXTEND( mode.prec.x, endPoint[0][0] );
      }
      if ( isSigned || mode.transformed )
      {
          SIGN_EXTEND( mode.prec.yzw, endPoint[0][1] );
      }
  }
  // Sign-extends the four endpoints of two endpoint pairs.
  // The first endpoint is extended only for the signed format; the remaining
  // three also when the mode is transformed.
  void sign_extend( Mode mode, inout int2x3 endPoint[2] )
  {
      const bool isSigned = ( g_format == SIGNED_F16 );

      if ( isSigned )
      {
          SIGN_EXTEND( mode.prec.x, endPoint[0][0] );
      }
      if ( isSigned || mode.transformed )
      {
          SIGN_EXTEND( mode.prec.yzw, endPoint[0][1] );
          SIGN_EXTEND( mode.prec.yzw, endPoint[1][0] );
          SIGN_EXTEND( mode.prec.yzw, endPoint[1][1] );
      }
  }
  // Extracts both endpoints of a block whose mode type is candidateModeFlag[10..13].
  // endPoint[0][0] and endPoint[0][1] receive the raw (not yet sign-extended) bit fields.
  // For any other mode_type the output is left unassigned.
  void extract_compressed_endpoints( out int2x3 endPoint[1], uint mode_type, uint4 block )
  {
      // The three 32-bit words the endpoint bits are spread over.
      const uint w0 = block.x;
      const uint w1 = block.y;
      const uint w2 = block.z;

      if ( mode_type == candidateModeFlag[10] )
      {
          endPoint[0][0] = int3(
              ( w0 & 0x00007FE0 ) >> 5,
              ( w0 & 0x01FF8000 ) >> 15,
              ( ( w1 & 0x00000007 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x00001FF8 ) >> 3,
              ( w1 & 0x007FE000 ) >> 13,
              ( ( w2 & 0x00000001 ) << 9 ) | ( ( w1 & 0xFF800000 ) >> 23 ) );
      }
      else if ( mode_type == candidateModeFlag[11] )
      {
          endPoint[0][0] = int3(
              ( ( w1 & 0x00001000 ) >> 2 ) | ( ( w0 & 0x00007FE0 ) >> 5 ),
              ( ( w1 & 0x00400000 ) >> 12 ) | ( ( w0 & 0x01FF8000 ) >> 15 ),
              ( ( w2 & 0x00000001 ) << 10 ) | ( ( w1 & 0x00000007 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x00000FF8 ) >> 3,
              ( w1 & 0x003FE000 ) >> 13,
              ( w1 & 0xFF800000 ) >> 23 );
      }
      else if ( mode_type == candidateModeFlag[12] )   // violate the spec in [0][0]
      {
          endPoint[0][0] = int3(
              ( ( w1 & 0x00000800 ) >> 0 ) | ( ( w1 & 0x00001000 ) >> 2 ) | ( ( w0 & 0x00007FE0 ) >> 5 ),
              ( ( w1 & 0x00200000 ) >> 10 ) | ( ( w1 & 0x00400000 ) >> 12 ) | ( ( w0 & 0x01FF8000 ) >> 15 ),
              ( ( w1 & 0x80000000 ) >> 20 ) | ( ( w2 & 0x00000001 ) << 10 ) | ( ( w1 & 0x00000007 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x000007F8 ) >> 3,
              ( w1 & 0x001FE000 ) >> 13,
              ( w1 & 0x7F800000 ) >> 23 );
      }
      else if ( mode_type == candidateModeFlag[13] )
      {
          // NOTE(review): the six high bits are placed in ascending order here; check against
          // the BC6H bit layout for this mode whether they are stored in reversed order.
          endPoint[0][0] = int3(
              ( ( w1 & 0x00001F80 ) << 3 ) | ( ( w0 & 0x00007FE0 ) >> 5 ),
              ( ( w1 & 0x007E0000 ) >> 7 ) | ( ( w0 & 0x01FF8000 ) >> 15 ),
              ( ( w1 & 0xF8000000 ) >> 17 ) | ( ( w2 & 0x00000001 ) << 15 ) | ( ( w1 & 0x00000007 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x00000078 ) >> 3,
              ( w1 & 0x0001E000 ) >> 13,
              ( w1 & 0x07800000 ) >> 23 );
      }
  }
  // Extracts the first endpoint of a block whose mode type is candidateModeFlag[10..13].
  // All four modes share the same low 10/10/7 bits taken from block.x; they differ only
  // in the extra high bits taken from block.y / block.z.
  // For any other mode_type the output is left unassigned.
  void extract_compressed_endpoints10( out int3 endPoint, uint mode_type, uint4 block )
  {
      const uint  w1   = block.y;
      const uint  w2   = block.z;
      const uint3 base = (block.x & uint3(0x00007FE0, 0x01FF8000, 0xFE000000)) >> uint3(5, 15, 25);

      if ( mode_type == candidateModeFlag[10] )
      {
          const uint3 extra = uint3( 0, 0, ( w1 & 0x00000007 ) << 7 );
          endPoint = base | extra;
      }
      else if ( mode_type == candidateModeFlag[11] )
      {
          const uint3 extra = uint3(
              ( w1 & 0x00001000 ) >> 2,
              ( w1 & 0x00400000 ) >> 12,
              ( ( w2 & 0x00000001 ) << 10 ) | ( ( w1 & 0x00000007 ) << 7 ) );
          endPoint = base | extra;
      }
      else if ( mode_type == candidateModeFlag[12] )   // violate the spec in [0][0]
      {
          const uint3 extra = uint3(
              ( ( w1 & 0x00000800 ) >> 0 ) | ( ( w1 & 0x00001000 ) >> 2 ),
              ( ( w1 & 0x00200000 ) >> 10 ) | ( ( w1 & 0x00400000 ) >> 12 ),
              ( ( w1 & 0x80000000 ) >> 20 ) | ( ( w2 & 0x00000001 ) << 10 ) | ( ( w1 & 0x00000007 ) << 7 ) );
          endPoint = base | extra;
      }
      else if ( mode_type == candidateModeFlag[13] )
      {
          // NOTE(review): the six high bits are placed in ascending order here; check against
          // the BC6H bit layout for this mode whether they are stored in reversed order.
          const uint3 extra = uint3(
              ( w1 & 0x00001F80 ) << 3,
              ( w1 & 0x007E0000 ) >> 7,
              ( ( w1 & 0x00000007 ) << 7 ) | ( ( w1 & 0xF8000000 ) >> 17 ) | ( ( w2 & 0x00000001 ) << 15 ) );
          endPoint = base | extra;
      }
  }
  // Extracts the second endpoint of a block whose mode type is candidateModeFlag[10..13].
  // Each mode selects a per-channel mask over block.y; the fields always start at
  // bits 3, 13 and 23. For any other mode_type the output is left unassigned.
  void extract_compressed_endpoints11( out int3 endPoint, uint mode_type, uint4 block )
  {
      uint3 mask  = 0;
      bool  known = true;

      if ( mode_type == candidateModeFlag[10] )
      {
          mask = uint3(0x00001FF8, 0x007FE000, 0xFF800000);
      }
      else if ( mode_type == candidateModeFlag[11] )
      {
          mask = uint3(0x00000FF8, 0x003FE000, 0xFF800000);
      }
      else if ( mode_type == candidateModeFlag[12] )   // violate the spec in [0][0]
      {
          mask = uint3(0x000007F8, 0x001FE000, 0x7F800000);
      }
      else if ( mode_type == candidateModeFlag[13] )
      {
          mask = uint3(0x00000078, 0x0001E000, 0x07800000);
      }
      else
      {
          known = false;
      }

      if ( known )
      {
          endPoint = (block.y & mask) >> uint3(3, 13, 23);

          // Only the first of these modes has a tenth blue bit, stored in block.z.
          if ( mode_type == candidateModeFlag[10] )
          {
              endPoint.b |= ( block.z & 0x00000001 ) << 9;
          }
      }
  }
  // Returns the palette index of texel (x, y) for blocks with a single endpoint pair.
  // Texel (0, 0) has a 3-bit index starting at bit 1 of block.z; every other texel has a
  // 4-bit index, rows 0-1 in block.z and rows 2-3 in block.w.
  uint extract_index_ONE( uint x, uint y, uint4 block )
  {
      if ( ( x | y ) == 0 )
      {
          return ( block.z >> 1 ) & 0x00000007;
      }

      const bool upperHalf = ( y < 2 );
      const uint word      = upperHalf ? block.z : block.w;
      const uint row       = upperHalf ? y : ( y - 2 );

      return ( word >> ( row * 16 + x * 4 ) ) & 0x0000000F;
  }
  369. //void extract_partition( out Partition partition, uint4 block )
  370. //{
  371. /*static const uint4x4 candidateSection[32] =
  372. {
  373. {0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1}, {0,0,0,1, 0,0,0,1, 0,0,0,1, 0,0,0,1}, {0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1}, {0,0,0,1, 0,0,1,1, 0,0,1,1, 0,1,1,1},
  374. {0,0,0,0, 0,0,0,1, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,1, 0,0,1,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,0,1,1, 0,1,1,1},
  375. {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,1,1}, {0,0,1,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,1, 0,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,1, 0,1,1,1},
  376. {0,0,0,1, 0,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1}, {0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1},
  377. {0,0,0,0, 1,0,0,0, 1,1,1,0, 1,1,1,1}, {0,1,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,1,0}, {0,1,1,1, 0,0,1,1, 0,0,0,1, 0,0,0,0},
  378. {0,0,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,1,0,0, 1,1,1,0}, {0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,1, 0,0,1,1, 0,0,1,1, 0,0,0,1},
  379. {0,0,1,1, 0,0,0,1, 0,0,0,1, 0,0,0,0}, {0,0,0,0, 1,0,0,0, 1,0,0,0, 1,1,0,0}, {0,1,1,0, 0,1,1,0, 0,1,1,0, 0,1,1,0}, {0,0,1,1, 0,1,1,0, 0,1,1,0, 1,1,0,0},
  380. {0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0}, {0,0,0,0, 1,1,1,1, 1,1,1,1, 0,0,0,0}, {0,1,1,1, 0,0,0,1, 1,0,0,0, 1,1,1,0}, {0,0,1,1, 1,0,0,1, 1,0,0,1, 1,1,0,0}
  381. };*/
  382. // partition.index = ( block.z & 0x0003E000 ) >> 13;
  383. //}
  // Extracts all four endpoints of a block whose mode type is candidateModeFlag[0..9].
  // endPoint[0][0], endPoint[0][1], endPoint[1][0] and endPoint[1][1] receive the raw
  // (not yet sign-extended) bit fields, each written as one (r, g, b) row.
  // For any other mode_type the output is left unassigned.
  void extract_compressed_endpoints( out int2x3 endPoint[2], uint mode_type, uint4 block )
  {
      // The three 32-bit words the endpoint bits are spread over.
      const uint w0 = block.x;
      const uint w1 = block.y;
      const uint w2 = block.z;

      if ( mode_type == candidateModeFlag[0] )
      {
          endPoint[0][0] = int3(
              ( w0 & 0x00007FE0 ) >> 5,
              ( w0 & 0x01FF8000 ) >> 15,
              ( ( w1 & 0x00000007 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x000000F8 ) >> 3,
              ( w1 & 0x0003E000 ) >> 13,
              ( w1 & 0x0F800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000003E ) >> 1,
              ( ( w0 & 0x00000004 ) << 2 ) | ( ( w1 & 0x00001E00 ) >> 9 ),
              ( ( w0 & 0x00000008 ) << 1 ) | ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00000F80 ) >> 7,
              ( ( w1 & 0x00000100 ) >> 4 ) | ( ( w1 & 0x00780000 ) >> 19 ),
              ( ( w0 & 0x00000010 ) >> 0 ) | ( ( w2 & 0x00001000 ) >> 9 ) | ( ( w2 & 0x00000040 ) >> 4 ) | ( ( w1 & 0x10000000 ) >> 27 ) | ( ( w1 & 0x00040000 ) >> 18 ) );
      }
      else if ( mode_type == candidateModeFlag[1] )
      {
          endPoint[0][0] = int3(
              ( w0 & 0x00000FE0 ) >> 5,
              ( w0 & 0x003F8000 ) >> 15,
              ( w0 & 0xFE000000 ) >> 25 );
          endPoint[0][1] = int3(
              ( w1 & 0x000001F8 ) >> 3,
              ( w1 & 0x0007E000 ) >> 13,
              ( w1 & 0x1F800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000007E ) >> 1,
              ( ( w0 & 0x00000004 ) << 3 ) | ( ( w0 & 0x01000000 ) >> 20 ) | ( ( w1 & 0x00001E00 ) >> 9 ),
              ( ( w0 & 0x00400000 ) >> 17 ) | ( ( w0 & 0x00004000 ) >> 10 ) | ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00001F80 ) >> 7,
              ( ( w0 & 0x00000018 ) << 1 ) | ( ( w1 & 0x00780000 ) >> 19 ),
              ( ( w1 & 0x00000002 ) << 4 ) | ( ( w1 & 0x00000004 ) << 2 ) | ( ( w1 & 0x00000001 ) << 3 ) | ( ( w0 & 0x00800000 ) >> 21 ) | ( ( w0 & 0x00003000 ) >> 12 ) );
      }
      else if ( mode_type == candidateModeFlag[2] )
      {
          // The top (11th) bit of each channel of the first endpoint was fixed in v0.31.
          endPoint[0][0] = int3(
              ( ( w1 & 0x00000100 ) << 2 ) | ( ( w0 & 0x00007FE0 ) >> 5 ),
              ( ( w1 & 0x00020000 ) >> 7 ) | ( ( w0 & 0x01FF8000 ) >> 15 ),
              ( ( w1 & 0x08000000 ) >> 17 ) | ( ( w1 & 0x00000007 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x000000F8 ) >> 3,
              ( w1 & 0x0001E000 ) >> 13,
              ( w1 & 0x07800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000003E ) >> 1,
              ( w1 & 0x00001E00 ) >> 9,
              ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00000F80 ) >> 7,
              ( w1 & 0x00780000 ) >> 19,
              ( ( w2 & 0x00001000 ) >> 9 ) | ( ( w2 & 0x00000040 ) >> 4 ) | ( ( w1 & 0x10000000 ) >> 27 ) | ( ( w1 & 0x00040000 ) >> 18 ) );
      }
      else if ( mode_type == candidateModeFlag[3] )
      {
          // The top (11th) bit of each channel of the first endpoint was fixed in v0.31.
          endPoint[0][0] = int3(
              ( ( w1 & 0x00000080 ) << 3 ) | ( ( w0 & 0x00007FE0 ) >> 5 ),
              ( ( w1 & 0x00040000 ) >> 8 ) | ( ( w0 & 0x01FF8000 ) >> 15 ),
              ( ( w1 & 0x08000000 ) >> 17 ) | ( ( w1 & 0x00000007 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x00000078 ) >> 3,
              ( w1 & 0x0003E000 ) >> 13,
              ( w1 & 0x07800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000001E ) >> 1,
              ( ( w2 & 0x00000800 ) >> 7 ) | ( ( w1 & 0x00001E00 ) >> 9 ),
              ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00000780 ) >> 7,
              ( ( w1 & 0x00000100 ) >> 4 ) | ( ( w1 & 0x00780000 ) >> 19 ),
              ( ( w2 & 0x00001000 ) >> 9 ) | ( ( w2 & 0x00000040 ) >> 4 ) | ( ( w1 & 0x10000000 ) >> 27 ) | ( ( w2 & 0x00000020 ) >> 5 ) );
      }
      else if ( mode_type == candidateModeFlag[4] )
      {
          // The top (11th) bit of each channel of the first endpoint was fixed in v0.31.
          endPoint[0][0] = int3(
              ( ( w1 & 0x00000080 ) << 3 ) | ( ( w0 & 0x00007FE0 ) >> 5 ),
              ( ( w1 & 0x00020000 ) >> 7 ) | ( ( w0 & 0x01FF8000 ) >> 15 ),
              ( ( w1 & 0x10000000 ) >> 18 ) | ( ( w1 & 0x00000007 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x00000078 ) >> 3,
              ( w1 & 0x0001E000 ) >> 13,
              ( w1 & 0x0F800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000001E ) >> 1,
              ( w1 & 0x00001E00 ) >> 9,
              ( ( w1 & 0x00000100 ) >> 4 ) | ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00000780 ) >> 7,
              ( w1 & 0x00780000 ) >> 19,
              ( ( w2 & 0x00000800 ) >> 7 ) | ( ( w2 & 0x00001000 ) >> 9 ) | ( ( w2 & 0x00000060 ) >> 4 ) | ( ( w1 & 0x00040000 ) >> 18 ) );
      }
      else if ( mode_type == candidateModeFlag[5] )
      {
          endPoint[0][0] = int3(
              ( w0 & 0x00003FE0 ) >> 5,
              ( w0 & 0x00FF8000 ) >> 15,
              ( ( w1 & 0x00000003 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x000000F8 ) >> 3,
              ( w1 & 0x0003E000 ) >> 13,
              ( w1 & 0x0F800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000003E ) >> 1,
              ( ( w0 & 0x01000000 ) >> 20 ) | ( ( w1 & 0x00001E00 ) >> 9 ),
              ( ( w0 & 0x00004000 ) >> 10 ) | ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00000F80 ) >> 7,
              ( ( w1 & 0x00000100 ) >> 4 ) | ( ( w1 & 0x00780000 ) >> 19 ),
              ( ( w1 & 0x00000004 ) << 2 ) | ( ( w2 & 0x00001000 ) >> 9 ) | ( ( w2 & 0x00000040 ) >> 4 ) | ( ( w1 & 0x10000000 ) >> 27 ) | ( ( w1 & 0x00040000 ) >> 18 ) );
      }
      else if ( mode_type == candidateModeFlag[6] )
      {
          endPoint[0][0] = int3(
              ( w0 & 0x00001FE0 ) >> 5,
              ( w0 & 0x007F8000 ) >> 15,
              ( ( w1 & 0x00000001 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x000001F8 ) >> 3,
              ( w1 & 0x0003E000 ) >> 13,
              ( w1 & 0x0F800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000007E ) >> 1,
              ( ( w0 & 0x01000000 ) >> 20 ) | ( ( w1 & 0x00001E00 ) >> 9 ),
              ( ( w0 & 0x00004000 ) >> 10 ) | ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00001F80 ) >> 7,
              ( ( w0 & 0x00002000 ) >> 9 ) | ( ( w1 & 0x00780000 ) >> 19 ),
              ( ( w1 & 0x00000006 ) << 2 ) | ( ( w0 & 0x00800000 ) >> 21 ) | ( ( w1 & 0x10000000 ) >> 27 ) | ( ( w1 & 0x00040000 ) >> 18 ) );
      }
      else if ( mode_type == candidateModeFlag[7] )
      {
          endPoint[0][0] = int3(
              ( w0 & 0x00001FE0 ) >> 5,
              ( w0 & 0x007F8000 ) >> 15,
              ( ( w1 & 0x00000001 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x000000F8 ) >> 3,
              ( w1 & 0x0007E000 ) >> 13,
              ( w1 & 0x0F800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000003E ) >> 1,
              ( ( w0 & 0x00800000 ) >> 18 ) | ( ( w0 & 0x01000000 ) >> 20 ) | ( ( w1 & 0x00001E00 ) >> 9 ),
              ( ( w0 & 0x00004000 ) >> 10 ) | ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00000F80 ) >> 7,
              ( ( w1 & 0x00000002 ) << 4 ) | ( ( w1 & 0x00000100 ) >> 4 ) | ( ( w1 & 0x00780000 ) >> 19 ),
              ( ( w1 & 0x00000004 ) << 2 ) | ( ( w2 & 0x00001000 ) >> 9 ) | ( ( w2 & 0x00000040 ) >> 4 ) | ( ( w1 & 0x10000000 ) >> 27 ) | ( ( w0 & 0x00002000 ) >> 13 ) );
      }
      else if ( mode_type == candidateModeFlag[8] )
      {
          endPoint[0][0] = int3(
              ( w0 & 0x00001FE0 ) >> 5,
              ( w0 & 0x007F8000 ) >> 15,
              ( ( w1 & 0x00000001 ) << 7 ) | ( ( w0 & 0xFE000000 ) >> 25 ) );
          endPoint[0][1] = int3(
              ( w1 & 0x000000F8 ) >> 3,
              ( w1 & 0x0003E000 ) >> 13,
              ( w1 & 0x1F800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000003E ) >> 1,
              ( ( w0 & 0x01000000 ) >> 20 ) | ( ( w1 & 0x00001E00 ) >> 9 ),
              ( ( w0 & 0x00800000 ) >> 18 ) | ( ( w0 & 0x00004000 ) >> 10 ) | ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00000F80 ) >> 7,
              ( ( w1 & 0x00000100 ) >> 4 ) | ( ( w1 & 0x00780000 ) >> 19 ),
              ( ( w1 & 0x00000002 ) << 4 ) | ( ( w1 & 0x00000004 ) << 2 ) | ( ( w2 & 0x00001000 ) >> 9 ) | ( ( w2 & 0x00000040 ) >> 4 ) | ( ( w0 & 0x00002000 ) >> 12 ) | ( ( w1 & 0x00040000 ) >> 18 ) );
      }
      else if ( mode_type == candidateModeFlag[9] )
      {
          endPoint[0][0] = int3(
              ( w0 & 0x000007E0 ) >> 5,
              ( w0 & 0x001F8000 ) >> 15,
              ( w0 & 0x7E000000 ) >> 25 );
          endPoint[0][1] = int3(
              ( w1 & 0x000001F8 ) >> 3,
              ( w1 & 0x0007E000 ) >> 13,
              ( w1 & 0x1F800000 ) >> 23 );
          endPoint[1][0] = int3(
              ( w2 & 0x0000007E ) >> 1,
              ( ( w0 & 0x00200000 ) >> 16 ) | ( ( w0 & 0x01000000 ) >> 20 ) | ( ( w1 & 0x00001E00 ) >> 9 ),
              ( ( w0 & 0x00400000 ) >> 17 ) | ( ( w0 & 0x00004000 ) >> 10 ) | ( ( w2 & 0x00000001 ) << 3 ) | ( ( w1 & 0xE0000000 ) >> 29 ) );
          endPoint[1][1] = int3(
              ( w2 & 0x00001F80 ) >> 7,
              ( ( w0 & 0x80000000 ) >> 26 ) | ( ( w0 & 0x00000800 ) >> 7 ) | ( ( w1 & 0x00780000 ) >> 19 ),
              ( ( w1 & 0x00000002 ) << 4 ) | ( ( w1 & 0x00000004 ) << 2 ) | ( ( w1 & 0x00000001 ) << 3 ) | ( ( w0 & 0x00800000 ) >> 21 ) | ( ( w0 & 0x00003000 ) >> 12 ) );
      }
  }
  // Extracts the first endpoint of a block whose mode type is candidateModeFlag[0..9].
  // The low bits of every channel come from block.x (fields starting at bits 5, 15 and
  // 25); the remaining high bits, where present, come from block.y.
  // For any other mode_type the output is left unassigned.
  //
  // Fix: for candidateModeFlag[2], [3] and [4] the endpoint is 11 bits wide
  // (candidateModePrec[2..4].x == 11) and the base fields already occupy bits 0..9, so
  // the extra bit taken from block.y must land on bit 10. The shifts used previously put
  // it on bit 9, where it collided with the top bit of the base field. The shifts below
  // match the endPoint[0][0] expressions of
  // extract_compressed_endpoints( out int2x3 endPoint[2], ... ) for the same modes.
  void extract_compressed_endpoints20( out int3 endPoint, uint mode_type, uint4 block )
  {
      if ( mode_type == candidateModeFlag[0] )
      {
          endPoint = (block.x & uint3(0x00007FE0, 0x01FF8000, 0xFE000000)) >> uint3(5, 15, 25);
          endPoint.b |= ( block.y & 0x00000007 ) << 7;
      }
      else if ( mode_type == candidateModeFlag[1] )
      {
          endPoint = (block.x & uint3(0x00000FE0, 0x003F8000, 0xFE000000)) >> uint3(5, 15, 25);
      }
      else if ( mode_type == candidateModeFlag[2] )
      {
          endPoint = (block.x & uint3(0x00007FE0, 0x01FF8000, 0xFE000000)) >> uint3(5, 15, 25);
          // r: block.y bit 8  -> bit 10;  b: block.y bits 0..2 -> bits 7..9
          endPoint.rb |= (block.y & uint2(0x00000100, 0x00000007)) << uint2(2, 7);
          // g: block.y bit 17 -> bit 10;  b: block.y bit 27 -> bit 10
          endPoint.gb |= (block.y & uint2(0x00020000, 0x08000000)) >> uint2(7, 17);
      }
      else if ( mode_type == candidateModeFlag[3] )
      {
          endPoint = (block.x & uint3(0x00007FE0, 0x01FF8000, 0xFE000000)) >> uint3(5, 15, 25);
          // r: block.y bit 7  -> bit 10;  b: block.y bits 0..2 -> bits 7..9
          endPoint.rb |= (block.y & uint2(0x00000080, 0x00000007)) << uint2(3, 7);
          // g: block.y bit 18 -> bit 10;  b: block.y bit 27 -> bit 10
          endPoint.gb |= (block.y & uint2(0x00040000, 0x08000000)) >> uint2(8, 17);
      }
      else if ( mode_type == candidateModeFlag[4] )
      {
          endPoint = (block.x & uint3(0x00007FE0, 0x01FF8000, 0xFE000000)) >> uint3(5, 15, 25);
          // r: block.y bit 7  -> bit 10;  b: block.y bits 0..2 -> bits 7..9
          endPoint.rb |= (block.y & uint2(0x00000080, 0x00000007)) << uint2(3, 7);
          // g: block.y bit 17 -> bit 10;  b: block.y bit 28 -> bit 10
          endPoint.gb |= (block.y & uint2(0x00020000, 0x10000000)) >> uint2(7, 18);
      }
      else if ( mode_type == candidateModeFlag[5] )
      {
          endPoint = (block.x & uint3(0x00003FE0, 0x00FF8000, 0xFE000000)) >> uint3(5, 15, 25);
          endPoint.b |= ( block.y & 0x00000003 ) << 7;
      }
      else if ( mode_type == candidateModeFlag[6] )
      {
          endPoint = (block.x & uint3(0x00001FE0, 0x007F8000, 0xFE000000)) >> uint3(5, 15, 25);
          endPoint.b |= ( block.y & 0x00000001 ) << 7;
      }
      else if ( mode_type == candidateModeFlag[7] )
      {
          endPoint = (block.x & uint3(0x00001FE0, 0x007F8000, 0xFE000000)) >> uint3(5, 15, 25);
          endPoint.b |= ( block.y & 0x00000001 ) << 7;
      }
      else if ( mode_type == candidateModeFlag[8] )
      {
          endPoint = (block.x & uint3(0x00001FE0, 0x007F8000, 0xFE000000)) >> uint3(5, 15, 25);
          endPoint.b |= ( block.y & 0x00000001 ) << 7;
      }
      else if ( mode_type == candidateModeFlag[9] )
      {
          endPoint = (block.x & uint3(0x000007E0, 0x001F8000, 0x7E000000)) >> uint3(5, 15, 25);
      }
  }
  // Unpacks the endpoint stored in block.y at fixed start positions:
  // red begins at y bit 3, green at y bit 13 and blue at y bit 23.
  // Only the width of each field depends on the layout.
  //
  //   endPoint  - receives the red/green/blue fields as raw integers.
  //   mode_type - layout selector, compared against candidateModeFlag[0..8];
  //               any other value is decoded with the 6/6/6 widths of the
  //               final (catch-all) case.
  //   block     - the compressed block as four 32-bit words.
  void extract_compressed_endpoints21( out int3 endPoint, uint mode_type, uint4 block )
  {
      // Field width in bits for (red, green, blue).
      uint3 width;
      if ( mode_type == candidateModeFlag[0] )
          width = uint3( 5, 5, 5 );
      else if ( mode_type == candidateModeFlag[1] )
          width = uint3( 6, 6, 6 );
      else if ( mode_type == candidateModeFlag[2] )
          width = uint3( 5, 4, 4 );
      else if ( mode_type == candidateModeFlag[3] )
          width = uint3( 4, 5, 4 );
      else if ( mode_type == candidateModeFlag[4] )
          width = uint3( 4, 4, 5 );
      else if ( mode_type == candidateModeFlag[5] )
          width = uint3( 5, 5, 5 );
      else if ( mode_type == candidateModeFlag[6] )
          width = uint3( 6, 5, 5 );
      else if ( mode_type == candidateModeFlag[7] )
          width = uint3( 5, 6, 5 );
      else if ( mode_type == candidateModeFlag[8] )
          width = uint3( 5, 5, 6 );
      else // candidateModeFlag[9] and anything unrecognised
          width = uint3( 6, 6, 6 );

      // Shift each field down to bit 0, then keep only its own bits.
      const uint3 fieldStart = uint3( 3, 13, 23 );
      const uint3 fieldMask = ( uint3( 1, 1, 1 ) << width ) - uint3( 1, 1, 1 );
      endPoint = ( block.yyy >> fieldStart ) & fieldMask;
  }
  // Unpacks the endpoint whose red field starts at bit 1 of block.z.
  // Green bits 0..3 always come from y bits 9..12 and blue bits 0..3 from
  // y bits 29..31 plus z bit 0; the remaining high bits are scattered over
  // block.x / block.y / block.z in a layout-specific way.
  //
  //   endPoint  - receives the red/green/blue fields as raw integers; it is
  //               not written when mode_type matches none of the candidates.
  //   mode_type - layout selector, compared against candidateModeFlag[0..9].
  //   block     - the compressed block as four 32-bit words.
  void extract_compressed_endpoints22( out int3 endPoint, uint mode_type, uint4 block )
  {
      // Pieces shared by several layouts.
      const uint redField  = block.z >> 1;                                          // each layout masks its own width
      const uint greenLow  = ( block.y >> 9 ) & 0x0000000F;                         // y bits 9..12 -> green bits 0..3
      const uint blueLow   = ( block.y >> 29 ) | ( ( block.z & 0x00000001 ) << 3 ); // y bits 29..31, z bit 0 -> blue bits 0..3
      const uint greenBit4 = ( block.x >> 20 ) & 0x00000010;                        // x bit 24 -> green bit 4
      const uint blueBit4  = ( block.x >> 10 ) & 0x00000010;                        // x bit 14 -> blue bit 4

      if ( mode_type == candidateModeFlag[0] )
      {
          // Here bit 4 of green/blue comes from x bits 2 and 3 instead.
          endPoint = uint3( redField & 0x0000001F,
                            greenLow | ( ( block.x & 0x00000004 ) << 2 ),
                            blueLow  | ( ( block.x & 0x00000008 ) << 1 ) );
      }
      else if ( mode_type == candidateModeFlag[1] )
      {
          // x bit 2 -> green bit 5, x bit 22 -> blue bit 5.
          endPoint = uint3( redField & 0x0000003F,
                            greenLow | greenBit4 | ( ( block.x & 0x00000004 ) << 3 ),
                            blueLow  | blueBit4  | ( ( block.x & 0x00400000 ) >> 17 ) );
      }
      else if ( mode_type == candidateModeFlag[2] )
      {
          endPoint = uint3( redField & 0x0000001F, greenLow, blueLow );
      }
      else if ( mode_type == candidateModeFlag[3] )
      {
          // z bit 11 -> green bit 4.
          endPoint = uint3( redField & 0x0000000F,
                            greenLow | ( ( block.z & 0x00000800 ) >> 7 ),
                            blueLow );
      }
      else if ( mode_type == candidateModeFlag[4] )
      {
          // y bit 8 -> blue bit 4.
          endPoint = uint3( redField & 0x0000000F,
                            greenLow,
                            blueLow | ( ( block.y & 0x00000100 ) >> 4 ) );
      }
      else if ( mode_type == candidateModeFlag[5] )
      {
          endPoint = uint3( redField & 0x0000001F,
                            greenLow | greenBit4,
                            blueLow  | blueBit4 );
      }
      else if ( mode_type == candidateModeFlag[6] )
      {
          endPoint = uint3( redField & 0x0000003F,
                            greenLow | greenBit4,
                            blueLow  | blueBit4 );
      }
      else if ( mode_type == candidateModeFlag[7] )
      {
          // x bit 23 -> green bit 5.
          endPoint = uint3( redField & 0x0000001F,
                            greenLow | greenBit4 | ( ( block.x & 0x00800000 ) >> 18 ),
                            blueLow  | blueBit4 );
      }
      else if ( mode_type == candidateModeFlag[8] )
      {
          // x bit 23 -> blue bit 5.
          endPoint = uint3( redField & 0x0000001F,
                            greenLow | greenBit4,
                            blueLow  | blueBit4 | ( ( block.x & 0x00800000 ) >> 18 ) );
      }
      else if ( mode_type == candidateModeFlag[9] )
      {
          // x bit 21 -> green bit 5, x bit 22 -> blue bit 5.
          endPoint = uint3( redField & 0x0000003F,
                            greenLow | greenBit4 | ( ( block.x & 0x00200000 ) >> 16 ),
                            blueLow  | blueBit4  | ( ( block.x & 0x00400000 ) >> 17 ) );
      }
  }
  // Unpacks the endpoint whose red field starts at bit 7 of block.z.
  // Green bits 0..3 always come from y bits 19..22; every other green/blue
  // bit is an individual bit picked from block.x / block.y / block.z at a
  // layout-specific position.
  //
  //   endPoint  - receives the red/green/blue fields as raw integers; it is
  //               not written when mode_type matches none of the candidates.
  //   mode_type - layout selector, compared against candidateModeFlag[0..9].
  //   block     - the compressed block as four 32-bit words.
  void extract_compressed_endpoints23( out int3 endPoint, uint mode_type, uint4 block )
  {
      const uint redField = block.z >> 7;                     // each layout masks its own width
      const uint greenLow = ( block.y >> 19 ) & 0x0000000F;   // y bits 19..22 -> green bits 0..3

      // Single source bits already moved to their destination position.
      const uint gBit4FromY8  = ( block.y >> 4 )  & 0x00000010;   // y bit 8  -> green bit 4
      const uint bBit0FromY18 = ( block.y >> 18 ) & 0x00000001;   // y bit 18 -> blue bit 0
      const uint bBit1FromY28 = ( block.y >> 27 ) & 0x00000002;   // y bit 28 -> blue bit 1
      const uint bBit2FromZ6  = ( block.z >> 4 )  & 0x00000004;   // z bit 6  -> blue bit 2
      const uint bBit2FromX23 = ( block.x >> 21 ) & 0x00000004;   // x bit 23 -> blue bit 2
      const uint bBit3FromZ12 = ( block.z >> 9 )  & 0x00000008;   // z bit 12 -> blue bit 3
      const uint bBit3FromY0  = ( block.y << 3 )  & 0x00000008;   // y bit 0  -> blue bit 3
      const uint bBit4FromY2  = ( block.y << 2 )  & 0x00000010;   // y bit 2  -> blue bit 4
      const uint bBit5FromY1  = ( block.y << 4 )  & 0x00000020;   // y bit 1  -> blue bit 5
      const uint bLow2FromX12 = ( block.x >> 12 ) & 0x00000003;   // x bits 12..13 -> blue bits 0..1

      if ( mode_type == candidateModeFlag[0] )
      {
          // x bit 4 is already blue bit 4.
          endPoint = uint3( redField & 0x0000001F,
                            greenLow | gBit4FromY8,
                            bBit3FromZ12 | bBit0FromY18 | ( block.x & 0x00000010 ) | bBit1FromY28 | bBit2FromZ6 );
      }
      else if ( mode_type == candidateModeFlag[1] )
      {
          // x bits 3..4 -> green bits 4..5.
          endPoint = uint3( redField & 0x0000003F,
                            greenLow | ( ( block.x & 0x00000018 ) << 1 ),
                            bBit2FromX23 | bBit5FromY1 | bBit4FromY2 | bBit3FromY0 | bLow2FromX12 );
      }
      else if ( mode_type == candidateModeFlag[2] )
      {
          endPoint = uint3( redField & 0x0000001F,
                            greenLow,
                            bBit3FromZ12 | bBit0FromY18 | bBit2FromZ6 | bBit1FromY28 );
      }
      else if ( mode_type == candidateModeFlag[3] )
      {
          // z bit 5 -> blue bit 0.
          endPoint = uint3( redField & 0x0000000F,
                            greenLow | gBit4FromY8,
                            bBit3FromZ12 | bBit2FromZ6 | ( ( block.z >> 5 ) & 0x00000001 ) | bBit1FromY28 );
      }
      else if ( mode_type == candidateModeFlag[4] )
      {
          // z bit 11 -> blue bit 4, z bits 5..6 -> blue bits 1..2.
          endPoint = uint3( redField & 0x0000000F,
                            greenLow,
                            bBit3FromZ12 | bBit0FromY18 | ( ( block.z & 0x00000800 ) >> 7 ) | ( ( block.z >> 4 ) & 0x00000006 ) );
      }
      else if ( mode_type == candidateModeFlag[5] )
      {
          endPoint = uint3( redField & 0x0000001F,
                            greenLow | gBit4FromY8,
                            bBit3FromZ12 | bBit1FromY28 | bBit0FromY18 | bBit4FromY2 | bBit2FromZ6 );
      }
      else if ( mode_type == candidateModeFlag[6] )
      {
          // x bit 13 -> green bit 4, y bits 1..2 -> blue bits 3..4.
          endPoint = uint3( redField & 0x0000003F,
                            greenLow | ( ( block.x & 0x00002000 ) >> 9 ),
                            bBit2FromX23 | bBit0FromY18 | bBit1FromY28 | ( ( block.y & 0x00000006 ) << 2 ) );
      }
      else if ( mode_type == candidateModeFlag[7] )
      {
          // y bit 1 -> green bit 5, x bit 13 -> blue bit 0.
          endPoint = uint3( redField & 0x0000001F,
                            greenLow | ( ( block.y & 0x00000002 ) << 4 ) | gBit4FromY8,
                            bBit3FromZ12 | bBit4FromY2 | bBit2FromZ6 | bBit1FromY28 | ( ( block.x >> 13 ) & 0x00000001 ) );
      }
      else if ( mode_type == candidateModeFlag[8] )
      {
          // x bit 13 -> blue bit 1.
          endPoint = uint3( redField & 0x0000001F,
                            greenLow | gBit4FromY8,
                            bBit3FromZ12 | bBit0FromY18 | bBit2FromZ6 | bBit5FromY1 | bBit4FromY2 | ( ( block.x & 0x00002000 ) >> 12 ) );
      }
      else if ( mode_type == candidateModeFlag[9] )
      {
          // x bit 11 -> green bit 4, x bit 31 -> green bit 5.
          endPoint = uint3( redField & 0x0000003F,
                            greenLow | ( ( block.x & 0x00000800 ) >> 7 ) | ( ( block.x >> 26 ) & 0x00000020 ),
                            bBit2FromX23 | bLow2FromX12 | bBit5FromY1 | bBit4FromY2 | bBit3FromY0 );
      }
  }
  783. /*const uint2 candidateFixUpIndex[32] =
  784. {
  785. {3,3},{3,3},{3,3},{3,3},
  786. {3,3},{3,3},{3,3},{3,3},
  787. {3,3},{3,3},{3,3},{3,3},
  788. {3,3},{3,3},{3,3},{3,3},
  789. {3,3},{2,0},{0,2},{2,0},
  790. {2,0},{0,2},{0,2},{3,3},
  791. {2,0},{0,2},{2,0},{2,0},
  792. {0,2},{0,2},{2,0},{2,0}
  793. };*/
  // Returns the palette index stored for texel (x, y) of a 4x4 block.
  //
  //   x, y            - texel coordinates; the linear texel number is y * 4 + x.
  //   partition_index - selects the fix-up texel from the 32-entry table below.
  //   block           - the compressed block; indices live in block.z (from
  //                     bit 18 upwards) and block.w.
  //
  // Texel (0,0) and the fix-up texel are stored with 2 bits, every other texel
  // with 3 bits, so each texel after the fix-up sits one bit lower than it
  // otherwise would.
  uint extract_index_TWO( uint x, uint y, uint partition_index, uint4 block )
  {
      static const uint candidateFixUpIndex1D[32] =
      {
          15,15,15,15,
          15,15,15,15,
          15,15,15,15,
          15,15,15,15,
          15,2,8,2,
          2,8,8,15,
          2,8,2,2,
          8,8,2,2
      };

      // The first texel is always a 2-bit entry at z bits 18..19.
      if ( x == 0 && y == 0 )
          return ( block.z >> 18 ) & 0x00000003;

      const uint texel = y * 4 + x;
      const uint fixUp = candidateFixUpIndex1D[partition_index];

      if ( texel <= fixUp )
      {
          // Up to and including the fix-up texel only texel 0 has been shortened.
          const uint fieldMask = ( texel == fixUp ) ? 0x00000003 : 0x00000007;
          const uint packed = ( texel < 5 ) ? ( block.z >> ( texel * 3 + 17 ) )
                                            : ( block.w >> ( texel * 3 - 15 ) );
          return packed & fieldMask;
      }

      // Past the fix-up texel: texel 5 straddles the z/w boundary
      // (one bit at the top of z, two bits at the bottom of w).
      if ( texel == 5 )
          return ( ( block.z >> 31 ) & 0x00000001 ) | ( ( block.w << 1 ) & 0x00000006 );

      const uint shifted = ( texel < 5 ) ? ( block.z >> ( texel * 3 + 16 ) )
                                         : ( block.w >> ( texel * 3 - 16 ) );
      return shifted & 0x00000007;
  }
  // Expands a quantized endpoint colour in place, component by component.
  //
  //   color - endpoint value stored with 'prec' bits per component.
  //   prec  - number of bits the endpoint was stored with.
  //
  // Unsigned format (g_format == UNSIGNED_F16): values with prec >= 15 are kept
  // as they are; otherwise 0 stays 0, the all-ones value becomes 0xFFFF and
  // anything else is rescaled to 16 bits with rounding.
  // Other formats: values with prec >= 16 are kept; otherwise the magnitude is
  // rescaled to 15 bits (saturating at 0x7FFF) and the sign is re-applied.
  void unquantize( inout int3 color, uint prec )
  {
      const int bits = asint( prec );

      if ( g_format == UNSIGNED_F16 )
      {
          if ( prec >= 15 )
              return;

          const int allOnes = ( 1 << bits ) - 1;
          const int3 rescaled = ( ( color << 16 ) + 0x8000 ) >> bits;
          const int3 expanded = ( color == allOnes ) ? 0xFFFF : rescaled;
          color = ( color == 0 ) ? color : expanded;
          return;
      }

      if ( prec >= 16 )
          return;

      // Work on the magnitude and restore the sign afterwards.
      const bool3 wasNegative = color < 0;
      const int3 magnitude = abs( color );
      const int topValue = ( 1 << ( bits - 1 ) ) - 1;
      const int3 rescaled = ( ( magnitude << 15 ) + 0x4000 ) >> ( bits - 1 );
      const int3 expanded = ( magnitude >= topValue ) ? 0x7FFF : rescaled;
      const int3 result = ( magnitude == 0 ) ? magnitude : expanded;
      color = wasNegative ? -result : result;
  }
  // Applies the final 31/64 (unsigned) or 31/32 (other formats) scale to an
  // interpolated colour and returns the resulting bit patterns.
  // For non-unsigned formats the scale is applied to the magnitude, and a
  // negative result is returned as magnitude | 0x8000 (sign bit in bit 15).
  uint3 finish_unquantize( int3 color )
  {
      if ( g_format == UNSIGNED_F16 )
          return asuint( ( color * 31 ) >> 6 );

      // Scale the magnitude so that negative values round toward zero.
      const int3 scaledPositive = ( color * 31 ) >> 5;
      const int3 scaledNegative = -( ( -color * 31 ) >> 5 );
      const int3 scaled = ( color < 0 ) ? scaledNegative : scaledPositive;

      // Convert two's complement to sign-magnitude.
      const int3 signMagnitude = ( scaled < 0 ) ? ( ( -scaled ) | 0x8000 ) : scaled;
      return asuint( signMagnitude );
  }
  // Produces entry i of an 8-entry palette between two unquantized endpoints.
  //
  //   palette - receives the finished colour (see finish_unquantize).
  //   low     - endpoint used with weight 64 - w.
  //   high    - endpoint used with weight w.
  //   prec    - not used by this function.
  //   i       - palette entry, indexes the 8-element weight table (0..7).
  void generate_palette_unquantized8( out uint3 palette, int3 low, int3 high, uint prec, int i )
  {
      // Interpolation weights out of 64 for 3-bit indices.
      static const int aWeight3[] = {0, 9, 18, 27, 37, 46, 55, 64};

      const int highWeight = aWeight3[i];
      const int lowWeight = 64 - highWeight;

      // Weighted blend with rounding (+32 before dividing by 64).
      const int3 blended = ( low * lowWeight + high * highWeight + 32 ) >> 6;
      palette = finish_unquantize( blended );
  }
  // Produces entry i of a 16-entry palette between two unquantized endpoints.
  //
  //   palette - receives the finished colour (see finish_unquantize).
  //   low     - endpoint used with weight 64 - w.
  //   high    - endpoint used with weight w.
  //   prec    - not used by this function.
  //   i       - palette entry, indexes the 16-element weight table (0..15).
  void generate_palette_unquantized16( out uint3 palette, int3 low, int3 high, uint prec, int i )
  {
      // Interpolation weights out of 64 for 4-bit indices.
      static const int aWeight4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};

      const int highWeight = aWeight4[i];
      const int lowWeight = 64 - highWeight;

      // Weighted blend with rounding (+32 before dividing by 64).
      const int3 blended = ( low * lowWeight + high * highWeight + 32 ) >> 6;
      palette = finish_unquantize( blended );
  }