astcenc_block_sizes.cpp 36 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210
  1. // SPDX-License-Identifier: Apache-2.0
  2. // ----------------------------------------------------------------------------
  3. // Copyright 2011-2022 Arm Limited
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. // use this file except in compliance with the License. You may obtain a copy
  7. // of the License at:
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. // License for the specific language governing permissions and limitations
  15. // under the License.
  16. // ----------------------------------------------------------------------------
  17. /**
  18. * @brief Functions to generate block size descriptor and decimation tables.
  19. */
  20. #include "astcenc_internal.h"
  21. /**
  22. * @brief Decode the properties of an encoded 2D block mode.
  23. *
  24. * @param block_mode The encoded block mode.
  25. * @param[out] x_weights The number of weights in the X dimension.
  26. * @param[out] y_weights The number of weights in the Y dimension.
  27. * @param[out] is_dual_plane True if this block mode has two weight planes.
  28. * @param[out] quant_mode The quantization level for the weights.
  29. * @param[out] weight_bits The storage bit count for the weights.
  30. *
  31. * @return Returns true if a valid mode, false otherwise.
  32. */
  33. static bool decode_block_mode_2d(
  34. unsigned int block_mode,
  35. unsigned int& x_weights,
  36. unsigned int& y_weights,
  37. bool& is_dual_plane,
  38. unsigned int& quant_mode,
  39. unsigned int& weight_bits
  40. ) {
  41. unsigned int base_quant_mode = (block_mode >> 4) & 1;
  42. unsigned int H = (block_mode >> 9) & 1;
  43. unsigned int D = (block_mode >> 10) & 1;
  44. unsigned int A = (block_mode >> 5) & 0x3;
  45. x_weights = 0;
  46. y_weights = 0;
  47. if ((block_mode & 3) != 0)
  48. {
  49. base_quant_mode |= (block_mode & 3) << 1;
  50. unsigned int B = (block_mode >> 7) & 3;
  51. switch ((block_mode >> 2) & 3)
  52. {
  53. case 0:
  54. x_weights = B + 4;
  55. y_weights = A + 2;
  56. break;
  57. case 1:
  58. x_weights = B + 8;
  59. y_weights = A + 2;
  60. break;
  61. case 2:
  62. x_weights = A + 2;
  63. y_weights = B + 8;
  64. break;
  65. case 3:
  66. B &= 1;
  67. if (block_mode & 0x100)
  68. {
  69. x_weights = B + 2;
  70. y_weights = A + 2;
  71. }
  72. else
  73. {
  74. x_weights = A + 2;
  75. y_weights = B + 6;
  76. }
  77. break;
  78. }
  79. }
  80. else
  81. {
  82. base_quant_mode |= ((block_mode >> 2) & 3) << 1;
  83. if (((block_mode >> 2) & 3) == 0)
  84. {
  85. return false;
  86. }
  87. unsigned int B = (block_mode >> 9) & 3;
  88. switch ((block_mode >> 7) & 3)
  89. {
  90. case 0:
  91. x_weights = 12;
  92. y_weights = A + 2;
  93. break;
  94. case 1:
  95. x_weights = A + 2;
  96. y_weights = 12;
  97. break;
  98. case 2:
  99. x_weights = A + 6;
  100. y_weights = B + 6;
  101. D = 0;
  102. H = 0;
  103. break;
  104. case 3:
  105. switch ((block_mode >> 5) & 3)
  106. {
  107. case 0:
  108. x_weights = 6;
  109. y_weights = 10;
  110. break;
  111. case 1:
  112. x_weights = 10;
  113. y_weights = 6;
  114. break;
  115. case 2:
  116. case 3:
  117. return false;
  118. }
  119. break;
  120. }
  121. }
  122. unsigned int weight_count = x_weights * y_weights * (D + 1);
  123. quant_mode = (base_quant_mode - 2) + 6 * H;
  124. is_dual_plane = D != 0;
  125. weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
  126. return (weight_count <= BLOCK_MAX_WEIGHTS &&
  127. weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
  128. weight_bits <= BLOCK_MAX_WEIGHT_BITS);
  129. }
  130. /**
  131. * @brief Decode the properties of an encoded 3D block mode.
  132. *
  133. * @param block_mode The encoded block mode.
  134. * @param[out] x_weights The number of weights in the X dimension.
  135. * @param[out] y_weights The number of weights in the Y dimension.
  136. * @param[out] z_weights The number of weights in the Z dimension.
  137. * @param[out] is_dual_plane True if this block mode has two weight planes.
  138. * @param[out] quant_mode The quantization level for the weights.
  139. * @param[out] weight_bits The storage bit count for the weights.
  140. *
  141. * @return Returns true if a valid mode, false otherwise.
  142. */
  143. static bool decode_block_mode_3d(
  144. unsigned int block_mode,
  145. unsigned int& x_weights,
  146. unsigned int& y_weights,
  147. unsigned int& z_weights,
  148. bool& is_dual_plane,
  149. unsigned int& quant_mode,
  150. unsigned int& weight_bits
  151. ) {
  152. unsigned int base_quant_mode = (block_mode >> 4) & 1;
  153. unsigned int H = (block_mode >> 9) & 1;
  154. unsigned int D = (block_mode >> 10) & 1;
  155. unsigned int A = (block_mode >> 5) & 0x3;
  156. x_weights = 0;
  157. y_weights = 0;
  158. z_weights = 0;
  159. if ((block_mode & 3) != 0)
  160. {
  161. base_quant_mode |= (block_mode & 3) << 1;
  162. unsigned int B = (block_mode >> 7) & 3;
  163. unsigned int C = (block_mode >> 2) & 0x3;
  164. x_weights = A + 2;
  165. y_weights = B + 2;
  166. z_weights = C + 2;
  167. }
  168. else
  169. {
  170. base_quant_mode |= ((block_mode >> 2) & 3) << 1;
  171. if (((block_mode >> 2) & 3) == 0)
  172. {
  173. return false;
  174. }
  175. int B = (block_mode >> 9) & 3;
  176. if (((block_mode >> 7) & 3) != 3)
  177. {
  178. D = 0;
  179. H = 0;
  180. }
  181. switch ((block_mode >> 7) & 3)
  182. {
  183. case 0:
  184. x_weights = 6;
  185. y_weights = B + 2;
  186. z_weights = A + 2;
  187. break;
  188. case 1:
  189. x_weights = A + 2;
  190. y_weights = 6;
  191. z_weights = B + 2;
  192. break;
  193. case 2:
  194. x_weights = A + 2;
  195. y_weights = B + 2;
  196. z_weights = 6;
  197. break;
  198. case 3:
  199. x_weights = 2;
  200. y_weights = 2;
  201. z_weights = 2;
  202. switch ((block_mode >> 5) & 3)
  203. {
  204. case 0:
  205. x_weights = 6;
  206. break;
  207. case 1:
  208. y_weights = 6;
  209. break;
  210. case 2:
  211. z_weights = 6;
  212. break;
  213. case 3:
  214. return false;
  215. }
  216. break;
  217. }
  218. }
  219. unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1);
  220. quant_mode = (base_quant_mode - 2) + 6 * H;
  221. is_dual_plane = D != 0;
  222. weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
  223. return (weight_count <= BLOCK_MAX_WEIGHTS &&
  224. weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
  225. weight_bits <= BLOCK_MAX_WEIGHT_BITS);
  226. }
  227. /**
  228. * @brief Create a 2D decimation entry for a block-size and weight-decimation pair.
  229. *
  230. * @param x_texels The number of texels in the X dimension.
  231. * @param y_texels The number of texels in the Y dimension.
  232. * @param x_weights The number of weights in the X dimension.
  233. * @param y_weights The number of weights in the Y dimension.
  234. * @param[out] di The decimation info structure to populate.
  235. * @param[out] wb The decimation table init scratch working buffers.
  236. */
  237. static void init_decimation_info_2d(
  238. unsigned int x_texels,
  239. unsigned int y_texels,
  240. unsigned int x_weights,
  241. unsigned int y_weights,
  242. decimation_info& di,
  243. dt_init_working_buffers& wb
  244. ) {
  245. unsigned int texels_per_block = x_texels * y_texels;
  246. unsigned int weights_per_block = x_weights * y_weights;
  247. uint8_t max_texel_count_of_weight = 0;
  248. promise(weights_per_block > 0);
  249. promise(texels_per_block > 0);
  250. promise(x_texels > 0);
  251. promise(y_texels > 0);
  252. for (unsigned int i = 0; i < weights_per_block; i++)
  253. {
  254. wb.texel_count_of_weight[i] = 0;
  255. }
  256. for (unsigned int i = 0; i < texels_per_block; i++)
  257. {
  258. wb.weight_count_of_texel[i] = 0;
  259. }
  260. for (unsigned int y = 0; y < y_texels; y++)
  261. {
  262. for (unsigned int x = 0; x < x_texels; x++)
  263. {
  264. unsigned int texel = y * x_texels + x;
  265. unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
  266. unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
  267. unsigned int x_weight_frac = x_weight & 0xF;
  268. unsigned int y_weight_frac = y_weight & 0xF;
  269. unsigned int x_weight_int = x_weight >> 4;
  270. unsigned int y_weight_int = y_weight >> 4;
  271. unsigned int qweight[4];
  272. qweight[0] = x_weight_int + y_weight_int * x_weights;
  273. qweight[1] = qweight[0] + 1;
  274. qweight[2] = qweight[0] + x_weights;
  275. qweight[3] = qweight[2] + 1;
  276. // Truncated-precision bilinear interpolation
  277. unsigned int prod = x_weight_frac * y_weight_frac;
  278. unsigned int weight[4];
  279. weight[3] = (prod + 8) >> 4;
  280. weight[1] = x_weight_frac - weight[3];
  281. weight[2] = y_weight_frac - weight[3];
  282. weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
  283. for (unsigned int i = 0; i < 4; i++)
  284. {
  285. if (weight[i] != 0)
  286. {
  287. wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
  288. wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
  289. wb.weight_count_of_texel[texel]++;
  290. wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
  291. wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
  292. wb.texel_count_of_weight[qweight[i]]++;
  293. max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
  294. }
  295. }
  296. }
  297. }
  298. uint8_t max_texel_weight_count = 0;
  299. for (unsigned int i = 0; i < texels_per_block; i++)
  300. {
  301. di.texel_weight_count[i] = wb.weight_count_of_texel[i];
  302. max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
  303. for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
  304. {
  305. di.texel_weights_int_4t[j][i] = wb.weights_of_texel[i][j];
  306. di.texel_weights_float_4t[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
  307. di.texel_weights_4t[j][i] = wb.grid_weights_of_texel[i][j];
  308. }
  309. // Init all 4 entries so we can rely on zeros for vectorization
  310. for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++)
  311. {
  312. di.texel_weights_int_4t[j][i] = 0;
  313. di.texel_weights_float_4t[j][i] = 0.0f;
  314. di.texel_weights_4t[j][i] = 0;
  315. }
  316. }
  317. di.max_texel_weight_count = max_texel_weight_count;
  318. for (unsigned int i = 0; i < weights_per_block; i++)
  319. {
  320. unsigned int texel_count_wt = wb.texel_count_of_weight[i];
  321. di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
  322. for (unsigned int j = 0; j < texel_count_wt; j++)
  323. {
  324. uint8_t texel = wb.texels_of_weight[i][j];
  325. // Create transposed versions of these for better vectorization
  326. di.weight_texel[j][i] = texel;
  327. di.weights_flt[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
  328. // perform a layer of array unrolling. An aspect of this unrolling is that
  329. // one of the texel-weight indexes is an identity-mapped index; we will use this
  330. // fact to reorder the indexes so that the first one is the identity index.
  331. int swap_idx = -1;
  332. for (unsigned int k = 0; k < 4; k++)
  333. {
  334. uint8_t dttw = di.texel_weights_4t[k][texel];
  335. float dttwf = di.texel_weights_float_4t[k][texel];
  336. if (dttw == i && dttwf != 0.0f)
  337. {
  338. swap_idx = k;
  339. }
  340. di.texel_weights_texel[i][j][k] = dttw;
  341. di.texel_weights_float_texel[i][j][k] = dttwf;
  342. }
  343. if (swap_idx != 0)
  344. {
  345. uint8_t vi = di.texel_weights_texel[i][j][0];
  346. float vf = di.texel_weights_float_texel[i][j][0];
  347. di.texel_weights_texel[i][j][0] = di.texel_weights_texel[i][j][swap_idx];
  348. di.texel_weights_float_texel[i][j][0] = di.texel_weights_float_texel[i][j][swap_idx];
  349. di.texel_weights_texel[i][j][swap_idx] = vi;
  350. di.texel_weights_float_texel[i][j][swap_idx] = vf;
  351. }
  352. }
  353. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  354. // Match last texel in active lane in SIMD group, for better gathers
  355. uint8_t last_texel = di.weight_texel[texel_count_wt - 1][i];
  356. for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
  357. {
  358. di.weight_texel[j][i] = last_texel;
  359. di.weights_flt[j][i] = 0.0f;
  360. }
  361. }
  362. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  363. unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
  364. for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
  365. {
  366. di.texel_weight_count[i] = 0;
  367. for (unsigned int j = 0; j < 4; j++)
  368. {
  369. di.texel_weights_float_4t[j][i] = 0;
  370. di.texel_weights_4t[j][i] = 0;
  371. di.texel_weights_int_4t[j][i] = 0;
  372. }
  373. }
  374. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  375. // Match last texel in active lane in SIMD group, for better gathers
  376. unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
  377. uint8_t last_texel = di.weight_texel[last_texel_count_wt - 1][weights_per_block - 1];
  378. unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
  379. for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
  380. {
  381. di.weight_texel_count[i] = 0;
  382. for (unsigned int j = 0; j < max_texel_count_of_weight; j++)
  383. {
  384. di.weight_texel[j][i] = last_texel;
  385. di.weights_flt[j][i] = 0.0f;
  386. }
  387. }
  388. di.texel_count = static_cast<uint8_t>(texels_per_block);
  389. di.weight_count = static_cast<uint8_t>(weights_per_block);
  390. di.weight_x = static_cast<uint8_t>(x_weights);
  391. di.weight_y = static_cast<uint8_t>(y_weights);
  392. di.weight_z = 1;
  393. }
  394. /**
  395. * @brief Create a 3D decimation entry for a block-size and weight-decimation pair.
  396. *
  397. * @param x_texels The number of texels in the X dimension.
  398. * @param y_texels The number of texels in the Y dimension.
  399. * @param z_texels The number of texels in the Z dimension.
  400. * @param x_weights The number of weights in the X dimension.
  401. * @param y_weights The number of weights in the Y dimension.
  402. * @param z_weights The number of weights in the Z dimension.
  403. * @param[out] di The decimation info structure to populate.
  404. @param[out] wb The decimation table init scratch working buffers.
  405. */
  406. static void init_decimation_info_3d(
  407. unsigned int x_texels,
  408. unsigned int y_texels,
  409. unsigned int z_texels,
  410. unsigned int x_weights,
  411. unsigned int y_weights,
  412. unsigned int z_weights,
  413. decimation_info& di,
  414. dt_init_working_buffers& wb
  415. ) {
  416. unsigned int texels_per_block = x_texels * y_texels * z_texels;
  417. unsigned int weights_per_block = x_weights * y_weights * z_weights;
  418. uint8_t max_texel_count_of_weight = 0;
  419. promise(weights_per_block > 0);
  420. promise(texels_per_block > 0);
  421. for (unsigned int i = 0; i < weights_per_block; i++)
  422. {
  423. wb.texel_count_of_weight[i] = 0;
  424. }
  425. for (unsigned int i = 0; i < texels_per_block; i++)
  426. {
  427. wb.weight_count_of_texel[i] = 0;
  428. }
  429. for (unsigned int z = 0; z < z_texels; z++)
  430. {
  431. for (unsigned int y = 0; y < y_texels; y++)
  432. {
  433. for (unsigned int x = 0; x < x_texels; x++)
  434. {
  435. int texel = (z * y_texels + y) * x_texels + x;
  436. int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
  437. int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
  438. int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6;
  439. int x_weight_frac = x_weight & 0xF;
  440. int y_weight_frac = y_weight & 0xF;
  441. int z_weight_frac = z_weight & 0xF;
  442. int x_weight_int = x_weight >> 4;
  443. int y_weight_int = y_weight >> 4;
  444. int z_weight_int = z_weight >> 4;
  445. int qweight[4];
  446. int weight[4];
  447. qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
  448. qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
  449. // simplex interpolation
  450. int fs = x_weight_frac;
  451. int ft = y_weight_frac;
  452. int fp = z_weight_frac;
  453. int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
  454. int N = x_weights;
  455. int NM = x_weights * y_weights;
  456. int s1, s2, w0, w1, w2, w3;
  457. switch (cas)
  458. {
  459. case 7:
  460. s1 = 1;
  461. s2 = N;
  462. w0 = 16 - fs;
  463. w1 = fs - ft;
  464. w2 = ft - fp;
  465. w3 = fp;
  466. break;
  467. case 3:
  468. s1 = N;
  469. s2 = 1;
  470. w0 = 16 - ft;
  471. w1 = ft - fs;
  472. w2 = fs - fp;
  473. w3 = fp;
  474. break;
  475. case 5:
  476. s1 = 1;
  477. s2 = NM;
  478. w0 = 16 - fs;
  479. w1 = fs - fp;
  480. w2 = fp - ft;
  481. w3 = ft;
  482. break;
  483. case 4:
  484. s1 = NM;
  485. s2 = 1;
  486. w0 = 16 - fp;
  487. w1 = fp - fs;
  488. w2 = fs - ft;
  489. w3 = ft;
  490. break;
  491. case 2:
  492. s1 = N;
  493. s2 = NM;
  494. w0 = 16 - ft;
  495. w1 = ft - fp;
  496. w2 = fp - fs;
  497. w3 = fs;
  498. break;
  499. case 0:
  500. s1 = NM;
  501. s2 = N;
  502. w0 = 16 - fp;
  503. w1 = fp - ft;
  504. w2 = ft - fs;
  505. w3 = fs;
  506. break;
  507. default:
  508. s1 = NM;
  509. s2 = N;
  510. w0 = 16 - fp;
  511. w1 = fp - ft;
  512. w2 = ft - fs;
  513. w3 = fs;
  514. break;
  515. }
  516. qweight[1] = qweight[0] + s1;
  517. qweight[2] = qweight[1] + s2;
  518. weight[0] = w0;
  519. weight[1] = w1;
  520. weight[2] = w2;
  521. weight[3] = w3;
  522. for (unsigned int i = 0; i < 4; i++)
  523. {
  524. if (weight[i] != 0)
  525. {
  526. wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
  527. wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
  528. wb.weight_count_of_texel[texel]++;
  529. wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
  530. wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
  531. wb.texel_count_of_weight[qweight[i]]++;
  532. max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
  533. }
  534. }
  535. }
  536. }
  537. }
  538. uint8_t max_texel_weight_count = 0;
  539. for (unsigned int i = 0; i < texels_per_block; i++)
  540. {
  541. di.texel_weight_count[i] = wb.weight_count_of_texel[i];
  542. max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
  543. // Init all 4 entries so we can rely on zeros for vectorization
  544. for (unsigned int j = 0; j < 4; j++)
  545. {
  546. di.texel_weights_int_4t[j][i] = 0;
  547. di.texel_weights_float_4t[j][i] = 0.0f;
  548. di.texel_weights_4t[j][i] = 0;
  549. }
  550. for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
  551. {
  552. di.texel_weights_int_4t[j][i] = wb.weights_of_texel[i][j];
  553. di.texel_weights_float_4t[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
  554. di.texel_weights_4t[j][i] = wb.grid_weights_of_texel[i][j];
  555. }
  556. }
  557. di.max_texel_weight_count = max_texel_weight_count;
  558. for (unsigned int i = 0; i < weights_per_block; i++)
  559. {
  560. unsigned int texel_count_wt = wb.texel_count_of_weight[i];
  561. di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
  562. for (unsigned int j = 0; j < texel_count_wt; j++)
  563. {
  564. unsigned int texel = wb.texels_of_weight[i][j];
  565. // Create transposed versions of these for better vectorization
  566. di.weight_texel[j][i] = static_cast<uint8_t>(texel);
  567. di.weights_flt[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
  568. // perform a layer of array unrolling. An aspect of this unrolling is that
  569. // one of the texel-weight indexes is an identity-mapped index; we will use this
  570. // fact to reorder the indexes so that the first one is the identity index.
  571. int swap_idx = -1;
  572. for (unsigned int k = 0; k < 4; k++)
  573. {
  574. uint8_t dttw = di.texel_weights_4t[k][texel];
  575. float dttwf = di.texel_weights_float_4t[k][texel];
  576. if (dttw == i && dttwf != 0.0f)
  577. {
  578. swap_idx = k;
  579. }
  580. di.texel_weights_texel[i][j][k] = dttw;
  581. di.texel_weights_float_texel[i][j][k] = dttwf;
  582. }
  583. if (swap_idx != 0)
  584. {
  585. uint8_t vi = di.texel_weights_texel[i][j][0];
  586. float vf = di.texel_weights_float_texel[i][j][0];
  587. di.texel_weights_texel[i][j][0] = di.texel_weights_texel[i][j][swap_idx];
  588. di.texel_weights_float_texel[i][j][0] = di.texel_weights_float_texel[i][j][swap_idx];
  589. di.texel_weights_texel[i][j][swap_idx] = vi;
  590. di.texel_weights_float_texel[i][j][swap_idx] = vf;
  591. }
  592. }
  593. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  594. // Match last texel in active lane in SIMD group, for better gathers
  595. uint8_t last_texel = di.weight_texel[texel_count_wt - 1][i];
  596. for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
  597. {
  598. di.weight_texel[j][i] = last_texel;
  599. di.weights_flt[j][i] = 0.0f;
  600. }
  601. }
  602. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  603. unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
  604. for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++)
  605. {
  606. di.texel_weight_count[i] = 0;
  607. for (unsigned int j = 0; j < 4; j++)
  608. {
  609. di.texel_weights_float_4t[j][i] = 0;
  610. di.texel_weights_4t[j][i] = 0;
  611. di.texel_weights_int_4t[j][i] = 0;
  612. }
  613. }
  614. // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  615. // Match last texel in active lane in SIMD group, for better gathers
  616. int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
  617. uint8_t last_texel = di.weight_texel[last_texel_count_wt - 1][weights_per_block - 1];
  618. unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
  619. for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++)
  620. {
  621. di.weight_texel_count[i] = 0;
  622. for (int j = 0; j < max_texel_count_of_weight; j++)
  623. {
  624. di.weight_texel[j][i] = last_texel;
  625. di.weights_flt[j][i] = 0.0f;
  626. }
  627. }
  628. di.texel_count = static_cast<uint8_t>(texels_per_block);
  629. di.weight_count = static_cast<uint8_t>(weights_per_block);
  630. di.weight_x = static_cast<uint8_t>(x_weights);
  631. di.weight_y = static_cast<uint8_t>(y_weights);
  632. di.weight_z = static_cast<uint8_t>(z_weights);
  633. }
  634. /**
  635. * @brief Assign the texels to use for kmeans clustering.
  636. *
  637. * The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used.
  638. * The @c bsd.texel_count is an input and must be populated beforehand.
  639. *
  640. * @param[in,out] bsd The block size descriptor to populate.
  641. */
  642. static void assign_kmeans_texels(
  643. block_size_descriptor& bsd
  644. ) {
  645. // Use all texels for kmeans on a small block
  646. if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS)
  647. {
  648. for (uint8_t i = 0; i < bsd.texel_count; i++)
  649. {
  650. bsd.kmeans_texels[i] = i;
  651. }
  652. return;
  653. }
  654. // Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block
  655. uint64_t rng_state[2];
  656. astc::rand_init(rng_state);
  657. // Initialize array used for tracking used indices
  658. bool seen[BLOCK_MAX_TEXELS];
  659. for (uint8_t i = 0; i < bsd.texel_count; i++)
  660. {
  661. seen[i] = false;
  662. }
  663. // Assign 64 random indices, retrying if we see repeats
  664. unsigned int arr_elements_set = 0;
  665. while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS)
  666. {
  667. uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state));
  668. texel = texel % bsd.texel_count;
  669. if (!seen[texel])
  670. {
  671. bsd.kmeans_texels[arr_elements_set++] = texel;
  672. seen[texel] = true;
  673. }
  674. }
  675. }
  676. /**
  677. * @brief Allocate a single 2D decimation table entry.
  678. *
  679. * @param x_texels The number of texels in the X dimension.
  680. * @param y_texels The number of texels in the Y dimension.
  681. * @param x_weights The number of weights in the X dimension.
  682. * @param y_weights The number of weights in the Y dimension.
  683. * @param bsd The block size descriptor we are populating.
  684. * @param wb The decimation table init scratch working buffers.
  685. * @param index The packed array index to populate.
  686. */
  687. static void construct_dt_entry_2d(
  688. unsigned int x_texels,
  689. unsigned int y_texels,
  690. unsigned int x_weights,
  691. unsigned int y_weights,
  692. block_size_descriptor& bsd,
  693. dt_init_working_buffers& wb,
  694. unsigned int index
  695. ) {
  696. unsigned int weight_count = x_weights * y_weights;
  697. assert(weight_count <= BLOCK_MAX_WEIGHTS);
  698. bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS;
  699. decimation_info& di = bsd.decimation_tables[index];
  700. init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb);
  701. int maxprec_1plane = -1;
  702. int maxprec_2planes = -1;
  703. for (int i = 0; i < 12; i++)
  704. {
  705. unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
  706. if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
  707. {
  708. maxprec_1plane = i;
  709. }
  710. if (try_2planes)
  711. {
  712. unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
  713. if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
  714. {
  715. maxprec_2planes = i;
  716. }
  717. }
  718. }
  719. // At least one of the two should be valid ...
  720. assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
  721. bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
  722. bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
  723. bsd.decimation_modes[index].refprec_1_plane = 0;
  724. bsd.decimation_modes[index].refprec_2_planes = 0;
  725. }
  726. /**
  727. * @brief Allocate block modes and decimation tables for a single 2D block size.
  728. *
  729. * @param x_texels The number of texels in the X dimension.
  730. * @param y_texels The number of texels in the Y dimension.
  731. * @param can_omit_modes Can we discard modes that astcenc won't use, even if legal?
  732. * @param mode_cutoff Percentile cutoff in range [0,1]. Low values more likely to be used.
  733. * @param[out] bsd The block size descriptor to populate.
  734. */
  735. static void construct_block_size_descriptor_2d(
  736. unsigned int x_texels,
  737. unsigned int y_texels,
  738. bool can_omit_modes,
  739. float mode_cutoff,
  740. block_size_descriptor& bsd
  741. ) {
  742. // Store a remap table for storing packed decimation modes.
  743. // Indexing uses [Y * 16 + X] and max size for each axis is 12.
  744. static const unsigned int MAX_DMI = 12 * 16 + 12;
  745. int decimation_mode_index[MAX_DMI];
  746. dt_init_working_buffers* wb = new dt_init_working_buffers;
  747. bsd.xdim = static_cast<uint8_t>(x_texels);
  748. bsd.ydim = static_cast<uint8_t>(y_texels);
  749. bsd.zdim = 1;
  750. bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels);
  751. for (unsigned int i = 0; i < MAX_DMI; i++)
  752. {
  753. decimation_mode_index[i] = -1;
  754. }
  755. // Gather all the decimation grids that can be used with the current block
  756. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  757. const float *percentiles = get_2d_percentile_table(x_texels, y_texels);
  758. float always_cutoff = 0.0f;
  759. #else
  760. // Unused in decompress-only builds
  761. (void)can_omit_modes;
  762. (void)mode_cutoff;
  763. #endif
  764. // Construct the list of block formats referencing the decimation tables
  765. unsigned int packed_bm_idx = 0;
  766. unsigned int packed_dm_idx = 0;
  767. // Trackers
  768. unsigned int bm_counts[4] { 0 };
  769. unsigned int dm_counts[4] { 0 };
  770. // Clear the list to a known-bad value
  771. for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  772. {
  773. bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
  774. }
  775. // Iterate four times to build a usefully ordered list:
  776. // - Pass 0 - keep selected single plane "always" block modes
  777. // - Pass 1 - keep selected single plane "non-always" block modes
  778. // - Pass 2 - keep select dual plane block modes
  779. // - Pass 3 - keep everything else that's legal
  780. unsigned int limit = can_omit_modes ? 3 : 4;
  781. for (unsigned int j = 0; j < limit; j ++)
  782. {
  783. for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  784. {
  785. // Skip modes we've already included in a previous pass
  786. if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
  787. {
  788. continue;
  789. }
  790. // Decode parameters
  791. unsigned int x_weights;
  792. unsigned int y_weights;
  793. bool is_dual_plane;
  794. unsigned int quant_mode;
  795. unsigned int weight_bits;
  796. bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);
  797. // Always skip invalid encodings for the current block size
  798. if (!valid || (x_weights > x_texels) || (y_weights > y_texels))
  799. {
  800. continue;
  801. }
  802. // Selectively skip dual plane encodings
  803. if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane))
  804. {
  805. continue;
  806. }
  807. // Always skip encodings we can't physically encode based on
  808. // generic encoding bit availability
  809. if (is_dual_plane)
  810. {
  811. // This is the only check we need as only support 1 partition
  812. if ((109 - weight_bits) <= 0)
  813. {
  814. continue;
  815. }
  816. }
  817. else
  818. {
  819. // This is conservative - fewer bits may be available for > 1 partition
  820. if ((111 - weight_bits) <= 0)
  821. {
  822. continue;
  823. }
  824. }
  825. // Selectively skip encodings based on percentile
  826. bool percentile_hit = false;
  827. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  828. if (j == 0)
  829. {
  830. percentile_hit = percentiles[i] <= always_cutoff;
  831. }
  832. else
  833. {
  834. percentile_hit = percentiles[i] <= mode_cutoff;
  835. }
  836. #endif
  837. if (j != 3 && !percentile_hit)
  838. {
  839. continue;
  840. }
  841. // Allocate and initialize the decimation table entry if we've not used it yet
  842. int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
  843. if (decimation_mode < 0)
  844. {
  845. construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);
  846. decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx;
  847. decimation_mode = packed_dm_idx;
  848. dm_counts[j]++;
  849. packed_dm_idx++;
  850. }
  851. auto& bm = bsd.block_modes[packed_bm_idx];
  852. bm.decimation_mode = static_cast<uint8_t>(decimation_mode);
  853. bm.quant_mode = static_cast<uint8_t>(quant_mode);
  854. bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane);
  855. bm.weight_bits = static_cast<uint8_t>(weight_bits);
  856. bm.mode_index = static_cast<uint16_t>(i);
  857. auto& dm = bsd.decimation_modes[decimation_mode];
  858. if (is_dual_plane)
  859. {
  860. dm.set_ref_2_plane(bm.get_weight_quant_mode());
  861. }
  862. else
  863. {
  864. dm.set_ref_1_plane(bm.get_weight_quant_mode());
  865. }
  866. bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
  867. packed_bm_idx++;
  868. bm_counts[j]++;
  869. }
  870. }
  871. bsd.block_mode_count_1plane_always = bm_counts[0];
  872. bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1];
  873. bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2];
  874. bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3];
  875. bsd.decimation_mode_count_always = dm_counts[0];
  876. bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2];
  877. bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3];
  878. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  879. assert(bsd.block_mode_count_1plane_always > 0);
  880. assert(bsd.decimation_mode_count_always > 0);
  881. delete[] percentiles;
  882. #endif
  883. // Ensure the end of the array contains valid data (should never get read)
  884. for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
  885. {
  886. bsd.decimation_modes[i].maxprec_1plane = -1;
  887. bsd.decimation_modes[i].maxprec_2planes = -1;
  888. bsd.decimation_modes[i].refprec_1_plane = 0;
  889. bsd.decimation_modes[i].refprec_2_planes = 0;
  890. }
  891. // Determine the texels to use for kmeans clustering.
  892. assign_kmeans_texels(bsd);
  893. delete wb;
  894. }
  895. /**
  896. * @brief Allocate block modes and decimation tables for a single £D block size.
  897. *
  898. * TODO: This function doesn't include all of the heuristics that we use for 2D block sizes such as
  899. * the percentile mode cutoffs. If 3D becomes more widely used we should look at this.
  900. *
  901. * @param x_texels The number of texels in the X dimension.
  902. * @param y_texels The number of texels in the Y dimension.
  903. * @param z_texels The number of texels in the Z dimension.
  904. * @param[out] bsd The block size descriptor to populate.
  905. */
  906. static void construct_block_size_descriptor_3d(
  907. unsigned int x_texels,
  908. unsigned int y_texels,
  909. unsigned int z_texels,
  910. block_size_descriptor& bsd
  911. ) {
  912. // Store a remap table for storing packed decimation modes.
  913. // Indexing uses [Z * 64 + Y * 8 + X] and max size for each axis is 6.
  914. static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6;
  915. int decimation_mode_index[MAX_DMI];
  916. unsigned int decimation_mode_count = 0;
  917. dt_init_working_buffers* wb = new dt_init_working_buffers;
  918. bsd.xdim = static_cast<uint8_t>(x_texels);
  919. bsd.ydim = static_cast<uint8_t>(y_texels);
  920. bsd.zdim = static_cast<uint8_t>(z_texels);
  921. bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels);
  922. for (unsigned int i = 0; i < MAX_DMI; i++)
  923. {
  924. decimation_mode_index[i] = -1;
  925. }
  926. // gather all the infill-modes that can be used with the current block size
  927. for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++)
  928. {
  929. for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++)
  930. {
  931. for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++)
  932. {
  933. unsigned int weight_count = x_weights * y_weights * z_weights;
  934. if (weight_count > BLOCK_MAX_WEIGHTS)
  935. {
  936. continue;
  937. }
  938. decimation_info& di = bsd.decimation_tables[decimation_mode_count];
  939. decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
  940. init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb);
  941. int maxprec_1plane = -1;
  942. int maxprec_2planes = -1;
  943. for (unsigned int i = 0; i < 12; i++)
  944. {
  945. unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
  946. if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
  947. {
  948. maxprec_1plane = i;
  949. }
  950. unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
  951. if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
  952. {
  953. maxprec_2planes = i;
  954. }
  955. }
  956. if ((2 * weight_count) > BLOCK_MAX_WEIGHTS)
  957. {
  958. maxprec_2planes = -1;
  959. }
  960. bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
  961. bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
  962. bsd.decimation_modes[decimation_mode_count].refprec_1_plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
  963. bsd.decimation_modes[decimation_mode_count].refprec_2_planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
  964. decimation_mode_count++;
  965. }
  966. }
  967. }
  968. // Ensure the end of the array contains valid data (should never get read)
  969. for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
  970. {
  971. bsd.decimation_modes[i].maxprec_1plane = -1;
  972. bsd.decimation_modes[i].maxprec_2planes = -1;
  973. bsd.decimation_modes[i].refprec_1_plane = 0;
  974. bsd.decimation_modes[i].refprec_2_planes = 0;
  975. }
  976. bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
  977. bsd.decimation_mode_count_selected = decimation_mode_count;
  978. bsd.decimation_mode_count_all = decimation_mode_count;
  979. // Construct the list of block formats referencing the decimation tables
  980. // Clear the list to a known-bad value
  981. for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  982. {
  983. bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
  984. }
  985. unsigned int packed_idx = 0;
  986. unsigned int bm_counts[2] { 0 };
  987. // Iterate two times to build a usefully ordered list:
  988. // - Pass 0 - keep valid single plane block modes
  989. // - Pass 1 - keep valid dual plane block modes
  990. for (unsigned int j = 0; j < 2; j++)
  991. {
  992. for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  993. {
  994. // Skip modes we've already included in a previous pass
  995. if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
  996. {
  997. continue;
  998. }
  999. unsigned int x_weights;
  1000. unsigned int y_weights;
  1001. unsigned int z_weights;
  1002. bool is_dual_plane;
  1003. unsigned int quant_mode;
  1004. unsigned int weight_bits;
  1005. bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits);
  1006. // Skip invalid encodings
  1007. if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)
  1008. {
  1009. continue;
  1010. }
  1011. // Skip encodings in the wrong iteration
  1012. if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane))
  1013. {
  1014. continue;
  1015. }
  1016. // Always skip encodings we can't physically encode based on bit availability
  1017. if (is_dual_plane)
  1018. {
  1019. // This is the only check we need as only support 1 partition
  1020. if ((109 - weight_bits) <= 0)
  1021. {
  1022. continue;
  1023. }
  1024. }
  1025. else
  1026. {
  1027. // This is conservative - fewer bits may be available for > 1 partition
  1028. if ((111 - weight_bits) <= 0)
  1029. {
  1030. continue;
  1031. }
  1032. }
  1033. int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
  1034. bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
  1035. bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
  1036. bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
  1037. bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
  1038. bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
  1039. bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);
  1040. bm_counts[j]++;
  1041. packed_idx++;
  1042. }
  1043. }
  1044. bsd.block_mode_count_1plane_always = 0; // Skipped for 3D modes
  1045. bsd.block_mode_count_1plane_selected = bm_counts[0];
  1046. bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1];
  1047. bsd.block_mode_count_all = bm_counts[0] + bm_counts[1];
  1048. // Determine the texels to use for kmeans clustering.
  1049. assign_kmeans_texels(bsd);
  1050. delete wb;
  1051. }
  1052. /* See header for documentation. */
  1053. void init_block_size_descriptor(
  1054. unsigned int x_texels,
  1055. unsigned int y_texels,
  1056. unsigned int z_texels,
  1057. bool can_omit_modes,
  1058. unsigned int partition_count_cutoff,
  1059. float mode_cutoff,
  1060. block_size_descriptor& bsd
  1061. ) {
  1062. if (z_texels > 1)
  1063. {
  1064. construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd);
  1065. }
  1066. else
  1067. {
  1068. construct_block_size_descriptor_2d(x_texels, y_texels, can_omit_modes, mode_cutoff, bsd);
  1069. }
  1070. init_partition_tables(bsd, can_omit_modes, partition_count_cutoff);
  1071. }