astc_compress_symbolic.cpp 58 KB


  1. /*----------------------------------------------------------------------------*/
  2. /**
  3. * This confidential and proprietary software may be used only as
  4. * authorised by a licensing agreement from ARM Limited
  5. * (C) COPYRIGHT 2011-2012 ARM Limited
  6. * ALL RIGHTS RESERVED
  7. *
  8. * The entire notice above must be reproduced on all authorised
  9. * copies and copies may only be made to the extent permitted
  10. * by a licensing agreement from ARM Limited.
  11. *
  12. * @brief Compress a block of colors, expressed as a symbolic block, for ASTC.
  13. */
  14. /*----------------------------------------------------------------------------*/
  15. #include "astc_codec_internals.h"
  16. #include "softfloat.h"
  17. #include <math.h>
  18. #include <string.h>
  19. #include <stdio.h>
  20. #ifdef DEBUG_CAPTURE_NAN
  21. #ifndef _GNU_SOURCE
  22. #define _GNU_SOURCE
  23. #endif
  24. #include <fenv.h>
  25. #endif
  26. #include <stdio.h>
  27. int realign_weights(astc_decode_mode decode_mode,
  28. int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, symbolic_compressed_block * scb, uint8_t * weight_set8, uint8_t * plane2_weight_set8)
  29. {
  30. int i, j;
  31. // get the appropriate partition descriptor.
  32. int partition_count = scb->partition_count;
  33. const partition_info *pt = get_partition_table(xdim, ydim, zdim, partition_count);
  34. pt += scb->partition_index;
  35. // get the appropriate block descriptor
  36. const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
  37. const decimation_table *const *ixtab2 = bsd->decimation_tables;
  38. const decimation_table *it = ixtab2[bsd->block_modes[scb->block_mode].decimation_mode];
  39. int is_dual_plane = bsd->block_modes[scb->block_mode].is_dual_plane;
  40. // get quantization-parameters
  41. int weight_quantization_level = bsd->block_modes[scb->block_mode].quantization_mode;
  42. // decode the color endpoints
  43. ushort4 color_endpoint0[4];
  44. ushort4 color_endpoint1[4];
  45. int rgb_hdr[4];
  46. int alpha_hdr[4];
  47. int nan_endpoint[4];
  48. for (i = 0; i < partition_count; i++)
  49. unpack_color_endpoints(decode_mode,
  50. scb->color_formats[i], scb->color_quantization_level, scb->color_values[i], &rgb_hdr[i], &alpha_hdr[i], &nan_endpoint[i], &(color_endpoint0[i]), &(color_endpoint1[i]));
  51. float uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK];
  52. float uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK];
  53. int weight_count = it->num_weights;
  54. // read and unquantize the weights.
  55. const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]);
  56. for (i = 0; i < weight_count; i++)
  57. {
  58. uq_plane1_weights[i] = qat->unquantized_value_flt[weight_set8[i]];
  59. }
  60. if (is_dual_plane)
  61. {
  62. for (i = 0; i < weight_count; i++)
  63. uq_plane2_weights[i] = qat->unquantized_value_flt[plane2_weight_set8[i]];
  64. }
  65. int plane2_color_component = is_dual_plane ? scb->plane2_color_component : -1;
  66. // for each weight, unquantize the weight, use it to compute a color and a color error.
  67. // then, increment the weight until the color error stops decreasing
  68. // then, decrement the weight until the color error stops increasing
  69. #define COMPUTE_ERROR( errorvar ) \
  70. errorvar = 0.0f; \
  71. for(j=0;j<texels_to_evaluate;j++) \
  72. { \
  73. int texel = it->weight_texel[i][j]; \
  74. int partition = pt->partition_of_texel[texel]; \
  75. float plane1_weight = compute_value_of_texel_flt( texel, it, uq_plane1_weights ); \
  76. float plane2_weight = 0.0f; \
  77. if( is_dual_plane ) \
  78. plane2_weight = compute_value_of_texel_flt( texel, it, uq_plane2_weights ); \
  79. int int_plane1_weight = static_cast<int>(floor( plane1_weight*64.0f + 0.5f ) ); \
  80. int int_plane2_weight = static_cast<int>(floor( plane2_weight*64.0f + 0.5f ) ); \
  81. ushort4 lrp_color = lerp_color_int( \
  82. decode_mode, \
  83. color_endpoint0[partition], \
  84. color_endpoint1[partition], \
  85. int_plane1_weight, \
  86. int_plane2_weight, \
  87. plane2_color_component ); \
  88. float4 color = float4( lrp_color.x, lrp_color.y, lrp_color.z, lrp_color.w ); \
  89. float4 origcolor = float4( \
  90. blk->work_data[4*texel], \
  91. blk->work_data[4*texel+1], \
  92. blk->work_data[4*texel+2], \
  93. blk->work_data[4*texel+3] ); \
  94. float4 error_weight = ewb->error_weights[texel]; \
  95. float4 colordiff = color - origcolor; \
  96. errorvar += dot( colordiff*colordiff, error_weight ); \
  97. }
  98. int adjustments = 0;
  99. for (i = 0; i < weight_count; i++)
  100. {
  101. int current_wt = weight_set8[i];
  102. int texels_to_evaluate = it->weight_num_texels[i];
  103. float current_error;
  104. COMPUTE_ERROR(current_error);
  105. // increment until error starts increasing.
  106. while (1)
  107. {
  108. int next_wt = qat->next_quantized_value[current_wt];
  109. if (next_wt == current_wt)
  110. break;
  111. uq_plane1_weights[i] = qat->unquantized_value_flt[next_wt];
  112. float next_error;
  113. COMPUTE_ERROR(next_error);
  114. if (next_error < current_error)
  115. {
  116. // succeeded, increment the weight
  117. current_wt = next_wt;
  118. current_error = next_error;
  119. adjustments++;
  120. }
  121. else
  122. {
  123. // failed, back out the attempted increment
  124. uq_plane1_weights[i] = qat->unquantized_value_flt[current_wt];
  125. break;
  126. }
  127. }
  128. // decrement until error starts increasing
  129. while (1)
  130. {
  131. int prev_wt = qat->prev_quantized_value[current_wt];
  132. if (prev_wt == current_wt)
  133. break;
  134. uq_plane1_weights[i] = qat->unquantized_value_flt[prev_wt];
  135. float prev_error;
  136. COMPUTE_ERROR(prev_error);
  137. if (prev_error < current_error)
  138. {
  139. // succeeded, decrement the weight
  140. current_wt = prev_wt;
  141. current_error = prev_error;
  142. adjustments++;
  143. }
  144. else
  145. {
  146. // failed, back out the attempted decrement
  147. uq_plane1_weights[i] = qat->unquantized_value_flt[current_wt];
  148. break;
  149. }
  150. }
  151. weight_set8[i] = current_wt;
  152. }
  153. if (!is_dual_plane)
  154. return adjustments;
  155. // processing of the second plane of weights
  156. for (i = 0; i < weight_count; i++)
  157. {
  158. int current_wt = plane2_weight_set8[i];
  159. int texels_to_evaluate = it->weight_num_texels[i];
  160. float current_error;
  161. COMPUTE_ERROR(current_error);
  162. // increment until error starts increasing.
  163. while (1)
  164. {
  165. int next_wt = qat->next_quantized_value[current_wt];
  166. if (next_wt == current_wt)
  167. break;
  168. uq_plane2_weights[i] = qat->unquantized_value_flt[next_wt];
  169. float next_error;
  170. COMPUTE_ERROR(next_error);
  171. if (next_error < current_error)
  172. {
  173. // succeeded, increment the weight
  174. current_wt = next_wt;
  175. current_error = next_error;
  176. adjustments++;
  177. }
  178. else
  179. {
  180. // failed, back out the attempted increment
  181. uq_plane2_weights[i] = qat->unquantized_value_flt[current_wt];
  182. break;
  183. }
  184. }
  185. // decrement until error starts increasing
  186. while (1)
  187. {
  188. int prev_wt = qat->prev_quantized_value[current_wt];
  189. if (prev_wt == current_wt)
  190. break;
  191. uq_plane2_weights[i] = qat->unquantized_value_flt[prev_wt];
  192. float prev_error;
  193. COMPUTE_ERROR(prev_error);
  194. if (prev_error < current_error)
  195. {
  196. // succeeded, decrement the weight
  197. current_wt = prev_wt;
  198. current_error = prev_error;
  199. adjustments++;
  200. }
  201. else
  202. {
  203. // failed, back out the attempted decrement
  204. uq_plane2_weights[i] = qat->unquantized_value_flt[current_wt];
  205. break;
  206. }
  207. }
  208. plane2_weight_set8[i] = current_wt;
  209. }
  210. return adjustments;
  211. }
  212. /*
  213. function for compressing a block symbolically, given that we have already decided on a partition
  214. */
  215. static void compress_symbolic_block_fixed_partition_1_plane(astc_decode_mode decode_mode,
  216. float mode_cutoff,
  217. int max_refinement_iters,
  218. int xdim, int ydim, int zdim,
  219. int partition_count, int partition_index,
  220. const imageblock * blk, const error_weight_block * ewb, symbolic_compressed_block * scb,
  221. compress_fixed_partition_buffers * tmpbuf)
  222. {
  223. int i, j, k;
  224. static const int free_bits_for_partition_count[5] = { 0, 115 - 4, 111 - 4 - PARTITION_BITS, 108 - 4 - PARTITION_BITS, 105 - 4 - PARTITION_BITS };
  225. const partition_info *pi = get_partition_table(xdim, ydim, zdim, partition_count);
  226. pi += partition_index;
  227. // first, compute ideal weights and endpoint colors, under thre assumption that
  228. // there is no quantization or decimation going on.
  229. endpoints_and_weights *ei = tmpbuf->ei1;
  230. endpoints_and_weights *eix = tmpbuf->eix1;
  231. compute_endpoints_and_ideal_weights_1_plane(xdim, ydim, zdim, pi, blk, ewb, ei);
  232. // next, compute ideal weights and endpoint colors for every decimation.
  233. const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
  234. const decimation_table *const *ixtab2 = bsd->decimation_tables;
  235. // int block_mode_count = bsd->single_plane_block_mode_count;
  236. float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights;
  237. float *decimated_weights = tmpbuf->decimated_weights;
  238. float *flt_quantized_decimated_quantized_weights = tmpbuf->flt_quantized_decimated_quantized_weights;
  239. uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf->u8_quantized_decimated_quantized_weights;
  240. // for each decimation mode, compute an ideal set of weights
  241. // (that is, weights computed with the assumption that they are not quantized)
  242. for (i = 0; i < MAX_DECIMATION_MODES; i++)
  243. {
  244. if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_1plane[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff)
  245. continue;
  246. eix[i] = *ei;
  247. compute_ideal_weights_for_decimation_table(&(eix[i]), ixtab2[i], decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, decimated_weights + i * MAX_WEIGHTS_PER_BLOCK);
  248. }
  249. // compute maximum colors for the endpoints and ideal weights.
  250. // for each endpoint-and-ideal-weight pair, compute the smallest weight value
  251. // that will result in a color value greater than 1.
  252. float4 min_ep = float4(10, 10, 10, 10);
  253. for (i = 0; i < partition_count; i++)
  254. {
  255. #ifdef DEBUG_CAPTURE_NAN
  256. fedisableexcept(FE_DIVBYZERO | FE_INVALID);
  257. #endif
  258. float4 ep = (float4(1, 1, 1, 1) - ei->ep.endpt0[i]) / (ei->ep.endpt1[i] - ei->ep.endpt0[i]);
  259. if (ep.x > 0.5f && ep.x < min_ep.x)
  260. min_ep.x = ep.x;
  261. if (ep.y > 0.5f && ep.y < min_ep.y)
  262. min_ep.y = ep.y;
  263. if (ep.z > 0.5f && ep.z < min_ep.z)
  264. min_ep.z = ep.z;
  265. if (ep.w > 0.5f && ep.w < min_ep.w)
  266. min_ep.w = ep.w;
  267. #ifdef DEBUG_CAPTURE_NAN
  268. feenableexcept(FE_DIVBYZERO | FE_INVALID);
  269. #endif
  270. }
  271. float min_wt_cutoff = MIN(MIN(min_ep.x, min_ep.y), MIN(min_ep.z, min_ep.w));
  272. // for each mode, use the angular method to compute a shift.
  273. float weight_low_value[MAX_WEIGHT_MODES];
  274. float weight_high_value[MAX_WEIGHT_MODES];
  275. compute_angular_endpoints_1plane(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value, weight_high_value);
  276. // for each mode (which specifies a decimation and a quantization):
  277. // * compute number of bits needed for the quantized weights.
  278. // * generate an optimized set of quantized weights.
  279. // * compute quantization errors for the mode.
  280. int qwt_bitcounts[MAX_WEIGHT_MODES];
  281. float qwt_errors[MAX_WEIGHT_MODES];
  282. for (i = 0; i < MAX_WEIGHT_MODES; i++)
  283. {
  284. if (bsd->block_modes[i].permit_encode == 0 || bsd->block_modes[i].is_dual_plane != 0 || bsd->block_modes[i].percentile > mode_cutoff)
  285. {
  286. qwt_errors[i] = 1e38f;
  287. continue;
  288. }
  289. if (weight_high_value[i] > 1.02f * min_wt_cutoff)
  290. weight_high_value[i] = 1.0f;
  291. int decimation_mode = bsd->block_modes[i].decimation_mode;
  292. if (bsd->decimation_mode_percentile[decimation_mode] > mode_cutoff)
  293. ASTC_CODEC_INTERNAL_ERROR;
  294. // compute weight bitcount for the mode
  295. int bits_used_by_weights = compute_ise_bitcount(ixtab2[decimation_mode]->num_weights,
  296. (quantization_method) bsd->block_modes[i].quantization_mode);
  297. int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights;
  298. if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96)
  299. {
  300. qwt_errors[i] = 1e38f;
  301. continue;
  302. }
  303. qwt_bitcounts[i] = bitcount;
  304. // then, generate the optimized set of weights for the weight mode.
  305. compute_ideal_quantized_weights_for_decimation_table(&(eix[decimation_mode]),
  306. ixtab2[decimation_mode],
  307. weight_low_value[i], weight_high_value[i],
  308. decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * decimation_mode,
  309. flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i,
  310. u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i,
  311. bsd->block_modes[i].quantization_mode);
  312. // then, compute weight-errors for the weight mode.
  313. qwt_errors[i] = compute_error_of_weight_set(&(eix[decimation_mode]), ixtab2[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i);
  314. #ifdef DEBUG_PRINT_DIAGNOSTICS
  315. if (print_diagnostics)
  316. printf("Block mode %d -> weight error = %f\n", i, qwt_errors[i]);
  317. #endif
  318. }
  319. // for each weighting mode, determine the optimal combination of color endpoint encodings
  320. // and weight encodings; return results for the 4 best-looking modes.
  321. int partition_format_specifiers[4][4];
  322. int quantized_weight[4];
  323. int color_quantization_level[4];
  324. int color_quantization_level_mod[4];
  325. determine_optimal_set_of_endpoint_formats_to_use(xdim, ydim, zdim, pi, blk, ewb, &(ei->ep), -1, // used to flag that we are in single-weight mode
  326. qwt_bitcounts, qwt_errors, partition_format_specifiers, quantized_weight, color_quantization_level, color_quantization_level_mod);
  327. // then iterate over the 4 believed-to-be-best modes to find out which one is
  328. // actually best.
  329. for (i = 0; i < 4; i++)
  330. {
  331. uint8_t *u8_weight_src;
  332. int weights_to_copy;
  333. if (quantized_weight[i] < 0)
  334. {
  335. scb->error_block = 1;
  336. scb++;
  337. continue;
  338. }
  339. int decimation_mode = bsd->block_modes[quantized_weight[i]].decimation_mode;
  340. int weight_quantization_mode = bsd->block_modes[quantized_weight[i]].quantization_mode;
  341. const decimation_table *it = ixtab2[decimation_mode];
  342. #ifdef DEBUG_PRINT_DIAGNOSTICS
  343. if (print_diagnostics)
  344. {
  345. printf("Selected mode = %d\n", quantized_weight[i]);
  346. printf("Selected decimation mode = %d\n", decimation_mode);
  347. printf("Selected weight-quantization mode = %d\n", weight_quantization_mode);
  348. }
  349. #endif
  350. u8_weight_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * quantized_weight[i];
  351. weights_to_copy = it->num_weights;
  352. // recompute the ideal color endpoints before storing them.
  353. float4 rgbs_colors[4];
  354. float4 rgbo_colors[4];
  355. float2 lum_intervals[4];
  356. int l;
  357. for (l = 0; l < max_refinement_iters; l++)
  358. {
  359. recompute_ideal_colors(xdim, ydim, zdim, weight_quantization_mode, &(eix[decimation_mode].ep), rgbs_colors, rgbo_colors, lum_intervals, u8_weight_src, NULL, -1, pi, it, blk, ewb);
  360. // quantize the chosen color
  361. // store the colors for the block
  362. for (j = 0; j < partition_count; j++)
  363. {
  364. scb->color_formats[j] = pack_color_endpoints(decode_mode,
  365. eix[decimation_mode].ep.endpt0[j],
  366. eix[decimation_mode].ep.endpt1[j],
  367. rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], scb->color_values[j], color_quantization_level[i]);
  368. }
  369. // if all the color endpoint modes are the same, we get a few more
  370. // bits to store colors; let's see if we can take advantage of this:
  371. // requantize all the colors and see if the endpoint modes remain the same;
  372. // if they do, then exploit it.
  373. scb->color_formats_matched = 0;
  374. if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1]
  375. && color_quantization_level != color_quantization_level_mod)
  376. && (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3])))))
  377. {
  378. int colorvals[4][12];
  379. int color_formats_mod[4];
  380. for (j = 0; j < partition_count; j++)
  381. {
  382. color_formats_mod[j] = pack_color_endpoints(decode_mode,
  383. eix[decimation_mode].ep.endpt0[j],
  384. eix[decimation_mode].ep.endpt1[j],
  385. rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], colorvals[j], color_quantization_level_mod[i]);
  386. }
  387. if (color_formats_mod[0] == color_formats_mod[1]
  388. && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3])))))
  389. {
  390. scb->color_formats_matched = 1;
  391. for (j = 0; j < 4; j++)
  392. for (k = 0; k < 12; k++)
  393. scb->color_values[j][k] = colorvals[j][k];
  394. for (j = 0; j < 4; j++)
  395. scb->color_formats[j] = color_formats_mod[j];
  396. }
  397. }
  398. // store header fields
  399. scb->partition_count = partition_count;
  400. scb->partition_index = partition_index;
  401. scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i];
  402. scb->block_mode = quantized_weight[i];
  403. scb->error_block = 0;
  404. if (scb->color_quantization_level < 4)
  405. {
  406. scb->error_block = 1; // should never happen, but cannot prove it impossible.
  407. }
  408. // perform a final pass over the weights to try to improve them.
  409. int adjustments = realign_weights(decode_mode,
  410. xdim, ydim, zdim,
  411. blk, ewb, scb,
  412. u8_weight_src,
  413. NULL);
  414. if (adjustments == 0)
  415. break;
  416. }
  417. for (j = 0; j < weights_to_copy; j++)
  418. scb->plane1_weights[j] = u8_weight_src[j];
  419. scb++;
  420. }
  421. }
  422. static void compress_symbolic_block_fixed_partition_2_planes(astc_decode_mode decode_mode,
  423. float mode_cutoff,
  424. int max_refinement_iters,
  425. int xdim, int ydim, int zdim,
  426. int partition_count, int partition_index,
  427. int separate_component, const imageblock * blk, const error_weight_block * ewb,
  428. symbolic_compressed_block * scb,
  429. compress_fixed_partition_buffers * tmpbuf)
  430. {
  431. int i, j, k;
  432. static const int free_bits_for_partition_count[5] =
  433. { 0, 113 - 4, 109 - 4 - PARTITION_BITS, 106 - 4 - PARTITION_BITS, 103 - 4 - PARTITION_BITS };
  434. const partition_info *pi = get_partition_table(xdim, ydim, zdim, partition_count);
  435. pi += partition_index;
  436. // first, compute ideal weights and endpoint colors
  437. endpoints_and_weights *ei1 = tmpbuf->ei1;
  438. endpoints_and_weights *ei2 = tmpbuf->ei2;
  439. endpoints_and_weights *eix1 = tmpbuf->eix1;
  440. endpoints_and_weights *eix2 = tmpbuf->eix2;
  441. compute_endpoints_and_ideal_weights_2_planes(xdim, ydim, zdim, pi, blk, ewb, separate_component, ei1, ei2);
  442. // next, compute ideal weights and endpoint colors for every decimation.
  443. const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
  444. const decimation_table *const *ixtab2 = bsd->decimation_tables;
  445. float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights;
  446. float *decimated_weights = tmpbuf->decimated_weights;
  447. float *flt_quantized_decimated_quantized_weights = tmpbuf->flt_quantized_decimated_quantized_weights;
  448. uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf->u8_quantized_decimated_quantized_weights;
  449. // for each decimation mode, compute an ideal set of weights
  450. for (i = 0; i < MAX_DECIMATION_MODES; i++)
  451. {
  452. if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_2planes[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff)
  453. continue;
  454. eix1[i] = *ei1;
  455. eix2[i] = *ei2;
  456. compute_ideal_weights_for_decimation_table(&(eix1[i]), ixtab2[i], decimated_quantized_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK);
  457. compute_ideal_weights_for_decimation_table(&(eix2[i]), ixtab2[i], decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK);
  458. }
  459. // compute maximum colors for the endpoints and ideal weights.
  460. // for each endpoint-and-ideal-weight pair, compute the smallest weight value
  461. // that will result in a color value greater than 1.
  462. float4 min_ep1 = float4(10, 10, 10, 10);
  463. float4 min_ep2 = float4(10, 10, 10, 10);
  464. for (i = 0; i < partition_count; i++)
  465. {
  466. #ifdef DEBUG_CAPTURE_NAN
  467. fedisableexcept(FE_DIVBYZERO | FE_INVALID);
  468. #endif
  469. float4 ep1 = (float4(1, 1, 1, 1) - ei1->ep.endpt0[i]) / (ei1->ep.endpt1[i] - ei1->ep.endpt0[i]);
  470. if (ep1.x > 0.5f && ep1.x < min_ep1.x)
  471. min_ep1.x = ep1.x;
  472. if (ep1.y > 0.5f && ep1.y < min_ep1.y)
  473. min_ep1.y = ep1.y;
  474. if (ep1.z > 0.5f && ep1.z < min_ep1.z)
  475. min_ep1.z = ep1.z;
  476. if (ep1.w > 0.5f && ep1.w < min_ep1.w)
  477. min_ep1.w = ep1.w;
  478. float4 ep2 = (float4(1, 1, 1, 1) - ei2->ep.endpt0[i]) / (ei2->ep.endpt1[i] - ei2->ep.endpt0[i]);
  479. if (ep2.x > 0.5f && ep2.x < min_ep2.x)
  480. min_ep2.x = ep2.x;
  481. if (ep2.y > 0.5f && ep2.y < min_ep2.y)
  482. min_ep2.y = ep2.y;
  483. if (ep2.z > 0.5f && ep2.z < min_ep2.z)
  484. min_ep2.z = ep2.z;
  485. if (ep2.w > 0.5f && ep2.w < min_ep2.w)
  486. min_ep2.w = ep2.w;
  487. #ifdef DEBUG_CAPTURE_NAN
  488. feenableexcept(FE_DIVBYZERO | FE_INVALID);
  489. #endif
  490. }
  491. float min_wt_cutoff1, min_wt_cutoff2;
  492. switch (separate_component)
  493. {
  494. case 0:
  495. min_wt_cutoff2 = min_ep2.x;
  496. min_ep1.x = 1e30f;
  497. break;
  498. case 1:
  499. min_wt_cutoff2 = min_ep2.y;
  500. min_ep1.y = 1e30f;
  501. break;
  502. case 2:
  503. min_wt_cutoff2 = min_ep2.z;
  504. min_ep1.z = 1e30f;
  505. break;
  506. case 3:
  507. min_wt_cutoff2 = min_ep2.w;
  508. min_ep1.w = 1e30f;
  509. break;
  510. default:
  511. min_wt_cutoff2 = 1e30f;
  512. }
  513. min_wt_cutoff1 = MIN(MIN(min_ep1.x, min_ep1.y), MIN(min_ep1.z, min_ep1.w));
  514. float weight_low_value1[MAX_WEIGHT_MODES];
  515. float weight_high_value1[MAX_WEIGHT_MODES];
  516. float weight_low_value2[MAX_WEIGHT_MODES];
  517. float weight_high_value2[MAX_WEIGHT_MODES];
  518. compute_angular_endpoints_2planes(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value1, weight_high_value1, weight_low_value2, weight_high_value2);
  519. // for each mode (which specifies a decimation and a quantization):
  520. // * generate an optimized set of quantized weights.
  521. // * compute quantization errors for each mode
  522. // * compute number of bits needed for the quantized weights.
  523. int qwt_bitcounts[MAX_WEIGHT_MODES];
  524. float qwt_errors[MAX_WEIGHT_MODES];
  525. for (i = 0; i < MAX_WEIGHT_MODES; i++)
  526. {
  527. if (bsd->block_modes[i].permit_encode == 0 || bsd->block_modes[i].is_dual_plane != 1 || bsd->block_modes[i].percentile > mode_cutoff)
  528. {
  529. qwt_errors[i] = 1e38f;
  530. continue;
  531. }
  532. int decimation_mode = bsd->block_modes[i].decimation_mode;
  533. if (weight_high_value1[i] > 1.02f * min_wt_cutoff1)
  534. weight_high_value1[i] = 1.0f;
  535. if (weight_high_value2[i] > 1.02f * min_wt_cutoff2)
  536. weight_high_value2[i] = 1.0f;
  537. // compute weight bitcount for the mode
  538. int bits_used_by_weights = compute_ise_bitcount(2 * ixtab2[decimation_mode]->num_weights,
  539. (quantization_method) bsd->block_modes[i].quantization_mode);
  540. int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights;
  541. if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96)
  542. {
  543. qwt_errors[i] = 1e38f;
  544. continue;
  545. }
  546. qwt_bitcounts[i] = bitcount;
  547. // then, generate the optimized set of weights for the mode.
  548. compute_ideal_quantized_weights_for_decimation_table(&(eix1[decimation_mode]),
  549. ixtab2[decimation_mode],
  550. weight_low_value1[i],
  551. weight_high_value1[i],
  552. decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode),
  553. flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i),
  554. u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), bsd->block_modes[i].quantization_mode);
  555. compute_ideal_quantized_weights_for_decimation_table(&(eix2[decimation_mode]),
  556. ixtab2[decimation_mode],
  557. weight_low_value2[i],
  558. weight_high_value2[i],
  559. decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode + 1),
  560. flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1),
  561. u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), bsd->block_modes[i].quantization_mode);
  562. // then, compute quantization errors for the block mode.
  563. qwt_errors[i] =
  564. compute_error_of_weight_set(&(eix1[decimation_mode]),
  565. ixtab2[decimation_mode],
  566. flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i))
  567. + compute_error_of_weight_set(&(eix2[decimation_mode]), ixtab2[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1));
  568. }
  569. // decide the optimal combination of color endpoint encodings and weight encoodings.
  570. int partition_format_specifiers[4][4];
  571. int quantized_weight[4];
  572. int color_quantization_level[4];
  573. int color_quantization_level_mod[4];
  574. endpoints epm;
  575. merge_endpoints(&(ei1->ep), &(ei2->ep), separate_component, &epm);
  576. determine_optimal_set_of_endpoint_formats_to_use(xdim, ydim, zdim,
  577. pi,
  578. blk,
  579. ewb,
  580. &epm, separate_component, qwt_bitcounts, qwt_errors, partition_format_specifiers, quantized_weight, color_quantization_level, color_quantization_level_mod);
  581. for (i = 0; i < 4; i++)
  582. {
  583. if (quantized_weight[i] < 0)
  584. {
  585. scb->error_block = 1;
  586. scb++;
  587. continue;
  588. }
  589. uint8_t *u8_weight1_src;
  590. uint8_t *u8_weight2_src;
  591. int weights_to_copy;
  592. int decimation_mode = bsd->block_modes[quantized_weight[i]].decimation_mode;
  593. int weight_quantization_mode = bsd->block_modes[quantized_weight[i]].quantization_mode;
  594. const decimation_table *it = ixtab2[decimation_mode];
  595. u8_weight1_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * quantized_weight[i]);
  596. u8_weight2_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * quantized_weight[i] + 1);
  597. weights_to_copy = it->num_weights;
  598. // recompute the ideal color endpoints before storing them.
  599. merge_endpoints(&(eix1[decimation_mode].ep), &(eix2[decimation_mode].ep), separate_component, &epm);
  600. float4 rgbs_colors[4];
  601. float4 rgbo_colors[4];
  602. float2 lum_intervals[4];
  603. int l;
  604. for (l = 0; l < max_refinement_iters; l++)
  605. {
  606. recompute_ideal_colors(xdim, ydim, zdim, weight_quantization_mode, &epm, rgbs_colors, rgbo_colors, lum_intervals, u8_weight1_src, u8_weight2_src, separate_component, pi, it, blk, ewb);
  607. // store the colors for the block
  608. for (j = 0; j < partition_count; j++)
  609. {
  610. scb->color_formats[j] = pack_color_endpoints(decode_mode,
  611. epm.endpt0[j],
  612. epm.endpt1[j],
  613. rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], scb->color_values[j], color_quantization_level[i]);
  614. }
  615. scb->color_formats_matched = 0;
  616. if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1]
  617. && color_quantization_level != color_quantization_level_mod)
  618. && (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3])))))
  619. {
  620. int colorvals[4][12];
  621. int color_formats_mod[4];
  622. for (j = 0; j < partition_count; j++)
  623. {
  624. color_formats_mod[j] = pack_color_endpoints(decode_mode,
  625. epm.endpt0[j],
  626. epm.endpt1[j],
  627. rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], colorvals[j], color_quantization_level_mod[i]);
  628. }
  629. if (color_formats_mod[0] == color_formats_mod[1]
  630. && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3])))))
  631. {
  632. scb->color_formats_matched = 1;
  633. for (j = 0; j < 4; j++)
  634. for (k = 0; k < 12; k++)
  635. scb->color_values[j][k] = colorvals[j][k];
  636. for (j = 0; j < 4; j++)
  637. scb->color_formats[j] = color_formats_mod[j];
  638. }
  639. }
  640. // store header fields
  641. scb->partition_count = partition_count;
  642. scb->partition_index = partition_index;
  643. scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i];
  644. scb->block_mode = quantized_weight[i];
  645. scb->plane2_color_component = separate_component;
  646. scb->error_block = 0;
  647. if (scb->color_quantization_level < 4)
  648. {
  649. scb->error_block = 1; // should never happen, but cannot prove it impossible
  650. }
  651. int adjustments = realign_weights(decode_mode,
  652. xdim, ydim, zdim,
  653. blk, ewb, scb,
  654. u8_weight1_src,
  655. u8_weight2_src);
  656. if (adjustments == 0)
  657. break;
  658. }
  659. for (j = 0; j < weights_to_copy; j++)
  660. {
  661. scb->plane1_weights[j] = u8_weight1_src[j];
  662. scb->plane2_weights[j] = u8_weight2_src[j];
  663. }
  664. scb++;
  665. }
  666. }
  667. void expand_block_artifact_suppression(int xdim, int ydim, int zdim, error_weighting_params * ewp)
  668. {
  669. int x, y, z;
  670. float centerpos_x = (xdim - 1) * 0.5f;
  671. float centerpos_y = (ydim - 1) * 0.5f;
  672. float centerpos_z = (zdim - 1) * 0.5f;
  673. float *bef = ewp->block_artifact_suppression_expanded;
  674. for (z = 0; z < zdim; z++)
  675. for (y = 0; y < ydim; y++)
  676. for (x = 0; x < xdim; x++)
  677. {
  678. float xdif = (x - centerpos_x) / xdim;
  679. float ydif = (y - centerpos_y) / ydim;
  680. float zdif = (z - centerpos_z) / zdim;
  681. float wdif = 0.36f;
  682. float dist = sqrt(xdif * xdif + ydif * ydif + zdif * zdif + wdif * wdif);
  683. *bef = pow(dist, ewp->block_artifact_suppression);
  684. bef++;
  685. }
  686. }
  687. // Function to set error weights for each color component for each texel in a block.
  688. // Returns the sum of all the error values set.
  689. float prepare_error_weight_block(const astc_codec_image * input_image,
  690. int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, error_weight_block * ewb, error_weight_block_orig * ewbo)
  691. {
  692. int x, y, z;
  693. int idx = 0;
  694. int any_mean_stdev_weight =
  695. ewp->rgb_base_weight != 1.0 || ewp->alpha_base_weight != 1.0 || ewp->rgb_mean_weight != 0.0 || ewp->rgb_stdev_weight != 0.0 || ewp->alpha_mean_weight != 0.0 || ewp->alpha_stdev_weight != 0.0;
  696. float4 color_weights = float4(ewp->rgba_weights[0],
  697. ewp->rgba_weights[1],
  698. ewp->rgba_weights[2],
  699. ewp->rgba_weights[3]);
  700. ewb->contains_zeroweight_texels = 0;
  701. for (z = 0; z < zdim; z++)
  702. for (y = 0; y < ydim; y++)
  703. for (x = 0; x < xdim; x++)
  704. {
  705. int xpos = x + blk->xpos;
  706. int ypos = y + blk->ypos;
  707. int zpos = z + blk->zpos;
  708. if (xpos >= input_image->xsize || ypos >= input_image->ysize || zpos >= input_image->zsize)
  709. {
  710. float4 weights = float4(1e-11f, 1e-11f, 1e-11f, 1e-11f);
  711. ewb->error_weights[idx] = weights;
  712. ewb->contains_zeroweight_texels = 1;
  713. }
  714. else
  715. {
  716. float4 error_weight = float4(ewp->rgb_base_weight,
  717. ewp->rgb_base_weight,
  718. ewp->rgb_base_weight,
  719. ewp->alpha_base_weight);
  720. if (any_mean_stdev_weight)
  721. {
  722. float4 avg = input_averages[zpos][ypos][xpos];
  723. if (avg.x < 6e-5f)
  724. avg.x = 6e-5f;
  725. if (avg.y < 6e-5f)
  726. avg.y = 6e-5f;
  727. if (avg.z < 6e-5f)
  728. avg.z = 6e-5f;
  729. if (avg.w < 6e-5f)
  730. avg.w = 6e-5f;
  731. /*
  732. printf("avg: %f %f %f %f\n", avg.x, avg.y, avg.z, avg.w ); */
  733. avg = avg * avg;
  734. float4 variance = input_variances[zpos][ypos][xpos];
  735. variance = variance * variance;
  736. float favg = (avg.x + avg.y + avg.z) * (1.0f / 3.0f);
  737. float fvar = (variance.x + variance.y + variance.z) * (1.0f / 3.0f);
  738. float mixing = ewp->rgb_mean_and_stdev_mixing;
  739. avg.xyz = float3(favg, favg, favg) * mixing + avg.xyz * (1.0f - mixing);
  740. variance.xyz = float3(fvar, fvar, fvar) * mixing + variance.xyz * (1.0f - mixing);
  741. float4 stdev = float4(sqrt(MAX(variance.x, 0.0f)),
  742. sqrt(MAX(variance.y, 0.0f)),
  743. sqrt(MAX(variance.z, 0.0f)),
  744. sqrt(MAX(variance.w, 0.0f)));
  745. avg.xyz = avg.xyz * ewp->rgb_mean_weight;
  746. avg.w = avg.w * ewp->alpha_mean_weight;
  747. stdev.xyz = stdev.xyz * ewp->rgb_stdev_weight;
  748. stdev.w = stdev.w * ewp->alpha_stdev_weight;
  749. error_weight = error_weight + avg + stdev;
  750. error_weight = float4(1.0f, 1.0f, 1.0f, 1.0f) / error_weight;
  751. }
  752. if (ewp->ra_normal_angular_scale)
  753. {
  754. float x = (blk->orig_data[4 * idx] - 0.5f) * 2.0f;
  755. float y = (blk->orig_data[4 * idx + 3] - 0.5f) * 2.0f;
  756. float denom = 1.0f - x * x - y * y;
  757. if (denom < 0.1f)
  758. denom = 0.1f;
  759. denom = 1.0f / denom;
  760. error_weight.x *= 1.0f + x * x * denom;
  761. error_weight.w *= 1.0f + y * y * denom;
  762. }
  763. if (ewp->enable_rgb_scale_with_alpha)
  764. {
  765. float alpha_scale;
  766. if (ewp->alpha_radius != 0)
  767. alpha_scale = input_alpha_averages[zpos][ypos][xpos];
  768. else
  769. alpha_scale = blk->orig_data[4 * idx + 3];
  770. if (alpha_scale < 0.0001f)
  771. alpha_scale = 0.0001f;
  772. alpha_scale *= alpha_scale;
  773. error_weight.xyz = error_weight.xyz * alpha_scale;
  774. }
  775. error_weight = error_weight * color_weights;
  776. error_weight = error_weight * ewp->block_artifact_suppression_expanded[idx];
  777. // if we perform a conversion from linear to sRGB, then we multiply
  778. // the weight with the derivative of the linear->sRGB transform function.
  779. if (perform_srgb_transform)
  780. {
  781. float r = blk->orig_data[4 * idx];
  782. float g = blk->orig_data[4 * idx + 1];
  783. float b = blk->orig_data[4 * idx + 2];
  784. if (r < 0.0031308f)
  785. r = 12.92f;
  786. else
  787. r = 0.4396f * pow(r, -0.58333f);
  788. if (g < 0.0031308f)
  789. g = 12.92f;
  790. else
  791. g = 0.4396f * pow(g, -0.58333f);
  792. if (b < 0.0031308f)
  793. b = 12.92f;
  794. else
  795. b = 0.4396f * pow(b, -0.58333f);
  796. error_weight.x *= r;
  797. error_weight.y *= g;
  798. error_weight.z *= b;
  799. }
  800. /*
  801. printf("%f %f %f %f\n", error_weight.x, error_weight.y, error_weight.z, error_weight.w );
  802. */
  803. // when we loaded the block to begin with, we applied a transfer function
  804. // and computed the derivative of the transfer function. However, the
  805. // error-weight computation so far is based on the original color values,
  806. // not the transfer-function values. As such, we must multiply the
  807. // error weights by the derivative of the inverse of the transfer function,
  808. // which is equivalent to dividing by the derivative of the transfer
  809. // function.
  810. ewbo->error_weights[idx] = error_weight;
  811. error_weight.x /= (blk->deriv_data[4 * idx] * blk->deriv_data[4 * idx] * 1e-10f);
  812. error_weight.y /= (blk->deriv_data[4 * idx + 1] * blk->deriv_data[4 * idx + 1] * 1e-10f);
  813. error_weight.z /= (blk->deriv_data[4 * idx + 2] * blk->deriv_data[4 * idx + 2] * 1e-10f);
  814. error_weight.w /= (blk->deriv_data[4 * idx + 3] * blk->deriv_data[4 * idx + 3] * 1e-10f);
  815. /*
  816. printf("--> %f %f %f %f\n", error_weight.x, error_weight.y, error_weight.z, error_weight.w );
  817. */
  818. ewb->error_weights[idx] = error_weight;
  819. if (dot(error_weight, float4(1, 1, 1, 1)) < 1e-10f)
  820. ewb->contains_zeroweight_texels = 1;
  821. }
  822. idx++;
  823. }
  824. int i;
  825. float4 error_weight_sum = float4(0, 0, 0, 0);
  826. int texels_per_block = xdim * ydim * zdim;
  827. for (i = 0; i < texels_per_block; i++)
  828. {
  829. error_weight_sum = error_weight_sum + ewb->error_weights[i];
  830. ewb->texel_weight_r[i] = ewb->error_weights[i].x;
  831. ewb->texel_weight_g[i] = ewb->error_weights[i].y;
  832. ewb->texel_weight_b[i] = ewb->error_weights[i].z;
  833. ewb->texel_weight_a[i] = ewb->error_weights[i].w;
  834. ewb->texel_weight_rg[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y) * 0.5f;
  835. ewb->texel_weight_rb[i] = (ewb->error_weights[i].x + ewb->error_weights[i].z) * 0.5f;
  836. ewb->texel_weight_gb[i] = (ewb->error_weights[i].y + ewb->error_weights[i].z) * 0.5f;
  837. ewb->texel_weight_ra[i] = (ewb->error_weights[i].x + ewb->error_weights[i].w) * 0.5f;
  838. ewb->texel_weight_gba[i] = (ewb->error_weights[i].y + ewb->error_weights[i].z + ewb->error_weights[i].w) * 0.333333f;
  839. ewb->texel_weight_rba[i] = (ewb->error_weights[i].x + ewb->error_weights[i].z + ewb->error_weights[i].w) * 0.333333f;
  840. ewb->texel_weight_rga[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y + ewb->error_weights[i].w) * 0.333333f;
  841. ewb->texel_weight_rgb[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y + ewb->error_weights[i].z) * 0.333333f;
  842. ewb->texel_weight[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y + ewb->error_weights[i].z + ewb->error_weights[i].w) * 0.25f;
  843. }
  844. return dot(error_weight_sum, float4(1, 1, 1, 1));
  845. }
/*
    functions to analyze block statistical properties:
        * simple properties: * mean * variance
        * covariance-matrix correlation coefficients
*/
  851. // compute averages and covariance matrices for 4 components
  852. static void compute_covariance_matrix(int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, mat4 * cov_matrix)
  853. {
  854. int i;
  855. int texels_per_block = xdim * ydim * zdim;
  856. float r_sum = 0.0f;
  857. float g_sum = 0.0f;
  858. float b_sum = 0.0f;
  859. float a_sum = 0.0f;
  860. float rr_sum = 0.0f;
  861. float gg_sum = 0.0f;
  862. float bb_sum = 0.0f;
  863. float aa_sum = 0.0f;
  864. float rg_sum = 0.0f;
  865. float rb_sum = 0.0f;
  866. float ra_sum = 0.0f;
  867. float gb_sum = 0.0f;
  868. float ga_sum = 0.0f;
  869. float ba_sum = 0.0f;
  870. float weight_sum = 0.0f;
  871. for (i = 0; i < texels_per_block; i++)
  872. {
  873. float weight = ewb->texel_weight[i];
  874. if (weight < 0.0f)
  875. ASTC_CODEC_INTERNAL_ERROR;
  876. weight_sum += weight;
  877. float r = blk->work_data[4 * i];
  878. float g = blk->work_data[4 * i + 1];
  879. float b = blk->work_data[4 * i + 2];
  880. float a = blk->work_data[4 * i + 3];
  881. r_sum += r * weight;
  882. rr_sum += r * (r * weight);
  883. rg_sum += g * (r * weight);
  884. rb_sum += b * (r * weight);
  885. ra_sum += a * (r * weight);
  886. g_sum += g * weight;
  887. gg_sum += g * (g * weight);
  888. gb_sum += b * (g * weight);
  889. ga_sum += a * (g * weight);
  890. b_sum += b * weight;
  891. bb_sum += b * (b * weight);
  892. ba_sum += a * (b * weight);
  893. a_sum += a * weight;
  894. aa_sum += a * (a * weight);
  895. }
  896. float rpt = 1.0f / MAX(weight_sum, 1e-7f);
  897. float rs = r_sum;
  898. float gs = g_sum;
  899. float bs = b_sum;
  900. float as = a_sum;
  901. cov_matrix->v[0] = float4(rr_sum - rs * rs * rpt, rg_sum - rs * gs * rpt, rb_sum - rs * bs * rpt, ra_sum - rs * as * rpt);
  902. cov_matrix->v[1] = float4(rg_sum - rs * gs * rpt, gg_sum - gs * gs * rpt, gb_sum - gs * bs * rpt, ga_sum - gs * as * rpt);
  903. cov_matrix->v[2] = float4(rb_sum - rs * bs * rpt, gb_sum - gs * bs * rpt, bb_sum - bs * bs * rpt, ba_sum - bs * as * rpt);
  904. cov_matrix->v[3] = float4(ra_sum - rs * as * rpt, ga_sum - gs * as * rpt, ba_sum - bs * as * rpt, aa_sum - as * as * rpt);
  905. }
  906. void prepare_block_statistics(int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, int *is_normal_map, float *lowest_correl)
  907. {
  908. int i;
  909. mat4 cov_matrix;
  910. compute_covariance_matrix(xdim, ydim, zdim, blk, ewb, &cov_matrix);
  911. // use the covariance matrix to compute
  912. // correllation coefficients
  913. float rr_var = cov_matrix.v[0].x;
  914. float gg_var = cov_matrix.v[1].y;
  915. float bb_var = cov_matrix.v[2].z;
  916. float aa_var = cov_matrix.v[3].w;
  917. float rg_correlation = cov_matrix.v[0].y / sqrt(MAX(rr_var * gg_var, 1e-30f));
  918. float rb_correlation = cov_matrix.v[0].z / sqrt(MAX(rr_var * bb_var, 1e-30f));
  919. float ra_correlation = cov_matrix.v[0].w / sqrt(MAX(rr_var * aa_var, 1e-30f));
  920. float gb_correlation = cov_matrix.v[1].z / sqrt(MAX(gg_var * bb_var, 1e-30f));
  921. float ga_correlation = cov_matrix.v[1].w / sqrt(MAX(gg_var * aa_var, 1e-30f));
  922. float ba_correlation = cov_matrix.v[2].w / sqrt(MAX(bb_var * aa_var, 1e-30f));
  923. if (astc_isnan(rg_correlation))
  924. rg_correlation = 1.0f;
  925. if (astc_isnan(rb_correlation))
  926. rb_correlation = 1.0f;
  927. if (astc_isnan(ra_correlation))
  928. ra_correlation = 1.0f;
  929. if (astc_isnan(gb_correlation))
  930. gb_correlation = 1.0f;
  931. if (astc_isnan(ga_correlation))
  932. ga_correlation = 1.0f;
  933. if (astc_isnan(ba_correlation))
  934. ba_correlation = 1.0f;
  935. float lowest_correlation = MIN(fabs(rg_correlation), fabs(rb_correlation));
  936. lowest_correlation = MIN(lowest_correlation, fabs(ra_correlation));
  937. lowest_correlation = MIN(lowest_correlation, fabs(gb_correlation));
  938. lowest_correlation = MIN(lowest_correlation, fabs(ga_correlation));
  939. lowest_correlation = MIN(lowest_correlation, fabs(ba_correlation));
  940. *lowest_correl = lowest_correlation;
  941. // compute a "normal-map" factor
  942. // this factor should be exactly 0.0 for a normal map, while it may be all over the
  943. // place for anything that is NOT a normal map. We can probably assume that a factor
  944. // of less than 0.2f represents a normal map.
  945. float nf_sum = 0.0f;
  946. int texels_per_block = xdim * ydim * zdim;
  947. for (i = 0; i < texels_per_block; i++)
  948. {
  949. float3 val = float3(blk->orig_data[4 * i],
  950. blk->orig_data[4 * i + 1],
  951. blk->orig_data[4 * i + 2]);
  952. val = (val - float3(0.5f, 0.5f, 0.5f)) * 2.0f;
  953. float length_squared = dot(val, val);
  954. float nf = fabs(length_squared - 1.0f);
  955. nf_sum += nf;
  956. }
  957. float nf_avg = nf_sum / texels_per_block;
  958. *is_normal_map = nf_avg < 0.2;
  959. }
  960. void compress_constant_color_block(int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, symbolic_compressed_block * scb)
  961. {
  962. int texel_count = xdim * ydim * zdim;
  963. int i;
  964. float4 color_sum = float4(0, 0, 0, 0);
  965. float4 color_weight_sum = float4(0, 0, 0, 0);
  966. const float *clp = blk->work_data;
  967. for (i = 0; i < texel_count; i++)
  968. {
  969. float4 weights = ewb->error_weights[i];
  970. float4 color_data = float4(clp[4 * i], clp[4 * i + 1], clp[4 * i + 2], clp[4 * i + 3]);
  971. color_sum = color_sum + (color_data * weights);
  972. color_weight_sum = color_weight_sum + weights;
  973. }
  974. float4 avg_color = color_sum / color_weight_sum;
  975. int use_fp16 = blk->rgb_lns[0];
  976. #ifdef DEBUG_PRINT_DIAGNOSTICS
  977. if (print_diagnostics)
  978. {
  979. printf("Averaged color: %f %f %f %f\n", avg_color.x, avg_color.y, avg_color.z, avg_color.w);
  980. }
  981. #endif
  982. // convert the color
  983. if (blk->rgb_lns[0])
  984. {
  985. int avg_red = static_cast < int >(floor(avg_color.x + 0.5f));
  986. int avg_green = static_cast < int >(floor(avg_color.y + 0.5f));
  987. int avg_blue = static_cast < int >(floor(avg_color.z + 0.5f));
  988. if (avg_red < 0)
  989. avg_red = 0;
  990. else if (avg_red > 65535)
  991. avg_red = 65535;
  992. if (avg_green < 0)
  993. avg_green = 0;
  994. else if (avg_green > 65535)
  995. avg_green = 65535;
  996. if (avg_blue < 0)
  997. avg_blue = 0;
  998. else if (avg_blue > 65535)
  999. avg_blue = 65535;
  1000. avg_color.x = sf16_to_float(lns_to_sf16(avg_red));
  1001. avg_color.y = sf16_to_float(lns_to_sf16(avg_green));
  1002. avg_color.z = sf16_to_float(lns_to_sf16(avg_blue));
  1003. }
  1004. else
  1005. {
  1006. avg_color.x *= (1.0f / 65535.0f);
  1007. avg_color.y *= (1.0f / 65535.0f);
  1008. avg_color.z *= (1.0f / 65535.0f);
  1009. }
  1010. if (blk->alpha_lns[0])
  1011. {
  1012. int avg_alpha = static_cast < int >(floor(avg_color.w + 0.5f));
  1013. if (avg_alpha < 0)
  1014. avg_alpha = 0;
  1015. else if (avg_alpha > 65535)
  1016. avg_alpha = 65535;
  1017. avg_color.w = sf16_to_float(lns_to_sf16(avg_alpha));
  1018. }
  1019. else
  1020. {
  1021. avg_color.w *= (1.0f / 65535.0f);
  1022. }
  1023. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1024. if (print_diagnostics)
  1025. {
  1026. printf("Averaged color: %f %f %f %f (%d)\n", avg_color.x, avg_color.y, avg_color.z, avg_color.w, use_fp16);
  1027. }
  1028. #endif
  1029. if (use_fp16)
  1030. {
  1031. scb->error_block = 0;
  1032. scb->block_mode = -1;
  1033. scb->partition_count = 0;
  1034. scb->constant_color[0] = float_to_sf16(avg_color.x, SF_NEARESTEVEN);
  1035. scb->constant_color[1] = float_to_sf16(avg_color.y, SF_NEARESTEVEN);
  1036. scb->constant_color[2] = float_to_sf16(avg_color.z, SF_NEARESTEVEN);
  1037. scb->constant_color[3] = float_to_sf16(avg_color.w, SF_NEARESTEVEN);
  1038. }
  1039. else
  1040. {
  1041. scb->error_block = 0;
  1042. scb->block_mode = -2;
  1043. scb->partition_count = 0;
  1044. float red = avg_color.x;
  1045. float green = avg_color.y;
  1046. float blue = avg_color.z;
  1047. float alpha = avg_color.w;
  1048. if (red < 0)
  1049. red = 0;
  1050. else if (red > 1)
  1051. red = 1;
  1052. if (green < 0)
  1053. green = 0;
  1054. else if (green > 1)
  1055. green = 1;
  1056. if (blue < 0)
  1057. blue = 0;
  1058. else if (blue > 1)
  1059. blue = 1;
  1060. if (alpha < 0)
  1061. alpha = 0;
  1062. else if (alpha > 1)
  1063. alpha = 1;
  1064. scb->constant_color[0] = static_cast < int >(floor(red * 65535.0f + 0.5f));
  1065. scb->constant_color[1] = static_cast < int >(floor(green * 65535.0f + 0.5f));
  1066. scb->constant_color[2] = static_cast < int >(floor(blue * 65535.0f + 0.5f));
  1067. scb->constant_color[3] = static_cast < int >(floor(alpha * 65535.0f + 0.5f));
  1068. }
  1069. }
  1070. int block_mode_histogram[2048];
  1071. float compress_symbolic_block(const astc_codec_image * input_image,
  1072. astc_decode_mode decode_mode, int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, symbolic_compressed_block * scb,
  1073. compress_symbolic_block_buffers * tmpbuf)
  1074. {
  1075. int i, j;
  1076. int xpos = blk->xpos;
  1077. int ypos = blk->ypos;
  1078. int zpos = blk->zpos;
  1079. int x, y, z;
  1080. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1081. if (print_diagnostics)
  1082. {
  1083. printf("Diagnostics of block of dimension %d x %d x %d\n\n", xdim, ydim, zdim);
  1084. printf("XPos: %d YPos: %d ZPos: %d\n", xpos, ypos, zpos);
  1085. printf("Red-min: %f Red-max: %f\n", blk->red_min, blk->red_max);
  1086. printf("Green-min: %f Green-max: %f\n", blk->green_min, blk->green_max);
  1087. printf("Blue-min: %f Blue-max: %f\n", blk->blue_min, blk->blue_max);
  1088. printf("Alpha-min: %f Alpha-max: %f\n", blk->alpha_min, blk->alpha_max);
  1089. printf("Grayscale: %d\n", blk->grayscale);
  1090. for (z = 0; z < zdim; z++)
  1091. for (y = 0; y < ydim; y++)
  1092. for (x = 0; x < xdim; x++)
  1093. {
  1094. int idx = ((z * ydim + y) * xdim + x) * 4;
  1095. printf("Texel (%d %d %d) : orig=< %g, %g, %g, %g >, work=< %g, %g, %g, %g >\n",
  1096. x, y, z,
  1097. blk->orig_data[idx],
  1098. blk->orig_data[idx + 1], blk->orig_data[idx + 2], blk->orig_data[idx + 3], blk->work_data[idx], blk->work_data[idx + 1], blk->work_data[idx + 2], blk->work_data[idx + 3]);
  1099. }
  1100. printf("\n");
  1101. }
  1102. #endif
  1103. if (blk->red_min == blk->red_max && blk->green_min == blk->green_max && blk->blue_min == blk->blue_max && blk->alpha_min == blk->alpha_max)
  1104. {
  1105. // detected a constant-color block. Encode as FP16 if using HDR
  1106. scb->error_block = 0;
  1107. if (rgb_force_use_of_hdr)
  1108. {
  1109. scb->block_mode = -1;
  1110. scb->partition_count = 0;
  1111. scb->constant_color[0] = float_to_sf16(blk->orig_data[0], SF_NEARESTEVEN);
  1112. scb->constant_color[1] = float_to_sf16(blk->orig_data[1], SF_NEARESTEVEN);
  1113. scb->constant_color[2] = float_to_sf16(blk->orig_data[2], SF_NEARESTEVEN);
  1114. scb->constant_color[3] = float_to_sf16(blk->orig_data[3], SF_NEARESTEVEN);
  1115. }
  1116. else
  1117. {
  1118. // Encode as UNORM16 if NOT using HDR.
  1119. scb->block_mode = -2;
  1120. scb->partition_count = 0;
  1121. float red = blk->orig_data[0];
  1122. float green = blk->orig_data[1];
  1123. float blue = blk->orig_data[2];
  1124. float alpha = blk->orig_data[3];
  1125. if (red < 0)
  1126. red = 0;
  1127. else if (red > 1)
  1128. red = 1;
  1129. if (green < 0)
  1130. green = 0;
  1131. else if (green > 1)
  1132. green = 1;
  1133. if (blue < 0)
  1134. blue = 0;
  1135. else if (blue > 1)
  1136. blue = 1;
  1137. if (alpha < 0)
  1138. alpha = 0;
  1139. else if (alpha > 1)
  1140. alpha = 1;
  1141. scb->constant_color[0] = (int)floor(red * 65535.0f + 0.5f);
  1142. scb->constant_color[1] = (int)floor(green * 65535.0f + 0.5f);
  1143. scb->constant_color[2] = (int)floor(blue * 65535.0f + 0.5f);
  1144. scb->constant_color[3] = (int)floor(alpha * 65535.0f + 0.5f);
  1145. }
  1146. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1147. if (print_diagnostics)
  1148. {
  1149. printf("Block is single-color <%4.4X %4.4X %4.4X %4.4X>\n", scb->constant_color[0], scb->constant_color[1], scb->constant_color[2], scb->constant_color[3]);
  1150. }
  1151. #endif
  1152. if (print_tile_errors)
  1153. printf("0\n");
  1154. physical_compressed_block psb = symbolic_to_physical(xdim, ydim, zdim, scb);
  1155. physical_to_symbolic(xdim, ydim, zdim, psb, scb);
  1156. return 0.0f;
  1157. }
  1158. error_weight_block *ewb = tmpbuf->ewb;
  1159. error_weight_block_orig *ewbo = tmpbuf->ewbo;
  1160. float error_weight_sum = prepare_error_weight_block(input_image,
  1161. xdim, ydim, zdim,
  1162. ewp, blk, ewb, ewbo);
  1163. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1164. if (print_diagnostics)
  1165. {
  1166. printf("\n");
  1167. for (z = 0; z < zdim; z++)
  1168. for (y = 0; y < ydim; y++)
  1169. for (x = 0; x < xdim; x++)
  1170. {
  1171. int idx = (z * ydim + y) * xdim + x;
  1172. printf("ErrorWeight (%d %d %d) : < %g, %g, %g, %g >\n", x, y, z, ewb->error_weights[idx].x, ewb->error_weights[idx].y, ewb->error_weights[idx].z, ewb->error_weights[idx].w);
  1173. }
  1174. printf("\n");
  1175. }
  1176. #endif
  1177. symbolic_compressed_block *tempblocks = tmpbuf->tempblocks;
  1178. float error_of_best_block = 1e20f;
  1179. // int modesel=0;
  1180. imageblock *temp = tmpbuf->temp;
  1181. float best_errorvals_in_modes[17];
  1182. for (i = 0; i < 17; i++)
  1183. best_errorvals_in_modes[i] = 1e30f;
  1184. int uses_alpha = imageblock_uses_alpha(xdim, ydim, zdim, blk);
  1185. // compression of average-color blocks disabled for the time being;
  1186. // they produce extremely severe block artifacts.
  1187. #if 0
  1188. // first, compress an averaged-color block
  1189. compress_constant_color_block(xdim, ydim, zdim, blk, ewb, scb);
  1190. decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, scb, temp);
  1191. float avgblock_errorval = compute_imageblock_difference(xdim, ydim, zdim,
  1192. blk, temp, ewb) * 4.0f; // bias somewhat against the average-color block.
  1193. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1194. if (print_diagnostics)
  1195. {
  1196. printf("\n-----------------------------------\n");
  1197. printf("Average-color block test completed\n");
  1198. printf("Resulting error value: %g\n", avgblock_errorval);
  1199. }
  1200. #endif
  1201. if (avgblock_errorval < error_of_best_block)
  1202. {
  1203. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1204. if (print_diagnostics)
  1205. printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
  1206. #endif
  1207. error_of_best_block = avgblock_errorval;
  1208. // *scb = tempblocks[j];
  1209. modesel = 0;
  1210. }
  1211. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1212. if (print_diagnostics)
  1213. {
  1214. printf("-----------------------------------\n");
  1215. }
  1216. #endif
  1217. #endif
  1218. float mode_cutoff = ewp->block_mode_cutoff;
  1219. // next, test mode #0. This mode uses 1 plane of weights and 1 partition.
  1220. // we test it twice, first with a modecutoff of 0, then with the specified mode-cutoff.
  1221. // This causes an early-out that speeds up encoding of "easy" content.
  1222. float modecutoffs[2];
  1223. float errorval_mult[2] = { 2.5, 1 };
  1224. modecutoffs[0] = 0;
  1225. modecutoffs[1] = mode_cutoff;
  1226. #if 0
  1227. if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
  1228. goto END_OF_TESTS;
  1229. #endif
  1230. float best_errorval_in_mode;
  1231. for (i = 0; i < 2; i++)
  1232. {
  1233. compress_symbolic_block_fixed_partition_1_plane(decode_mode, modecutoffs[i], ewp->max_refinement_iters, xdim, ydim, zdim, 1, // partition count
  1234. 0, // partition index
  1235. blk, ewb, tempblocks, tmpbuf->plane1);
  1236. best_errorval_in_mode = 1e30f;
  1237. for (j = 0; j < 4; j++)
  1238. {
  1239. if (tempblocks[j].error_block)
  1240. continue;
  1241. decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
  1242. float errorval = compute_imageblock_difference(xdim, ydim, zdim,
  1243. blk, temp, ewb) * errorval_mult[i];
  1244. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1245. if (print_diagnostics)
  1246. {
  1247. printf("\n-----------------------------------\n");
  1248. printf("Single-weight partition test 0 (1 partition) completed\n");
  1249. printf("Resulting error value: %g\n", errorval);
  1250. }
  1251. #endif
  1252. if (errorval < best_errorval_in_mode)
  1253. best_errorval_in_mode = errorval;
  1254. if (errorval < error_of_best_block)
  1255. {
  1256. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1257. if (print_diagnostics)
  1258. printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
  1259. #endif
  1260. error_of_best_block = errorval;
  1261. *scb = tempblocks[j];
  1262. // modesel = 0;
  1263. }
  1264. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1265. if (print_diagnostics)
  1266. {
  1267. printf("-----------------------------------\n");
  1268. }
  1269. #endif
  1270. }
  1271. best_errorvals_in_modes[0] = best_errorval_in_mode;
  1272. if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
  1273. goto END_OF_TESTS;
  1274. }
  1275. int is_normal_map;
  1276. float lowest_correl;
  1277. prepare_block_statistics(xdim, ydim, zdim, blk, ewb, &is_normal_map, &lowest_correl);
  1278. if (is_normal_map && lowest_correl < 0.99f)
  1279. lowest_correl = 0.99f;
  1280. // next, test the four possible 1-partition, 2-planes modes
  1281. for (i = 0; i < 4; i++)
  1282. {
  1283. if (lowest_correl > ewp->lowest_correlation_cutoff)
  1284. continue;
  1285. if (blk->grayscale && i != 3)
  1286. continue;
  1287. if (!uses_alpha && i == 3)
  1288. continue;
  1289. compress_symbolic_block_fixed_partition_2_planes(decode_mode, mode_cutoff, ewp->max_refinement_iters, xdim, ydim, zdim, 1, // partition count
  1290. 0, // partition index
  1291. i, // the color component to test a separate plane of weights for.
  1292. blk, ewb, tempblocks, tmpbuf->planes2);
  1293. best_errorval_in_mode = 1e30f;
  1294. for (j = 0; j < 4; j++)
  1295. {
  1296. if (tempblocks[j].error_block)
  1297. continue;
  1298. decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
  1299. float errorval = compute_imageblock_difference(xdim, ydim, zdim,
  1300. blk, temp, ewb);
  1301. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1302. if (print_diagnostics)
  1303. {
  1304. printf("\n-----------------------------------\n");
  1305. printf("Dual-weight partition test %d (1 partition) completed\n", i);
  1306. printf("Resulting error value: %g\n", errorval);
  1307. }
  1308. #endif
  1309. if (errorval < best_errorval_in_mode)
  1310. best_errorval_in_mode = errorval;
  1311. if (errorval < error_of_best_block)
  1312. {
  1313. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1314. if (print_diagnostics)
  1315. printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
  1316. #endif
  1317. error_of_best_block = errorval;
  1318. *scb = tempblocks[j];
  1319. // modesel = i+1;
  1320. }
  1321. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1322. if (print_diagnostics)
  1323. {
  1324. printf("-----------------------------------\n");
  1325. }
  1326. #endif
  1327. best_errorvals_in_modes[i + 1] = best_errorval_in_mode;
  1328. }
  1329. if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
  1330. goto END_OF_TESTS;
  1331. }
  1332. // find best blocks for 2, 3 and 4 partitions
  1333. int partition_count;
  1334. for (partition_count = 2; partition_count <= 4; partition_count++)
  1335. {
  1336. int partition_indices_1plane[2];
  1337. int partition_indices_2planes[2];
  1338. find_best_partitionings(ewp->partition_search_limit,
  1339. xdim, ydim, zdim, partition_count, blk, ewb, 1,
  1340. &(partition_indices_1plane[0]), &(partition_indices_1plane[1]), &(partition_indices_2planes[0]));
  1341. for (i = 0; i < 2; i++)
  1342. {
  1343. compress_symbolic_block_fixed_partition_1_plane(decode_mode, mode_cutoff, ewp->max_refinement_iters, xdim, ydim, zdim, partition_count, partition_indices_1plane[i], blk, ewb, tempblocks, tmpbuf->plane1);
  1344. best_errorval_in_mode = 1e30f;
  1345. for (j = 0; j < 4; j++)
  1346. {
  1347. if (tempblocks[j].error_block)
  1348. continue;
  1349. decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
  1350. float errorval = compute_imageblock_difference(xdim, ydim, zdim,
  1351. blk, temp, ewb);
  1352. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1353. if (print_diagnostics)
  1354. {
  1355. printf("\n-----------------------------------\n");
  1356. printf("Single-weight partition test %d (%d partitions) completed\n", i, partition_count);
  1357. printf("Resulting error value: %g\n", errorval);
  1358. }
  1359. #endif
  1360. if (errorval < best_errorval_in_mode)
  1361. best_errorval_in_mode = errorval;
  1362. if (errorval < error_of_best_block)
  1363. {
  1364. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1365. if (print_diagnostics)
  1366. printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
  1367. #endif
  1368. error_of_best_block = errorval;
  1369. *scb = tempblocks[j];
  1370. // modesel = 4*(partition_count-2) + 5 + i;
  1371. }
  1372. }
  1373. best_errorvals_in_modes[4 * (partition_count - 2) + 5 + i] = best_errorval_in_mode;
  1374. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1375. if (print_diagnostics)
  1376. {
  1377. printf("-----------------------------------\n");
  1378. }
  1379. #endif
  1380. if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
  1381. goto END_OF_TESTS;
  1382. }
  1383. if (partition_count == 2 && !is_normal_map && MIN(best_errorvals_in_modes[5], best_errorvals_in_modes[6]) > (best_errorvals_in_modes[0] * ewp->partition_1_to_2_limit))
  1384. goto END_OF_TESTS;
  1385. // don't bother to check 4 partitions for dual plane of weightss, ever.
  1386. if (partition_count == 4)
  1387. break;
  1388. for (i = 0; i < 2; i++)
  1389. {
  1390. if (lowest_correl > ewp->lowest_correlation_cutoff)
  1391. continue;
  1392. compress_symbolic_block_fixed_partition_2_planes(decode_mode,
  1393. mode_cutoff,
  1394. ewp->max_refinement_iters,
  1395. xdim, ydim, zdim,
  1396. partition_count,
  1397. partition_indices_2planes[i] & (PARTITION_COUNT - 1), partition_indices_2planes[i] >> PARTITION_BITS,
  1398. blk, ewb, tempblocks, tmpbuf->planes2);
  1399. best_errorval_in_mode = 1e30f;
  1400. for (j = 0; j < 4; j++)
  1401. {
  1402. if (tempblocks[j].error_block)
  1403. continue;
  1404. decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
  1405. float errorval = compute_imageblock_difference(xdim, ydim, zdim,
  1406. blk, temp, ewb);
  1407. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1408. if (print_diagnostics)
  1409. {
  1410. printf("\n-----------------------------------\n");
  1411. printf("Dual-weight partition test %d (%d partitions) completed\n", i, partition_count);
  1412. printf("Resulting error value: %g\n", errorval);
  1413. }
  1414. #endif
  1415. if (errorval < best_errorval_in_mode)
  1416. best_errorval_in_mode = errorval;
  1417. if (errorval < error_of_best_block)
  1418. {
  1419. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1420. if (print_diagnostics)
  1421. printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
  1422. #endif
  1423. error_of_best_block = errorval;
  1424. *scb = tempblocks[j];
  1425. // modesel = 4*(partition_count-2) + 5 + 2 + i;
  1426. }
  1427. }
  1428. best_errorvals_in_modes[4 * (partition_count - 2) + 5 + 2 + i] = best_errorval_in_mode;
  1429. #ifdef DEBUG_PRINT_DIAGNOSTICS
  1430. if (print_diagnostics)
  1431. {
  1432. printf("-----------------------------------\n");
  1433. }
  1434. #endif
  1435. if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
  1436. goto END_OF_TESTS;
  1437. }
  1438. }
  1439. END_OF_TESTS:
  1440. #if 0
  1441. if (print_statistics)
  1442. {
  1443. for (i = 0; i < 13; i++)
  1444. printf("%f ", best_errorvals_in_modes[i]);
  1445. printf("%d %f %f %f ", modesel, error_of_best_block,
  1446. MIN(best_errorvals_in_modes[1], best_errorvals_in_modes[2]) / best_errorvals_in_modes[0],
  1447. MIN(MIN(best_errorvals_in_modes[7], best_errorvals_in_modes[8]), best_errorvals_in_modes[9]) / best_errorvals_in_modes[0]);
  1448. printf("\n");
  1449. }
  1450. #endif
  1451. if (scb->block_mode >= 0)
  1452. block_mode_histogram[scb->block_mode & 0x7ff]++;
  1453. // compress/decompress to a physical block
  1454. physical_compressed_block psb = symbolic_to_physical(xdim, ydim, zdim, scb);
  1455. physical_to_symbolic(xdim, ydim, zdim, psb, scb);
  1456. if (print_tile_errors)
  1457. printf("%g\n", error_of_best_block);
  1458. // mean squared error per color component.
  1459. return error_of_best_block / ((float)xdim * ydim * zdim);
  1460. }