// avpcl_mode5.cpp -- AVPCL (BC7) mode 5 block compressor/decompressor
  1. /*
  2. Copyright 2007 nVidia, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
  5. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
  6. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  7. See the License for the specific language governing permissions and limitations under the License.
  8. */
  9. // Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
  10. // x100000 2r 777x2 8x2 2bi 2bi
  11. #include "bits.h"
  12. #include "tile.h"
  13. #include "avpcl.h"
  14. #include "nvcore/debug.h"
  15. #include "nvmath/vector.inl"
  16. #include "nvmath/matrix.inl"
  17. #include "nvmath/fitting.h"
  18. #include "avpcl_utils.h"
  19. #include "endpts.h"
  20. #include <string.h>
  21. #include <float.h>
  22. using namespace nv;
  23. using namespace AVPCL;
  24. // there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
  25. // array 0 is always the RGB array and array 1 is always the A array
  26. #define NINDEXARRAYS 2
  27. #define INDEXARRAY_RGB 0
  28. #define INDEXARRAY_A 1
  29. #define INDEXARRAY_2BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
  30. #define INDEXARRAY_3BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
  31. #define NINDICES3 4
  32. #define INDEXBITS3 2
  33. #define HIGH_INDEXBIT3 (1<<(INDEXBITS3-1))
  34. #define DENOM3 (NINDICES3-1)
  35. #define BIAS3 (DENOM3/2)
  36. #define NINDICES2 4
  37. #define INDEXBITS2 2
  38. #define HIGH_INDEXBIT2 (1<<(INDEXBITS2-1))
  39. #define DENOM2 (NINDICES2-1)
  40. #define BIAS2 (DENOM2/2)
  41. #define NINDICES_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2)
  42. #define INDEXBITS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2)
  43. #define HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2)
  44. #define DENOM_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2)
  45. #define BIAS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2)
  46. #define NINDICES_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3)
  47. #define INDEXBITS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3)
  48. #define HIGH_INDEXBIT_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3)
  49. #define DENOM_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3)
  50. #define BIAS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3)
  51. #define NSHAPES 1
// Shape (partition) table. Mode 5 has a single region, so the only shape
// assigns every texel of the 4x4 tile to region 0 (all mask bits clear).
static int shapes[NSHAPES] =
{
	0x0000,
};
  56. #define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
  57. #define NREGIONS 1 // keep the region stuff in just in case...
  58. // encoded index compression location: region 0 is always at 0,0.
  59. #define NBITSIZES 2 // one endpoint pair
// Storage widths for one color channel: one bit count per endpoint (A, B).
struct ChanBits
{
	int nbitsizes[NBITSIZES];	// bitsizes for one channel
};
// Describes one encoding pattern for this mode: per-channel endpoint bit
// widths, whether endpoints are delta-transformed, and the mode tag bits.
struct Pattern
{
	ChanBits chan[NCHANNELS_RGBA];	// bit patterns used per channel
	int transform_mode;		// x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise.
	int mode;			// associated mode value
	int modebits;			// number of mode bits
	const char *encoding;		// verilog description of encoding for this mode
};
  72. #define TRANSFORM_MODE_ALPHA 1
  73. #define TRANSFORM_MODE_RGB 2
  74. #define NPATTERNS 1
// The single mode-5 pattern: 7-bit RGB endpoints, 8-bit alpha endpoints,
// no endpoint transform (0x0), mode tag 0x20 stored in 6 bits.
// The flat initializer relies on brace elision to fill the nested ChanBits.
static Pattern patterns[NPATTERNS] =
{
	// red	green	blue	alpha	xfm	mode  mb encoding
	7,7,	7,7,	7,7,	8,8,	0x0,	0x20, 6, "",
};
// Endpoint bit precisions for one region, per RGBA channel.
struct RegionPrec
{
	int endpt_a_prec[NCHANNELS_RGBA];	// precision of endpoint A per channel
	int endpt_b_prec[NCHANNELS_RGBA];	// precision of endpoint B per channel
};
// Precision table for all regions of one pattern.
struct PatternPrec
{
	RegionPrec region_precs[NREGIONS];
};
// this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] =
{
	7,7,7,8,	7,7,7,8,	// endpoint A precisions (RGBA), then endpoint B precisions (RGBA)
};
  95. // return # of bits needed to store n. handle signed or unsigned cases properly
  96. static int nbits(int n, bool issigned)
  97. {
  98. int nb;
  99. if (n==0)
  100. return 0; // no bits needed for 0, signed or not
  101. else if (n > 0)
  102. {
  103. for (nb=0; n; ++nb, n>>=1) ;
  104. return nb + (issigned?1:0);
  105. }
  106. else
  107. {
  108. nvAssert (issigned);
  109. for (nb=0; n<-1; ++nb, n>>=1) ;
  110. return nb + 1;
  111. }
  112. }
  113. #define R_0 ep[0].A[i]
  114. #define R_1 ep[0].B[i]
  115. static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS])
  116. {
  117. int i;
  118. if (transform_mode & TRANSFORM_MODE_RGB)
  119. for (i=CHANNEL_R; i<CHANNEL_A; ++i)
  120. R_1 -= R_0;
  121. if (transform_mode & TRANSFORM_MODE_ALPHA)
  122. {
  123. i = CHANNEL_A;
  124. R_1 -= R_0;
  125. }
  126. }
  127. static void transform_inverse(int transform_mode, IntEndptsRGBA ep[NREGIONS])
  128. {
  129. int i;
  130. if (transform_mode & TRANSFORM_MODE_RGB)
  131. for (i=CHANNEL_R; i<CHANNEL_A; ++i)
  132. R_1 += R_0;
  133. if (transform_mode & TRANSFORM_MODE_ALPHA)
  134. {
  135. i = CHANNEL_A;
  136. R_1 += R_0;
  137. }
  138. }
  139. static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA q_endpts[NREGIONS])
  140. {
  141. for (int region = 0; region < NREGIONS; ++region)
  142. {
  143. q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
  144. q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
  145. q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
  146. q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]);
  147. q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
  148. q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
  149. q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
  150. q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]);
  151. }
  152. }
// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
{
	// Each region's anchor texel is written with its high index bit dropped,
	// so that bit must be 0. If it is set, swap endpoints A<->B and complement
	// every index of the region: the reconstruction is unchanged. The RGB and
	// A index planes are fixed up independently since they select endpoints
	// for disjoint channel groups.
	int index_positions[NREGIONS];

	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0

	for (int region = 0; region < NREGIONS; ++region)
	{
		// decode the anchor position into tile coordinates
		int x = index_positions[region] & 3;
		int y = (index_positions[region] >> 2) & 3;
		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table

		// swap RGB
		if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
		{
			// high bit is set, swap the endpts and indices for this region
			// (RGB components of the endpoints only)
			int t;
			for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }

			// complement the RGB index of every texel in this region
			// (these x,y deliberately shadow the anchor coordinates above)
			for (int y = 0; y < Tile::TILE_H; y++)
			for (int x = 0; x < Tile::TILE_W; x++)
				if (REGION(x,y,shapeindex) == region)
					indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x];
		}

		// swap A
		if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode))
		{
			// high bit is set, swap the endpts and indices for this region
			// (alpha component of the endpoints only)
			int t;
			for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }

			// complement the A index of every texel in this region
			for (int y = 0; y < Tile::TILE_H; y++)
			for (int x = 0; x < Tile::TILE_W; x++)
				if (REGION(x,y,shapeindex) == region)
					indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x];
		}
	}
}
  188. static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p)
  189. {
  190. return true;
  191. }
// Serialize the block header: mode tag, rotate mode, then the endpoint
// fields interleaved A,B per channel. Total header size is fixed:
// 6 mode bits + 2 rotate bits + 2*(7+7+7+8) endpoint bits = 66.
static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out)
{
	// ignore shapeindex
	out.write(p.mode, p.modebits);
	out.write(rotatemode, ROTATEMODE_BITS);
//	out.write(indexmode, INDEXMODE_BITS);		// mode 5 has no index-mode field
	for (int i=0; i<NREGIONS; ++i)
	for (int j=0; j<NCHANNELS_RGBA; ++j)
	{
		out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);
		out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);
	}
	nvAssert (out.getptr() == 66);	// verify the fixed header size
}
// Parse the header written by write_header and look up the pattern.
// shapeindex and indexmode are outputs forced to 0: mode 5 encodes neither.
static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
{
	// NOTE(review): the returned mode value is unused below; this call is
	// needed for its side effect of advancing the bit pointer past the tag.
	int mode = AVPCL::getmode(in);

	pat_index = 0;		// only one pattern in this mode
	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
	nvAssert (in.getptr() == patterns[pat_index].modebits);
	p = patterns[pat_index];
	shapeindex = 0;		// we don't have any
	rotatemode = in.read(ROTATEMODE_BITS);
	indexmode = 0;		// we don't have any
	// endpoints: A then B for each channel, same order as write_header
	for (int i=0; i<NREGIONS; ++i)
	for (int j=0; j<NCHANNELS_RGBA; ++j)
	{
		endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
		endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
	}
	nvAssert (in.getptr() == 66);	// header must be exactly 66 bits
}
// Write both index planes. The anchor texel (0,0) of each plane is written
// with its high bit dropped (swap_indices guarantees it is 0). The
// INDEXARRAY_2BITS plane goes first, then the INDEXARRAY_3BITS plane; note
// that in this mode both planes are 2 bits wide (INDEXBITS3 == 2 here).
// Texel order is linear: i>>2 is the row, i&3 the column.
static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
{
	// the indices we shorten is always index 0

	// do the 2 bit indices first
	nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0));	// write i..[1:0] or i..[0]

	// then the 3 bit indices
	nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0));	// write i..[1:0] or i..[0] (INDEXBITS3 is 2 in this mode)
}
// Read both index planes in the same order write_indices emitted them:
// the 2-bit plane first, then the 3-bit plane, with the anchor texel (i==0)
// of each plane carrying one fewer bit (its high bit is implicitly 0).
static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
{
	// the indices we shorten is always index 0

	// do the 2 bit indices first
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0));	// read i..[1:0] or i..[0]

	// then the 3 bit indices
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0));	// read i..[1:0] or i..[0] (INDEXBITS3 is 2 in this mode)
}
  246. static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block)
  247. {
  248. Bits out(block, AVPCL::BITSIZE);
  249. write_header(endpts, shapeindex, p, rotatemode, indexmode, out);
  250. write_indices(indices, shapeindex, indexmode, out);
  251. nvAssert(out.getptr() == AVPCL::BITSIZE);
  252. }
  253. static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
  254. {
  255. // scale endpoints for RGB
  256. int a, b;
  257. a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]);
  258. b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
  259. // interpolate R
  260. for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
  261. palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
  262. a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]);
  263. b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
  264. // interpolate G
  265. for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
  266. palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
  267. a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]);
  268. b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
  269. // interpolate B
  270. for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
  271. palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
  272. a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]);
  273. b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
  274. // interpolate A
  275. for (int i = 0; i < NINDICES_A(indexmode); ++i)
  276. palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
  277. }
// Sign-extend endpoint fields after reading; only needed when the pattern
// uses delta-transformed (signed) endpoints.
// NOTE(review): endpts[1] is indexed below even though the array is declared
// with NREGIONS == 1, which would be out of bounds if executed. The mode-5
// pattern has transform_mode == 0x0, so this branch is currently dead --
// confirm and fix the indexing before ever enabling transforms here.
static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
{
	for (int i=0; i<NCHANNELS_RGBA; ++i)
	{
		if (p.transform_mode)
		{
			// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
			endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
			endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
			endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
		}
	}
}
  291. static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
  292. {
  293. out.size_x = in.size_x;
  294. out.size_y = in.size_y;
  295. for (int y=0; y<in.size_y; ++y)
  296. for (int x=0; x<in.size_x; ++x)
  297. {
  298. float t;
  299. out.data[y][x] = in.data[y][x];
  300. switch(rotatemode)
  301. {
  302. case ROTATEMODE_RGBA_RGBA: break;
  303. case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
  304. case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
  305. case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
  306. default: nvUnreachable();
  307. }
  308. }
  309. }
// Decompress one mode-5 block into tile t: parse the header, rebuild the
// palettes from the quantized endpoints, read the two index planes, look up
// each texel, then undo the channel rotation.
void AVPCL::decompress_mode5(const char *block, Tile &t)
{
	Bits in(block, AVPCL::BITSIZE);

	Pattern p;
	IntEndptsRGBA endpts[NREGIONS];
	int shapeindex, pat_index, rotatemode, indexmode;

	read_header(in, endpts, shapeindex, rotatemode, indexmode, p, pat_index);
	sign_extend(p, endpts);

	// transform_mode is 0 for the mode-5 pattern, so this is a no-op today
	if (p.transform_mode)
		transform_inverse(p.transform_mode, endpts);

	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
	float palette_a[NREGIONS][NINDICES3];		// could be nindices2

	for (int region = 0; region < NREGIONS; ++region)
		generate_palette_quantized_rgb_a(endpts[region], pattern_precs[pat_index].region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);

	int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];

	read_indices(in, shapeindex, indexmode, indices);

	nvAssert(in.getptr() == AVPCL::BITSIZE);	// all bits consumed

	// decompress into a temporary, then rotate into the caller's tile
	Tile temp(t.size_x, t.size_y);

	// lookup: RGB comes from the RGB palette, alpha from the A palette
	for (int y = 0; y < Tile::TILE_H; y++)
	for (int x = 0; x < Tile::TILE_W; x++)
		temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);

	rotate_tile(temp, rotatemode, t);
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
// exceeds what we already have
// On early exit the remaining indices are filled with -1 and FLT_MAX is returned.
// NOTE(review): the importance[] weights are accepted but never applied in this
// function; each color contributes its raw metric error. Confirm intentional.
static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
	Vector3 palette_rgb[NINDICES3];		// could be nindices2
	float palette_a[NINDICES3];		// could be nindices2
	float toterr = 0;

	generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);

	Vector3 rgb;
	float a;

	for (int i = 0; i < np; ++i)
	{
		float err, besterr;
		float palette_alpha = 0, tile_alpha = 0;

		// with premultiplied alpha, the rotated channel carries the real alpha
		if(AVPCL::flag_premult)
			tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
						 (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
						 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;

		rgb.x = (colors[i]).x;
		rgb.y = (colors[i]).y;
		rgb.z = (colors[i]).z;
		a = (colors[i]).w;

		// compute the two indices separately
		// if we're doing premultiplied alpha, we need to choose first the index that
		// determines the alpha value, and then do the other index

		if (rotatemode == ROTATEMODE_RGBA_RGBA)
		{
			// do A index first as it has the alpha
			besterr = FLT_MAX;
			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
			{
				err = Utils::metric1(a, palette_a[j], rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					palette_alpha = palette_a[j];
					indices[INDEXARRAY_A][i] = j;
				}
			}
			toterr += besterr;	// squared-error norms are additive since we don't do the square root

			// do RGB index
			besterr = FLT_MAX;
			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
			{
				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_RGB][i] = j;
				}
			}
			toterr += besterr;

			// early exit: we can no longer beat the caller's best candidate
			if (toterr > current_besterr)
			{
				// fill out bogus index values so it's initialized at least
				for (int k = i; k < np; ++k)
				{
					indices[INDEXARRAY_RGB][k] = -1;
					indices[INDEXARRAY_A][k] = -1;
				}
				return FLT_MAX;
			}
		}
		else
		{
			// the alpha value lives in the rotated RGB channel: pick the RGB
			// index first, derive palette_alpha from it, then pick the A index
			besterr = FLT_MAX;
			int bestindex;
			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
			{
				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
											 Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					bestindex = j;
					indices[INDEXARRAY_RGB][i] = j;
				}
			}
			// NOTE(review): bestindex is set on the first iteration because
			// besterr starts at FLT_MAX -- assumes the metric never returns NaN
			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
			toterr += besterr;

			// do A index
			besterr = FLT_MAX;
			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
			{
				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
											 Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_A][i] = j;
				}
			}
			toterr += besterr;	// squared-error norms are additive since we don't do the square root

			// early exit: we can no longer beat the caller's best candidate
			if (toterr > current_besterr)
			{
				// fill out bogus index values so it's initialized at least
				for (int k = i; k < np; ++k)
				{
					indices[INDEXARRAY_RGB][k] = -1;
					indices[INDEXARRAY_A][k] = -1;
				}
				return FLT_MAX;
			}
		}
	}
	return toterr;
}
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
// For each texel: build the region palettes, then greedily choose the best A
// index and the best RGB index. When premultiplied alpha is enabled, the index
// that determines the alpha value must be chosen first.
static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec,
						   int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{
	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
	float palette_a[NREGIONS][NINDICES3];		// could be nindices2

	for (int region = 0; region < NREGIONS; ++region)
	{
		generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
		toterr[region] = 0;
	}

	Vector3 rgb;
	float a;

	for (int y = 0; y < tile.size_y; y++)
	for (int x = 0; x < tile.size_x; x++)
	{
		int region = REGION(x,y,shapeindex);
		float err, besterr;
		float palette_alpha = 0, tile_alpha = 0;

		rgb.x = (tile.data[y][x]).x;
		rgb.y = (tile.data[y][x]).y;
		rgb.z = (tile.data[y][x]).z;
		a = (tile.data[y][x]).w;

		// with premultiplied alpha, the rotated channel carries the real alpha
		if(AVPCL::flag_premult)
			tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
						 (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
						 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;

		// compute the two indices separately
		// if we're doing premultiplied alpha, we need to choose first the index that
		// determines the alpha value, and then do the other index

		if (rotatemode == ROTATEMODE_RGBA_RGBA)
		{
			// do A index first as it has the alpha
			besterr = FLT_MAX;
			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
			{
				err = Utils::metric1(a, palette_a[region][i], rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_A][y][x] = i;
					palette_alpha = palette_a[region][i];
				}
			}
			toterr[region] += besterr;	// squared-error norms are additive since we don't do the square root

			// do RGB index
			besterr = FLT_MAX;
			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
			{
				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_RGB][y][x] = i;
				}
			}
			toterr[region] += besterr;
		}
		else
		{
			// do RGB index first as it has the alpha
			besterr = FLT_MAX;
			int bestindex;
			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
			{
				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
											 Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_RGB][y][x] = i;
					bestindex = i;
				}
			}
			// NOTE(review): bestindex is set on the first iteration because
			// besterr starts at FLT_MAX -- assumes the metric never returns NaN
			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
			toterr[region] += besterr;

			// do A index
			besterr = FLT_MAX;
			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
			{
				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
											 Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_A][y][x] = i;
				}
			}
			toterr[region] += besterr;	// squared-error norms are additive since we don't do the square root
		}
	}
}
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
// Performs a logarithmic (binary-step) search on one endpoint component:
// channel ch of endpoint A (do_b == 0) or endpoint B (do_b != 0), trying
// +/- step at halving step sizes and keeping any improvement in new_endpts.
static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
						 float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
	// we have the old endpoints: old_endpts
	// we have the perturbed endpoints: new_endpts
	// we have the temporary endpoints: temp_endpts
	IntEndptsRGBA temp_endpts;
	float min_err = old_err;	// start with the best current error
	int beststep;
	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];

	// mark all indices invalid until an improvement is found
	for (int j=0; j<NINDEXARRAYS; ++j)
	for (int i=0; i<np; ++i)
		indices[j][i] = -1;

	// copy real endpoints so we can perturb them
	temp_endpts = new_endpts = old_endpts;

	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];

	// do a logarithmic search for the best error for this endpoint (which)
	for (int step = 1 << (prec-1); step; step >>= 1)
	{
		bool improved = false;
		for (int sign = -1; sign <= 1; sign += 2)
		{
			// perturb the selected component, skipping out-of-range values
			if (do_b == 0)
			{
				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
					continue;
			}
			else
			{
				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
					continue;
			}

			// min_err doubles as the early-exit threshold inside map_colors
			float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);

			if (err < min_err)
			{
				improved = true;
				min_err = err;
				beststep = sign * step;
				// keep the index assignment that achieved this error
				for (int j=0; j<NINDEXARRAYS; ++j)
				for (int i=0; i<np; ++i)
					indices[j][i] = temp_indices[j][i];
			}
		}
		// if this was an improvement, move the endpoint and continue search from there
		if (improved)
		{
			if (do_b == 0)
				new_endpts.A[ch] += beststep;
			else
				new_endpts.B[ch] += beststep;
		}
	}
	return min_err;
}
  615. // the larger the error the more time it is worth spending on an exhaustive search.
  616. // perturb the endpoints at least -3 to 3.
  617. // if err > 5000 perturb endpoints 50% of precision
  618. // if err > 1000 25%
  619. // if err > 200 12.5%
  620. // if err > 40 6.25%
  621. // for np = 16 -- adjust error thresholds as a function of np
  622. // always ensure endpoint ordering is preserved (no need to overlap the scan)
  623. static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
  624. {
  625. IntEndptsRGBA temp_endpts;
  626. float best_err = orig_err;
  627. int aprec = region_prec.endpt_a_prec[ch];
  628. int bprec = region_prec.endpt_b_prec[ch];
  629. int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
  630. int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
  631. for (int j=0; j<NINDEXARRAYS; ++j)
  632. for (int i=0; i<np; ++i)
  633. indices[j][i] = -1;
  634. float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
  635. if (orig_err == 0) return orig_err;
  636. int adelta = 0, bdelta = 0;
  637. if (orig_err > 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
  638. else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
  639. else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
  640. else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
  641. adelta = max(adelta, 3);
  642. bdelta = max(bdelta, 3);
  643. #ifdef DISABLE_EXHAUSTIVE
  644. adelta = bdelta = 3;
  645. #endif
  646. temp_endpts = opt_endpts;
  647. // ok figure out the range of A and B
  648. int alow = max(0, opt_endpts.A[ch] - adelta);
  649. int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
  650. int blow = max(0, opt_endpts.B[ch] - bdelta);
  651. int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
  652. // now there's no need to swap the ordering of A and B
  653. bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
  654. int amin, bmin;
  655. if (opt_endpts.A[ch] <= opt_endpts.B[ch])
  656. {
  657. // keep a <= b
  658. for (int a = alow; a <= ahigh; ++a)
  659. for (int b = max(a, blow); b < bhigh; ++b)
  660. {
  661. temp_endpts.A[ch] = a;
  662. temp_endpts.B[ch] = b;
  663. float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
  664. if (err < best_err)
  665. {
  666. amin = a;
  667. bmin = b;
  668. best_err = err;
  669. for (int j=0; j<NINDEXARRAYS; ++j)
  670. for (int i=0; i<np; ++i)
  671. good_indices[j][i] = temp_indices[j][i];
  672. }
  673. }
  674. }
  675. else
  676. {
  677. // keep b <= a
  678. for (int b = blow; b < bhigh; ++b)
  679. for (int a = max(b, alow); a <= ahigh; ++a)
  680. {
  681. temp_endpts.A[ch] = a;
  682. temp_endpts.B[ch] = b;
  683. float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
  684. if (err < best_err)
  685. {
  686. amin = a;
  687. bmin = b;
  688. best_err = err;
  689. for (int j=0; j<NINDEXARRAYS; ++j)
  690. for (int i=0; i<np; ++i)
  691. good_indices[j][i] = temp_indices[j][i];
  692. }
  693. }
  694. }
  695. if (best_err < orig_err)
  696. {
  697. opt_endpts.A[ch] = amin;
  698. opt_endpts.B[ch] = bmin;
  699. orig_err = best_err;
  700. for (int j=0; j<NINDEXARRAYS; ++j)
  701. for (int i=0; i<np; ++i)
  702. indices[j][i] = good_indices[j][i];
  703. }
  704. return best_err;
  705. }
// Greedy per-channel endpoint optimization for one region.
// Starting from orig_endpts/orig_err, each RGBA channel is improved by
// logarithmic perturbation of whichever endpoint helps most, alternating
// endpoints until no gain, then a bounded exhaustive scan. If either phase
// changes the index assignment, the channel loop restarts from ch == 0
// (channels interact through the shared indices). Returns the final error;
// opt_endpts receives the optimized endpoints.
static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
{
	float opt_err = orig_err;

	opt_endpts = orig_endpts;

	/*
		Sketch of the per-channel loop below:

		err0 = perturb(rgb0, delta0)
		err1 = perturb(rgb1, delta1)
		if (err0 < err1)
			if (err0 >= initial_error) break
			rgb0 += delta0
			next = 1
		else
			if (err1 >= initial_error) break
			rgb1 += delta1
			next = 0
		initial_err = map()
		for (;;)
			err = perturb(next ? rgb1:rgb0, delta)
			if (err >= initial_err) break
			next? rgb1 : rgb0 += delta
			initial_err = err
	*/

	IntEndptsRGBA new_a, new_b;
	IntEndptsRGBA new_endpt;
	int do_b;

	// orig_indices: assignment at the start of the current channel pass;
	// new_indices: assignment after the latest accepted perturbation.
	// Comparing them tells us whether this channel's change rippled into the indices.
	int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
	int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
	int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL];
	int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL];

	// now optimize each channel separately
	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
	{
		// figure out which endpoint when perturbed gives the most improvement and start there
		// if we just alternate, we can easily end up in a local minimum
		float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
		float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B

		if (err0 < err1)
		{
			// neither perturbation helped this channel: leave it and move on
			if (err0 >= opt_err)
				continue;

			for (int j=0; j<NINDEXARRAYS; ++j)
				for (int i=0; i<np; ++i)
				{
					new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
					nvAssert (orig_indices[j][i] != -1);
				}

			opt_endpts.A[ch] = new_a.A[ch];
			opt_err = err0;
			do_b = 1;		// do B next
		}
		else
		{
			if (err1 >= opt_err)
				continue;

			for (int j=0; j<NINDEXARRAYS; ++j)
				for (int i=0; i<np; ++i)
				{
					new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
					nvAssert (orig_indices[j][i] != -1);
				}

			opt_endpts.B[ch] = new_b.B[ch];
			opt_err = err1;
			do_b = 0;		// do A next
		}

		// now alternate endpoints and keep trying until there is no improvement
		for (;;)
		{
			float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			if (err >= opt_err)
				break;

			for (int j=0; j<NINDEXARRAYS; ++j)
				for (int i=0; i<np; ++i)
				{
					new_indices[j][i] = temp_indices0[j][i];
					nvAssert (orig_indices[j][i] != -1);
				}

			if (do_b == 0)
				opt_endpts.A[ch] = new_endpt.A[ch];
			else
				opt_endpts.B[ch] = new_endpt.B[ch];
			opt_err = err;
			do_b = 1 - do_b;	// now move the other endpoint
		}

		// see if the indices have changed
		int i;
		for (i=0; i<np; ++i)
			if (orig_indices[INDEXARRAY_RGB][i] != new_indices[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != new_indices[INDEXARRAY_A][i])
				break;

		if (i<np)
			ch = -1;	// start over (++ch brings us back to channel 0)
	}

	// finally, do a small exhaustive search around what we think is the global minimum to be sure
	bool first = true;
	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
	{
		float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);

		if (new_err < opt_err)
		{
			opt_err = new_err;

			if (first)
			{
				// first improvement: just record the new index assignment
				for (int j=0; j<NINDEXARRAYS; ++j)
					for (int i=0; i<np; ++i)
					{
						orig_indices[j][i] = temp_indices0[j][i];
						nvAssert (orig_indices[j][i] != -1);
					}
				first = false;
			}
			else
			{
				// see if the indices have changed
				int i;
				for (i=0; i<np; ++i)
					if (orig_indices[INDEXARRAY_RGB][i] != temp_indices0[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != temp_indices0[INDEXARRAY_A][i])
						break;

				if (i<np)
				{
					ch = -1;	// start over
					first = true;
				}
			}
		}
	}

	return opt_err;
}
  832. static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS],
  833. const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
  834. {
  835. Vector4 pixels[Tile::TILE_TOTAL];
  836. float importance[Tile::TILE_TOTAL];
  837. IntEndptsRGBA temp_in, temp_out;
  838. for (int region=0; region<NREGIONS; ++region)
  839. {
  840. // collect the pixels in the region
  841. int np = 0;
  842. for (int y = 0; y < tile.size_y; y++) {
  843. for (int x = 0; x < tile.size_x; x++) {
  844. if (REGION(x, y, shapeindex) == region) {
  845. pixels[np] = tile.data[y][x];
  846. importance[np] = tile.importance_map[y][x];
  847. np++;
  848. }
  849. }
  850. }
  851. opt_endpts[region] = temp_in = orig_endpts[region];
  852. opt_err[region] = orig_err[region];
  853. float best_err = orig_err[region];
  854. // make sure we have a valid error for temp_in
  855. // we didn't change temp_in, so orig_err[region] is still valid
  856. float temp_in_err = orig_err[region];
  857. // now try to optimize these endpoints
  858. float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
  859. // if we find an improvement, update the best so far and correct the output endpoints and errors
  860. if (temp_out_err < best_err)
  861. {
  862. best_err = temp_out_err;
  863. opt_err[region] = temp_out_err;
  864. opt_endpts[region] = temp_out;
  865. }
  866. }
  867. }
/* optimization algorithm
	for each pattern
		convert endpoints using pattern precision
		assign indices and get initial error
		compress indices (and possibly reorder endpoints)
		transform endpoints
		if transformed endpoints fit pattern
			get original endpoints back
			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			compress new indices
			transform new endpoints
			if new endpoints fit pattern AND if error is improved
				emit compressed block with new data
			else
				emit compressed block with original data // to try to preserve maximum endpoint precision
*/

// Encode the tile with the given shape/rotation/indexmode, trying each pattern
// in turn and emitting the first candidate whose (transformed) endpoints fit.
// Writes the compressed block into `block` and returns its total squared error;
// FLT_MAX is returned only in the should-never-happen case of no fitting pattern.
static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
{
	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
	IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
	int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];

	for (int sp = 0; sp < NPATTERNS; ++sp)
	{
		// quantize to this pattern's precision, then get the baseline error and indices
		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
		assign_indices(tile, shapeindex_best, rotatemode, indexmode, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
		swap_indices(shapeindex_best, indexmode, orig_endpts, orig_indices);

		if (patterns[sp].transform_mode)
			transform_forward(patterns[sp].transform_mode, orig_endpts);

		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
		// the assumption made is that if they don't fit now, they won't fit after optimizing.
		if (endpts_fit(orig_endpts, patterns[sp]))
		{
			// undo the transform so optimization works on untransformed endpoint values
			if (patterns[sp].transform_mode)
				transform_inverse(patterns[sp].transform_mode, orig_endpts);

			optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);

			assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			// (nreed) Commented out asserts because they go off all the time...not sure why
			//for (int i=0; i<NREGIONS; ++i)
			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);

			if (patterns[sp].transform_mode)
				transform_forward(patterns[sp].transform_mode, opt_endpts);

			orig_toterr = opt_toterr = 0;
			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }

			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			{
				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, rotatemode, indexmode, block);
				return opt_toterr;
			}
			else
			{
				// either it stopped fitting when we optimized it, or there was no improvement
				// so go back to the unoptimized endpoints which we know will fit
				// (orig_endpts were inverse-transformed above, so re-apply the transform before emitting)
				if (patterns[sp].transform_mode)
					transform_forward(patterns[sp].transform_mode, orig_endpts);
				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, rotatemode, indexmode, block);
				return orig_toterr;
			}
		}
	}

	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 5).";
	return FLT_MAX;
}
  931. static void clamp(Vector4 &v)
  932. {
  933. if (v.x < 0.0f) v.x = 0.0f;
  934. if (v.x > 255.0f) v.x = 255.0f;
  935. if (v.y < 0.0f) v.y = 0.0f;
  936. if (v.y > 255.0f) v.y = 255.0f;
  937. if (v.z < 0.0f) v.z = 0.0f;
  938. if (v.z > 255.0f) v.z = 255.0f;
  939. if (v.w < 0.0f) v.w = 0.0f;
  940. if (v.w > 255.0f) v.w = 255.0f;
  941. }
// compute initial endpoints for the "RGB" portion and the "A" portion.
// Note these channels may have been rotated.
// For each region: fit a line through the RGB values via PCA and span the
// projections with the two endpoints; alpha is spanned independently around
// its mean. Regions with <= 2 pixels are handled directly without fitting.
static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
{
	for (int region=0; region<NREGIONS; ++region)
	{
		int np = 0;
		Vector3 colors[Tile::TILE_TOTAL];
		float alphas[Tile::TILE_TOTAL];
		Vector4 mean(0,0,0,0);

		// gather this region's pixels, splitting RGB from alpha, and accumulate the 4D mean
		for (int y = 0; y < tile.size_y; y++)
		for (int x = 0; x < tile.size_x; x++)
			if (REGION(x,y,shapeindex) == region)
			{
				colors[np] = tile.data[y][x].xyz();
				alphas[np] = tile.data[y][x].w;
				mean += tile.data[y][x];
				++np;
			}

		// handle simple cases
		if (np == 0)
		{
			// empty region: emit an arbitrary but valid endpoint pair (opaque black)
			Vector4 zero(0,0,0,255.0f);
			endpts[region].A = zero;
			endpts[region].B = zero;
			continue;
		}
		else if (np == 1)
		{
			endpts[region].A = Vector4(colors[0], alphas[0]);
			endpts[region].B = Vector4(colors[0], alphas[0]);
			continue;
		}
		else if (np == 2)
		{
			endpts[region].A = Vector4(colors[0], alphas[0]);
			endpts[region].B = Vector4(colors[1], alphas[1]);
			continue;
		}

		mean /= float(np);

		// principal axis of the RGB point cloud (alpha is handled separately below)
		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);

		// project each pixel value along the principal direction
		float minp = FLT_MAX, maxp = -FLT_MAX;
		float mina = FLT_MAX, maxa = -FLT_MAX;
		for (int i = 0; i < np; i++)
		{
			float dp = dot(colors[i]-mean.xyz(), direction);
			if (dp < minp) minp = dp;
			if (dp > maxp) maxp = dp;

			// alpha extent is measured as a signed offset from the mean alpha
			dp = alphas[i] - mean.w;
			if (dp < mina) mina = dp;
			if (dp > maxa) maxa = dp;
		}

		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
		endpts[region].A = mean + Vector4(minp*direction, mina);
		endpts[region].B = mean + Vector4(maxp*direction, maxa);

		// clamp endpoints
		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
		// shape based on endpoints being clamped
		clamp(endpts[region].A);
		clamp(endpts[region].B);
	}
}
  1005. float AVPCL::compress_mode5(const Tile &t, char *block)
  1006. {
  1007. FltEndpts endpts[NREGIONS];
  1008. char tempblock[AVPCL::BLOCKSIZE];
  1009. float msebest = FLT_MAX;
  1010. int shape = 0;
  1011. Tile t1;
  1012. // try all rotations. refine tries the 2 different indexings.
  1013. for (int r = 0; r < NROTATEMODES && msebest > 0; ++r)
  1014. {
  1015. rotate_tile(t, r, t1);
  1016. rough(t1, shape, endpts);
  1017. // for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
  1018. for (int i = 0; i < 1 && msebest > 0; ++i)
  1019. {
  1020. float mse = refine(t1, shape, r, i, endpts, tempblock);
  1021. if (mse < msebest)
  1022. {
  1023. memcpy(block, tempblock, sizeof(tempblock));
  1024. msebest = mse;
  1025. }
  1026. }
  1027. }
  1028. return msebest;
  1029. }