avpcl_mode2.cpp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004
  1. /*
  2. Copyright 2007 nVidia, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
  5. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
  6. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  7. See the License for the specific language governing permissions and limitations under the License.
  8. */
  9. // Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
  10. // x100 555x6 64p 2bi
  11. #include "bits.h"
  12. #include "tile.h"
  13. #include "avpcl.h"
  14. #include "nvcore/debug.h"
  15. #include "nvmath/vector.inl"
  16. #include "nvmath/matrix.inl"
  17. #include "nvmath/fitting.h"
  18. #include "avpcl_utils.h"
  19. #include "endpts.h"
  20. #include <string.h>
  21. #include <float.h>
  22. #include "shapes_three.h"
  23. using namespace nv;
  24. using namespace AVPCL;
  // Index parameters for this mode: every pixel index is INDEXBITS wide, giving NINDICES palette entries.
  #define NINDICES 4
  #define INDEXBITS 2
  #define HIGH_INDEXBIT (1<<(INDEXBITS-1)) // mask selecting the most significant bit of an index
  #define DENOM (NINDICES-1) // denominator handed to Utils::lerp when building palettes
  #define BIAS (DENOM/2) // bias handed to Utils::lerp when building quantized palettes
  // WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
  // i.e. can we search shapes in a particular order so we can see the global error minima easily and
  // stop without having to touch all shapes?
  // split a linear pixel position into coordinates: the low two bits are x, the next two bits are y
  #define POS_TO_X(pos) ((pos)&3)
  #define POS_TO_Y(pos) (((pos)>>2)&3)
  #define NBITSIZES 6 // entries per channel in ChanBits; indexed as region*2 + {0: endpoint A, 1: endpoint B}
  // Bit widths of the stored endpoint values of one color channel.
  // write_header/read_header index this as [region*2 + 0] for endpoint A and [region*2 + 1] for endpoint B.
  struct ChanBits
  {
      int nbitsizes[NBITSIZES]; // bitsizes for one channel
  };
  // Description of one block encoding: per-channel field widths plus the mode header.
  // NOTE: the `patterns` table initializes this struct positionally, so member order matters.
  struct Pattern
  {
      ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel
      int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
      int mode; // associated mode value
      int modebits; // number of mode bits
      const char *encoding; // verilog description of encoding for this mode
  };
  #define NPATTERNS 1
  // The only pattern of this mode: all 18 endpoint fields (6 per channel) are 5 bits wide,
  // endpoints are not transformed, and the header is the value 0x4 written in 3 bits.
  static Pattern patterns[NPATTERNS] =
  {
  // red green blue xfm mode mb
  5,5,5,5,5,5, 5,5,5,5,5,5, 5,5,5,5,5,5, 0, 0x4, 3, "",
  };
  // Quantization precision (in bits) of both endpoints of one region, per color channel.
  struct RegionPrec
  {
      int endpt_a_prec[NCHANNELS_RGB]; // precision of endpoint A, indexed by channel
      int endpt_b_prec[NCHANNELS_RGB]; // precision of endpoint B, indexed by channel
  };
  // Endpoint precisions of every region of a pattern.
  struct PatternPrec
  {
      RegionPrec region_precs[NREGIONS_THREE]; // one entry per region
  };
  // this is the precision for each channel and region
  // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
  // Layout per region: three endpoint-A precisions followed by three endpoint-B precisions (see RegionPrec).
  static PatternPrec pattern_precs[NPATTERNS] =
  {
  5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5,
  };
  // Count the bits needed to store n.
  //   n == 0 : 0 bits, signed or not
  //   n  > 0 : position of the highest set bit, plus one sign bit when issigned
  //   n  < 0 : only legal when issigned; number of shifts needed to reach -1, plus one
  static int nbits(int n, bool issigned)
  {
      if (n == 0)
          return 0;

      if (n < 0)
      {
          nvAssert (issigned);
          int width = 1; // accounts for the final "+ 1"
          while (n < -1)
          {
              ++width;
              n >>= 1;
          }
          return width;
      }

      int width = issigned ? 1 : 0; // reserve the sign bit up front
      while (n != 0)
      {
          ++width;
          n >>= 1;
      }
      return width;
  }
  // Shorthand for channel i of the endpoints of regions 0 and 1.
  // These expand in place and therefore require variables named `ep` and `i` in the using scope.
  #define R_0 ep[0].A[i]
  #define R_1 ep[0].B[i]
  #define R_2 ep[1].A[i]
  #define R_3 ep[1].B[i]
  91. static void transform_forward(IntEndptsRGB ep[NREGIONS])
  92. {
  93. for (int i=0; i<NCHANNELS_RGB; ++i)
  94. {
  95. R_1 -= R_3; R_2 -= R_3; R_0 -= R_3;
  96. }
  97. }
  98. static void transform_inverse(IntEndptsRGB ep[NREGIONS])
  99. {
  100. for (int i=0; i<NCHANNELS_RGB; ++i)
  101. {
  102. R_0 += R_3; R_2 += R_3; R_1 += R_3;
  103. }
  104. }
  105. static void quantize_endpts(const FltEndpts endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, IntEndptsRGB q_endpts[NREGIONS_THREE])
  106. {
  107. for (int region = 0; region < NREGIONS_THREE; ++region)
  108. {
  109. q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
  110. q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
  111. q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
  112. q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
  113. q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
  114. q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
  115. }
  116. }
  117. // swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
  118. static void swap_indices(IntEndptsRGB endpts[NREGIONS_THREE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
  119. {
  120. for (int region = 0; region < NREGIONS_THREE; ++region)
  121. {
  122. int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
  123. int x = POS_TO_X(position);
  124. int y = POS_TO_Y(position);
  125. nvAssert(REGION(x,y,shapeindex) == region); // double check the table
  126. if (indices[y][x] & HIGH_INDEXBIT)
  127. {
  128. // high bit is set, swap the endpts and indices for this region
  129. int t;
  130. for (int i=0; i<NCHANNELS_RGB; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
  131. for (int y = 0; y < Tile::TILE_H; y++)
  132. for (int x = 0; x < Tile::TILE_W; x++)
  133. if (REGION(x,y,shapeindex) == region)
  134. indices[y][x] = NINDICES - 1 - indices[y][x];
  135. }
  136. }
  137. }
  138. static bool endpts_fit(IntEndptsRGB endpts[NREGIONS_THREE], const Pattern &p)
  139. {
  140. return true;
  141. }
  142. static void write_header(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex, const Pattern &p, Bits &out)
  143. {
  144. out.write(p.mode, p.modebits);
  145. out.write(shapeindex, SHAPEBITS);
  146. for (int j=0; j<NCHANNELS_RGB; ++j)
  147. for (int i=0; i<NREGIONS_THREE; ++i)
  148. {
  149. out.write(endpts[i].A[j], p.chan[j].nbitsizes[i*2+0]);
  150. out.write(endpts[i].B[j], p.chan[j].nbitsizes[i*2+1]);
  151. }
  152. nvAssert (out.getptr() == 99);
  153. }
  154. static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shapeindex, Pattern &p, int &pat_index)
  155. {
  156. int mode = AVPCL::getmode(in);
  157. pat_index = 0;
  158. nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
  159. nvAssert (in.getptr() == patterns[pat_index].modebits);
  160. shapeindex = in.read(SHAPEBITS);
  161. p = patterns[pat_index];
  162. for (int j=0; j<NCHANNELS_RGB; ++j)
  163. for (int i=0; i<NREGIONS_THREE; ++i)
  164. {
  165. endpts[i].A[j] = in.read(p.chan[j].nbitsizes[i*2+0]);
  166. endpts[i].B[j] = in.read(p.chan[j].nbitsizes[i*2+1]);
  167. }
  168. nvAssert (in.getptr() == 99);
  169. }
  170. // WORK PLACEHOLDER -- keep it simple for now
  171. static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
  172. {
  173. int positions[NREGIONS_THREE];
  174. for (int r = 0; r < NREGIONS_THREE; ++r)
  175. positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
  176. for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
  177. {
  178. int x = POS_TO_X(pos);
  179. int y = POS_TO_Y(pos);
  180. bool match = false;
  181. for (int r = 0; r < NREGIONS_THREE; ++r)
  182. if (positions[r] == pos) { match = true; break; }
  183. out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
  184. }
  185. }
  186. static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
  187. {
  188. int positions[NREGIONS_THREE];
  189. for (int r = 0; r < NREGIONS_THREE; ++r)
  190. positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
  191. for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
  192. {
  193. int x = POS_TO_X(pos);
  194. int y = POS_TO_Y(pos);
  195. bool match = false;
  196. for (int r = 0; r < NREGIONS_THREE; ++r)
  197. if (positions[r] == pos) { match = true; break; }
  198. indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
  199. }
  200. }
  201. static void emit_block(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
  202. {
  203. Bits out(block, AVPCL::BITSIZE);
  204. write_header(endpts, shapeindex, p, out);
  205. write_indices(indices, shapeindex, out);
  206. nvAssert(out.getptr() == AVPCL::BITSIZE);
  207. }
  208. static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionPrec &region_prec, Vector4 palette[NINDICES])
  209. {
  210. // scale endpoints
  211. int a, b; // really need a IntVec4...
  212. a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]);
  213. b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
  214. // interpolate
  215. for (int i = 0; i < NINDICES; ++i)
  216. palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
  217. a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]);
  218. b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
  219. // interpolate
  220. for (int i = 0; i < NINDICES; ++i)
  221. palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
  222. a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]);
  223. b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
  224. // interpolate
  225. for (int i = 0; i < NINDICES; ++i)
  226. palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
  227. // constant alpha
  228. for (int i = 0; i < NINDICES; ++i)
  229. palette[i].w = 255.0f;
  230. }
  231. // sign extend but only if it was transformed
  232. static void sign_extend(Pattern &p, IntEndptsRGB endpts[NREGIONS_THREE])
  233. {
  234. nvAssert (p.transformed != 0);
  235. for (int i=0; i<NCHANNELS_RGB; ++i)
  236. {
  237. // endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]); // always positive here
  238. endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[1]);
  239. endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[2]);
  240. endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[3]);
  241. endpts[2].A[i] = SIGN_EXTEND(endpts[2].A[i], p.chan[i].nbitsizes[4]);
  242. endpts[2].B[i] = SIGN_EXTEND(endpts[2].B[i], p.chan[i].nbitsizes[5]);
  243. }
  244. }
  245. void AVPCL::decompress_mode2(const char *block, Tile &t)
  246. {
  247. Bits in(block, AVPCL::BITSIZE);
  248. Pattern p;
  249. IntEndptsRGB endpts[NREGIONS_THREE];
  250. int shapeindex, pat_index;
  251. read_header(in, endpts, shapeindex, p, pat_index);
  252. if (p.transformed)
  253. {
  254. sign_extend(p, endpts);
  255. transform_inverse(endpts);
  256. }
  257. Vector4 palette[NREGIONS_THREE][NINDICES];
  258. for (int r = 0; r < NREGIONS_THREE; ++r)
  259. generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
  260. int indices[Tile::TILE_H][Tile::TILE_W];
  261. read_indices(in, shapeindex, indices);
  262. nvAssert(in.getptr() == AVPCL::BITSIZE);
  263. // lookup
  264. for (int y = 0; y < Tile::TILE_H; y++)
  265. for (int x = 0; x < Tile::TILE_W; x++)
  266. t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
  267. }
  268. // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
  269. static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
  270. {
  271. Vector4 palette[NINDICES];
  272. float toterr = 0;
  273. Vector4 err;
  274. generate_palette_quantized(endpts, region_prec, palette);
  275. for (int i = 0; i < np; ++i)
  276. {
  277. float besterr = FLT_MAX;
  278. for (int j = 0; j < NINDICES && besterr > 0; ++j)
  279. {
  280. float err = Utils::metric4(colors[i], palette[j]) * importance[i];
  281. if (err > besterr) // error increased, so we're done searching
  282. break;
  283. if (err < besterr)
  284. {
  285. besterr = err;
  286. indices[i] = j;
  287. }
  288. }
  289. toterr += besterr;
  290. // check for early exit
  291. if (toterr > current_err)
  292. {
  293. // fill out bogus index values so it's initialized at least
  294. for (int k = i; k < np; ++k)
  295. indices[k] = -1;
  296. return FLT_MAX;
  297. }
  298. }
  299. return toterr;
  300. }
  301. // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
  302. static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts[NREGIONS_THREE], const PatternPrec &pattern_prec,
  303. int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_THREE])
  304. {
  305. // build list of possibles
  306. Vector4 palette[NREGIONS_THREE][NINDICES];
  307. for (int region = 0; region < NREGIONS_THREE; ++region)
  308. {
  309. generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
  310. toterr[region] = 0;
  311. }
  312. Vector4 err;
  313. for (int y = 0; y < tile.size_y; y++)
  314. for (int x = 0; x < tile.size_x; x++)
  315. {
  316. int region = REGION(x,y,shapeindex);
  317. float err, besterr = FLT_MAX;
  318. for (int i = 0; i < NINDICES && besterr > 0; ++i)
  319. {
  320. err = Utils::metric4(tile.data[y][x], palette[region][i]);
  321. if (err > besterr) // error increased, so we're done searching
  322. break;
  323. if (err < besterr)
  324. {
  325. besterr = err;
  326. indices[y][x] = i;
  327. }
  328. }
  329. toterr[region] += besterr;
  330. }
  331. }
  332. // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
  333. // this function returns either old_err or a value smaller (if it was successful in improving the error)
  334. static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts,
  335. float old_err, int do_b, int indices[Tile::TILE_TOTAL])
  336. {
  337. // we have the old endpoints: old_endpts
  338. // we have the perturbed endpoints: new_endpts
  339. // we have the temporary endpoints: temp_endpts
  340. IntEndptsRGB temp_endpts;
  341. float min_err = old_err; // start with the best current error
  342. int beststep;
  343. int temp_indices[Tile::TILE_TOTAL];
  344. for (int i=0; i<np; ++i)
  345. indices[i] = -1;
  346. // copy real endpoints so we can perturb them
  347. temp_endpts = new_endpts = old_endpts;
  348. int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
  349. // do a logarithmic search for the best error for this endpoint (which)
  350. for (int step = 1 << (prec-1); step; step >>= 1)
  351. {
  352. bool improved = false;
  353. for (int sign = -1; sign <= 1; sign += 2)
  354. {
  355. if (do_b == 0)
  356. {
  357. temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
  358. if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
  359. continue;
  360. }
  361. else
  362. {
  363. temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
  364. if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
  365. continue;
  366. }
  367. float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
  368. if (err < min_err)
  369. {
  370. improved = true;
  371. min_err = err;
  372. beststep = sign * step;
  373. for (int i=0; i<np; ++i)
  374. indices[i] = temp_indices[i];
  375. }
  376. }
  377. // if this was an improvement, move the endpoint and continue search from there
  378. if (improved)
  379. {
  380. if (do_b == 0)
  381. new_endpts.A[ch] += beststep;
  382. else
  383. new_endpts.B[ch] += beststep;
  384. }
  385. }
  386. return min_err;
  387. }
  388. // the larger the error the more time it is worth spending on an exhaustive search.
  389. // perturb the endpoints at least -3 to 3.
  390. // if err > 5000 perturb endpoints 50% of precision
  391. // if err > 1000 25%
  392. // if err > 200 12.5%
  393. // if err > 40 6.25%
  394. // for np = 16 -- adjust error thresholds as a function of np
  395. // always ensure endpoint ordering is preserved (no need to overlap the scan)
  396. // if orig_err returned from this is less than its input value, then indices[] will contain valid indices
  397. static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL])
  398. {
  399. IntEndptsRGB temp_endpts;
  400. float best_err = orig_err;
  401. int aprec = region_prec.endpt_a_prec[ch];
  402. int bprec = region_prec.endpt_b_prec[ch];
  403. int good_indices[Tile::TILE_TOTAL];
  404. int temp_indices[Tile::TILE_TOTAL];
  405. for (int i=0; i<np; ++i)
  406. indices[i] = -1;
  407. float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
  408. if (orig_err == 0) return orig_err;
  409. int adelta = 0, bdelta = 0;
  410. if (orig_err > 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
  411. else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
  412. else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
  413. else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
  414. adelta = max(adelta, 3);
  415. bdelta = max(bdelta, 3);
  416. #ifdef DISABLE_EXHAUSTIVE
  417. adelta = bdelta = 3;
  418. #endif
  419. temp_endpts = opt_endpts;
  420. // ok figure out the range of A and B
  421. int alow = max(0, opt_endpts.A[ch] - adelta);
  422. int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
  423. int blow = max(0, opt_endpts.B[ch] - bdelta);
  424. int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
  425. // now there's no need to swap the ordering of A and B
  426. bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
  427. int amin, bmin;
  428. if (opt_endpts.A[ch] <= opt_endpts.B[ch])
  429. {
  430. // keep a <= b
  431. for (int a = alow; a <= ahigh; ++a)
  432. for (int b = max(a, blow); b < bhigh; ++b)
  433. {
  434. temp_endpts.A[ch] = a;
  435. temp_endpts.B[ch] = b;
  436. float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
  437. if (err < best_err)
  438. {
  439. amin = a;
  440. bmin = b;
  441. best_err = err;
  442. for (int i=0; i<np; ++i)
  443. good_indices[i] = temp_indices[i];
  444. }
  445. }
  446. }
  447. else
  448. {
  449. // keep b <= a
  450. for (int b = blow; b < bhigh; ++b)
  451. for (int a = max(b, alow); a <= ahigh; ++a)
  452. {
  453. temp_endpts.A[ch] = a;
  454. temp_endpts.B[ch] = b;
  455. float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
  456. if (err < best_err)
  457. {
  458. amin = a;
  459. bmin = b;
  460. best_err = err;
  461. for (int i=0; i<np; ++i)
  462. good_indices[i] = temp_indices[i];
  463. }
  464. }
  465. }
  466. if (best_err < orig_err)
  467. {
  468. opt_endpts.A[ch] = amin;
  469. opt_endpts.B[ch] = bmin;
  470. orig_err = best_err;
  471. // if we actually improved, update the indices
  472. for (int i=0; i<np; ++i)
  473. indices[i] = good_indices[i];
  474. }
  475. return best_err;
  476. }
  477. static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts)
  478. {
  479. float opt_err = orig_err;
  480. opt_endpts = orig_endpts;
  481. /*
  482. err0 = perturb(rgb0, delta0)
  483. err1 = perturb(rgb1, delta1)
  484. if (err0 < err1)
  485. if (err0 >= initial_error) break
  486. rgb0 += delta0
  487. next = 1
  488. else
  489. if (err1 >= initial_error) break
  490. rgb1 += delta1
  491. next = 0
  492. initial_err = map()
  493. for (;;)
  494. err = perturb(next ? rgb1:rgb0, delta)
  495. if (err >= initial_err) break
  496. next? rgb1 : rgb0 += delta
  497. initial_err = err
  498. */
  499. IntEndptsRGB new_a, new_b;
  500. IntEndptsRGB new_endpt;
  501. int do_b;
  502. int orig_indices[Tile::TILE_TOTAL];
  503. int new_indices[Tile::TILE_TOTAL];
  504. int temp_indices0[Tile::TILE_TOTAL];
  505. int temp_indices1[Tile::TILE_TOTAL];
  506. // now optimize each channel separately
  507. // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
  508. // if they differ, we restart the loop (which then falls back to looking for a first improvement.)
  509. for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
  510. {
  511. // figure out which endpoint when perturbed gives the most improvement and start there
  512. // if we just alternate, we can easily end up in a local minima
  513. float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
  514. float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
  515. if (err0 < err1)
  516. {
  517. if (err0 >= opt_err)
  518. continue;
  519. for (int i=0; i<np; ++i)
  520. {
  521. new_indices[i] = orig_indices[i] = temp_indices0[i];
  522. nvAssert (orig_indices[i] != -1);
  523. }
  524. opt_endpts.A[ch] = new_a.A[ch];
  525. opt_err = err0;
  526. do_b = 1; // do B next
  527. }
  528. else
  529. {
  530. if (err1 >= opt_err)
  531. continue;
  532. for (int i=0; i<np; ++i)
  533. {
  534. new_indices[i] = orig_indices[i] = temp_indices1[i];
  535. nvAssert (orig_indices[i] != -1);
  536. }
  537. opt_endpts.B[ch] = new_b.B[ch];
  538. opt_err = err1;
  539. do_b = 0; // do A next
  540. }
  541. // now alternate endpoints and keep trying until there is no improvement
  542. for (;;)
  543. {
  544. float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
  545. if (err >= opt_err)
  546. break;
  547. for (int i=0; i<np; ++i)
  548. {
  549. new_indices[i] = temp_indices0[i];
  550. nvAssert (new_indices[i] != -1);
  551. }
  552. if (do_b == 0)
  553. opt_endpts.A[ch] = new_endpt.A[ch];
  554. else
  555. opt_endpts.B[ch] = new_endpt.B[ch];
  556. opt_err = err;
  557. do_b = 1 - do_b; // now move the other endpoint
  558. }
  559. // see if the indices have changed
  560. int i;
  561. for (i=0; i<np; ++i)
  562. if (orig_indices[i] != new_indices[i])
  563. break;
  564. if (i<np)
  565. ch = -1; // start over
  566. }
  567. // finally, do a small exhaustive search around what we think is the global minima to be sure
  568. // note this is independent of the above search, so we don't care about the indices from the above
  569. // we don't care about the above because if they differ, so what? we've already started at ch=0
  570. bool first = true;
  571. for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
  572. {
  573. float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
  574. if (new_err < opt_err)
  575. {
  576. opt_err = new_err;
  577. if (first)
  578. {
  579. for (int i=0; i<np; ++i)
  580. {
  581. orig_indices[i] = temp_indices0[i];
  582. nvAssert (orig_indices[i] != -1);
  583. }
  584. first = false;
  585. }
  586. else
  587. {
  588. // see if the indices have changed
  589. int i;
  590. for (i=0; i<np; ++i)
  591. if (orig_indices[i] != temp_indices0[i])
  592. break;
  593. if (i<np)
  594. {
  595. ch = -1; // start over
  596. first = true;
  597. }
  598. }
  599. }
  600. }
  601. return opt_err;
  602. }
  603. static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_THREE],
  604. const IntEndptsRGB orig_endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB opt_endpts[NREGIONS_THREE])
  605. {
  606. Vector4 pixels[Tile::TILE_TOTAL];
  607. float importance[Tile::TILE_TOTAL];
  608. IntEndptsRGB temp_in, temp_out;
  609. for (int region=0; region<NREGIONS_THREE; ++region)
  610. {
  611. // collect the pixels in the region
  612. int np = 0;
  613. for (int y = 0; y < tile.size_y; y++) {
  614. for (int x = 0; x < tile.size_x; x++) {
  615. if (REGION(x, y, shapeindex) == region) {
  616. pixels[np] = tile.data[y][x];
  617. importance[np] = tile.importance_map[y][x];
  618. np++;
  619. }
  620. }
  621. }
  622. opt_endpts[region] = temp_in = orig_endpts[region];
  623. opt_err[region] = orig_err[region];
  624. float best_err = orig_err[region];
  625. // make sure we have a valid error for temp_in
  626. // we didn't change temp_in, so orig_err[region] is still valid
  627. float temp_in_err = orig_err[region];
  628. // now try to optimize these endpoints
  629. float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
  630. // if we find an improvement, update the best so far and correct the output endpoints and errors
  631. if (temp_out_err < best_err)
  632. {
  633. best_err = temp_out_err;
  634. opt_err[region] = temp_out_err;
  635. opt_endpts[region] = temp_out;
  636. }
  637. }
  638. }
  639. /* optimization algorithm
  640. for each pattern
  641. convert endpoints using pattern precision
  642. assign indices and get initial error
  643. compress indices (and possibly reorder endpoints)
  644. transform endpoints
  645. if transformed endpoints fit pattern
  646. get original endpoints back
  647. optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
  648. compress new indices
  649. transform new endpoints
  650. if new endpoints fit pattern AND if error is improved
  651. emit compressed block with new data
  652. else
  653. emit compressed block with original data // to try to preserve maximum endpoint precision
  654. */
  655. static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_THREE], char *block)
  656. {
  657. float orig_err[NREGIONS_THREE], opt_err[NREGIONS_THREE], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
  658. IntEndptsRGB orig_endpts[NREGIONS_THREE], opt_endpts[NREGIONS_THREE];
  659. int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
  660. for (int sp = 0; sp < NPATTERNS; ++sp)
  661. {
  662. quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
  663. assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
  664. swap_indices(orig_endpts, orig_indices, shapeindex_best);
  665. if (patterns[sp].transformed)
  666. transform_forward(orig_endpts);
  667. // apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
  668. // the assumption made is that if they don't fit now, they won't fit after optimizing.
  669. if (endpts_fit(orig_endpts, patterns[sp]))
  670. {
  671. if (patterns[sp].transformed)
  672. transform_inverse(orig_endpts);
  673. optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
  674. assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
  675. // (nreed) Commented out asserts because they go off all the time...not sure why
  676. //for (int i=0; i<NREGIONS; ++i)
  677. // nvAssert(expected_opt_err[i] == opt_err[i]);
  678. swap_indices(opt_endpts, opt_indices, shapeindex_best);
  679. if (patterns[sp].transformed)
  680. transform_forward(opt_endpts);
  681. orig_toterr = opt_toterr = 0;
  682. for (int i=0; i < NREGIONS_THREE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
  683. if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
  684. {
  685. emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
  686. return opt_toterr;
  687. }
  688. else
  689. {
  690. // either it stopped fitting when we optimized it, or there was no improvement
  691. // so go back to the unoptimized endpoints which we know will fit
  692. if (patterns[sp].transformed)
  693. transform_forward(orig_endpts);
  694. emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
  695. return orig_toterr;
  696. }
  697. }
  698. }
  699. nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 2).";
  700. return FLT_MAX;
  701. }
  702. static void clamp(Vector4 &v)
  703. {
  704. if (v.x < 0.0f) v.x = 0.0f;
  705. if (v.x > 255.0f) v.x = 255.0f;
  706. if (v.y < 0.0f) v.y = 0.0f;
  707. if (v.y > 255.0f) v.y = 255.0f;
  708. if (v.z < 0.0f) v.z = 0.0f;
  709. if (v.z > 255.0f) v.z = 255.0f;
  710. v.w = 255.0f;
  711. }
  712. static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vector4 palette[NREGIONS_THREE][NINDICES])
  713. {
  714. for (int region = 0; region < NREGIONS_THREE; ++region)
  715. for (int i = 0; i < NINDICES; ++i)
  716. palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
  717. }
  718. // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
  719. static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE])
  720. {
  721. // build list of possibles
  722. Vector4 palette[NREGIONS_THREE][NINDICES];
  723. generate_palette_unquantized(endpts, palette);
  724. float toterr = 0;
  725. Vector4 err;
  726. for (int y = 0; y < tile.size_y; y++)
  727. for (int x = 0; x < tile.size_x; x++)
  728. {
  729. int region = REGION(x,y,shapeindex);
  730. float err, besterr = FLT_MAX;
  731. for (int i = 0; i < NINDICES && besterr > 0; ++i)
  732. {
  733. err = Utils::metric4(tile.data[y][x], palette[region][i]);
  734. if (err > besterr) // error increased, so we're done searching. this works for most norms.
  735. break;
  736. if (err < besterr)
  737. besterr = err;
  738. }
  739. toterr += besterr;
  740. }
  741. return toterr;
  742. }
  743. static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE])
  744. {
  745. for (int region=0; region<NREGIONS_THREE; ++region)
  746. {
  747. int np = 0;
  748. Vector3 colors[Tile::TILE_TOTAL];
  749. float alphas[2];
  750. Vector4 mean(0,0,0,0);
  751. for (int y = 0; y < tile.size_y; y++)
  752. for (int x = 0; x < tile.size_x; x++)
  753. if (REGION(x,y,shapeindex) == region)
  754. {
  755. colors[np] = tile.data[y][x].xyz();
  756. if (np < 2) alphas[np] = tile.data[y][x].w;
  757. mean += tile.data[y][x];
  758. ++np;
  759. }
  760. // handle simple cases
  761. if (np == 0)
  762. {
  763. Vector4 zero(0,0,0,255.0f);
  764. endpts[region].A = zero;
  765. endpts[region].B = zero;
  766. continue;
  767. }
  768. else if (np == 1)
  769. {
  770. endpts[region].A = Vector4(colors[0], alphas[0]);
  771. endpts[region].B = Vector4(colors[0], alphas[0]);
  772. continue;
  773. }
  774. else if (np == 2)
  775. {
  776. endpts[region].A = Vector4(colors[0], alphas[0]);
  777. endpts[region].B = Vector4(colors[1], alphas[1]);
  778. continue;
  779. }
  780. mean /= float(np);
  781. Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
  782. // project each pixel value along the principal direction
  783. float minp = FLT_MAX, maxp = -FLT_MAX;
  784. for (int i = 0; i < np; i++)
  785. {
  786. float dp = dot(colors[i]-mean.xyz(), direction);
  787. if (dp < minp) minp = dp;
  788. if (dp > maxp) maxp = dp;
  789. }
  790. // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
  791. endpts[region].A = mean + minp*Vector4(direction, 0);
  792. endpts[region].B = mean + maxp*Vector4(direction, 0);
  793. // clamp endpoints
  794. // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
  795. // shape based on endpoints being clamped
  796. clamp(endpts[region].A);
  797. clamp(endpts[region].B);
  798. }
  799. return map_colors(tile, shapeindex, endpts);
  800. }
  801. static void swap(float *list1, int *list2, int i, int j)
  802. {
  803. float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
  804. int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
  805. }
  806. float AVPCL::compress_mode2(const Tile &t, char *block)
  807. {
  808. // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
  809. // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
  810. const int NITEMS=NSHAPES/4;
  811. // pick the best NITEMS shapes and refine these.
  812. struct {
  813. FltEndpts endpts[NREGIONS_THREE];
  814. } all[NSHAPES];
  815. float roughmse[NSHAPES];
  816. int index[NSHAPES];
  817. char tempblock[AVPCL::BLOCKSIZE];
  818. float msebest = FLT_MAX;
  819. for (int i=0; i<NSHAPES; ++i)
  820. {
  821. roughmse[i] = rough(t, i, &all[i].endpts[0]);
  822. index[i] = i;
  823. }
  824. // bubble sort -- only need to bubble up the first NITEMS items
  825. for (int i=0; i<NITEMS; ++i)
  826. for (int j=i+1; j<NSHAPES; ++j)
  827. if (roughmse[i] > roughmse[j])
  828. swap(roughmse, index, i, j);
  829. for (int i=0; i<NITEMS && msebest>0; ++i)
  830. {
  831. int shape = index[i];
  832. float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
  833. if (mse < msebest)
  834. {
  835. memcpy(block, tempblock, sizeof(tempblock));
  836. msebest = mse;
  837. }
  838. }
  839. return msebest;
  840. }