zohone.cpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799
  1. /*
  2. Copyright 2007 nVidia, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
  5. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
  6. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  7. See the License for the specific language governing permissions and limitations under the License.
  8. */
  9. // one region zoh compress/decompress code
  10. // Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
  11. #include "bits.h"
  12. #include "tile.h"
  13. #include "zoh.h"
  14. #include "zoh_utils.h"
  15. #include "nvmath/vector.inl"
  16. #include "nvmath/fitting.h"
  17. #include <string.h> // strlen
  18. #include <float.h> // FLT_MAX
  19. using namespace nv;
  20. using namespace ZOH;
  // ---- index encoding -------------------------------------------------------
  // Each texel stores an INDEXBITS-wide index into a NINDICES-entry palette.
  #define NINDICES        16
  #define INDEXBITS       4
  // Most significant bit of an index.
  #define HIGH_INDEXBIT   (1 << (INDEXBITS - 1))
  // Denominator used when interpolating between the two endpoints.
  #define DENOM           (NINDICES - 1)

  // ---- shapes ---------------------------------------------------------------
  // One bit per texel; this file only has the single all-zero shape, so every
  // texel belongs to region 0.
  #define NSHAPES         1
  static const int shapes[NSHAPES] = { 0x0000 };

  // Region of texel (x,y) in the given shape: bit (15 - x - 4*y) of the mask.
  #define REGION(x,y,shapeindex)  ((shapes[shapeindex] & (1 << (15 - (x) - 4 * (y)))) != 0)

  // Linear texel position (0..15) to x / y coordinates.
  #define POS_TO_X(pos)   ((pos) & 3)
  #define POS_TO_Y(pos)   (((pos) >> 2) & 3)

  // Number of precision entries stored per channel (see Chanpat).
  #define NDELTA          2
  34. struct Chanpat
  35. {
  36. int prec[NDELTA]; // precision pattern for one channel
  37. };
  38. struct Pattern
  39. {
  40. Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel
  41. int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
  42. int mode; // associated mode value
  43. int modebits; // number of mode bits
  44. const char *encoding; // verilog description of encoding for this mode
  45. };
  46. #define MAXMODEBITS 5
  47. #define MAXMODES (1<<MAXMODEBITS)
  48. #define NPATTERNS 4
  49. static const Pattern patterns[NPATTERNS] =
  50. {
  51. 16,4, 16,4, 16,4, 1, 0x0f, 5, "bw[10],bw[11],bw[12],bw[13],bw[14],bw[15],bx[3:0],gw[10],gw[11],gw[12],gw[13],gw[14],gw[15],gx[3:0],rw[10],rw[11],rw[12],rw[13],rw[14],rw[15],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
  52. 12,8, 12,8, 12,8, 1, 0x0b, 5, "bw[10],bw[11],bx[7:0],gw[10],gw[11],gx[7:0],rw[10],rw[11],rx[7:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
  53. 11,9, 11,9, 11,9, 1, 0x07, 5, "bw[10],bx[8:0],gw[10],gx[8:0],rw[10],rx[8:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
  54. 10,10, 10,10, 10,10, 0, 0x03, 5, "bx[9:0],gx[9:0],rx[9:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
  55. };
  56. // mapping of mode to the corresponding index in pattern
  57. static const int mode_to_pat[MAXMODES] = {
  58. -1,-1,-1,
  59. 3, // 0x03
  60. -1,-1,-1,
  61. 2, // 0x07
  62. -1,-1,-1,
  63. 1, // 0x0b
  64. -1,-1,-1,
  65. 0, // 0x0f
  66. -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  67. };
  // Shorthand for the two endpoints of region 0, channel i.
  // Both expect a loop variable named i to be in scope at the point of use.
  #define R_0(ep)   (ep)[0].A[i]
  #define R_1(ep)   (ep)[0].B[i]

  // Bit mask with the low n bits set.
  #define MASK(n)   ((1 << (n)) - 1)
  71. // compress endpoints
  72. static void compress_endpts(const IntEndpts in[NREGIONS_ONE], ComprEndpts out[NREGIONS_ONE], const Pattern &p)
  73. {
  74. if (p.transformed)
  75. {
  76. for (int i=0; i<NCHANNELS; ++i)
  77. {
  78. R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
  79. R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
  80. }
  81. }
  82. else
  83. {
  84. for (int i=0; i<NCHANNELS; ++i)
  85. {
  86. R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
  87. R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
  88. }
  89. }
  90. }
  91. // decompress endpoints
  92. static void decompress_endpts(const ComprEndpts in[NREGIONS_ONE], IntEndpts out[NREGIONS_ONE], const Pattern &p)
  93. {
  94. bool issigned = Utils::FORMAT == SIGNED_F16;
  95. if (p.transformed)
  96. {
  97. for (int i=0; i<NCHANNELS; ++i)
  98. {
  99. R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
  100. int t;
  101. t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
  102. t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
  103. R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
  104. }
  105. }
  106. else
  107. {
  108. for (int i=0; i<NCHANNELS; ++i)
  109. {
  110. R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
  111. R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
  112. }
  113. }
  114. }
  115. static void quantize_endpts(const FltEndpts endpts[NREGIONS_ONE], int prec, IntEndpts q_endpts[NREGIONS_ONE])
  116. {
  117. for (int region = 0; region < NREGIONS_ONE; ++region)
  118. {
  119. q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
  120. q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
  121. q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
  122. q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
  123. q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
  124. q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
  125. }
  126. }
  127. // swap endpoints as needed to ensure that the indices at index_one and index_one have a 0 high-order bit
  128. // index_one is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
  129. static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
  130. {
  131. int index_positions[NREGIONS_ONE];
  132. index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0
  133. for (int region = 0; region < NREGIONS_ONE; ++region)
  134. {
  135. int x = index_positions[region] & 3;
  136. int y = (index_positions[region] >> 2) & 3;
  137. nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table
  138. if (indices[y][x] & HIGH_INDEXBIT)
  139. {
  140. // high bit is set, swap the endpts and indices for this region
  141. int t;
  142. for (int i=0; i<NCHANNELS; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
  143. for (int y = 0; y < Tile::TILE_H; y++)
  144. for (int x = 0; x < Tile::TILE_W; x++)
  145. if (REGION(x,y,shapeindex) == region)
  146. indices[y][x] = NINDICES - 1 - indices[y][x];
  147. }
  148. }
  149. }
  150. // endpoints fit only if the compression was lossless
  151. static bool endpts_fit(const IntEndpts orig[NREGIONS_ONE], const ComprEndpts compressed[NREGIONS_ONE], const Pattern &p)
  152. {
  153. IntEndpts uncompressed[NREGIONS_ONE];
  154. decompress_endpts(compressed, uncompressed, p);
  155. for (int j=0; j<NREGIONS_ONE; ++j)
  156. for (int i=0; i<NCHANNELS; ++i)
  157. {
  158. if (orig[j].A[i] != uncompressed[j].A[i]) return false;
  159. if (orig[j].B[i] != uncompressed[j].B[i]) return false;
  160. }
  161. return true;
  162. }
  163. static void write_header(const ComprEndpts endpts[NREGIONS_ONE], const Pattern &p, Bits &out)
  164. {
  165. // interpret the verilog backwards and process it
  166. int m = p.mode;
  167. int rw = endpts[0].A[0], rx = endpts[0].B[0];
  168. int gw = endpts[0].A[1], gx = endpts[0].B[1];
  169. int bw = endpts[0].A[2], bx = endpts[0].B[2];
  170. int ptr = int(strlen(p.encoding));
  171. while (ptr)
  172. {
  173. Field field;
  174. int endbit, len;
  175. // !!!UNDONE: get rid of string parsing!!!
  176. Utils::parse(p.encoding, ptr, field, endbit, len);
  177. switch(field)
  178. {
  179. case FIELD_M: out.write( m >> endbit, len); break;
  180. case FIELD_RW: out.write(rw >> endbit, len); break;
  181. case FIELD_RX: out.write(rx >> endbit, len); break;
  182. case FIELD_GW: out.write(gw >> endbit, len); break;
  183. case FIELD_GX: out.write(gx >> endbit, len); break;
  184. case FIELD_BW: out.write(bw >> endbit, len); break;
  185. case FIELD_BX: out.write(bx >> endbit, len); break;
  186. case FIELD_D:
  187. case FIELD_RY:
  188. case FIELD_RZ:
  189. case FIELD_GY:
  190. case FIELD_GZ:
  191. case FIELD_BY:
  192. case FIELD_BZ:
  193. default: nvUnreachable();
  194. }
  195. }
  196. }
  197. static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p)
  198. {
  199. // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
  200. int mode = in.read(2);
  201. if (mode != 0x00 && mode != 0x01)
  202. mode = (in.read(3) << 2) | mode;
  203. int pat_index = mode_to_pat[mode];
  204. nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
  205. nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
  206. p = patterns[pat_index];
  207. int d;
  208. int rw, rx;
  209. int gw, gx;
  210. int bw, bx;
  211. d = 0;
  212. rw = rx = 0;
  213. gw = gx = 0;
  214. bw = bx = 0;
  215. int ptr = int(strlen(p.encoding));
  216. while (ptr)
  217. {
  218. Field field;
  219. int endbit, len;
  220. // !!!UNDONE: get rid of string parsing!!!
  221. Utils::parse(p.encoding, ptr, field, endbit, len);
  222. switch(field)
  223. {
  224. case FIELD_M: break; // already processed so ignore
  225. case FIELD_RW: rw |= in.read(len) << endbit; break;
  226. case FIELD_RX: rx |= in.read(len) << endbit; break;
  227. case FIELD_GW: gw |= in.read(len) << endbit; break;
  228. case FIELD_GX: gx |= in.read(len) << endbit; break;
  229. case FIELD_BW: bw |= in.read(len) << endbit; break;
  230. case FIELD_BX: bx |= in.read(len) << endbit; break;
  231. case FIELD_D:
  232. case FIELD_RY:
  233. case FIELD_RZ:
  234. case FIELD_GY:
  235. case FIELD_GZ:
  236. case FIELD_BY:
  237. case FIELD_BZ:
  238. default: nvUnreachable();
  239. }
  240. }
  241. nvDebugCheck (in.getptr() == 128 - 63);
  242. endpts[0].A[0] = rw; endpts[0].B[0] = rx;
  243. endpts[0].A[1] = gw; endpts[0].B[1] = gx;
  244. endpts[0].A[2] = bw; endpts[0].B[2] = bx;
  245. }
  246. // compress index 0
  247. static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
  248. {
  249. for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
  250. {
  251. int x = POS_TO_X(pos);
  252. int y = POS_TO_Y(pos);
  253. out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0));
  254. }
  255. }
  256. static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
  257. {
  258. Bits out(block, ZOH::BITSIZE);
  259. write_header(endpts, p, out);
  260. write_indices(indices, shapeindex, out);
  261. nvDebugCheck(out.getptr() == ZOH::BITSIZE);
  262. }
  263. static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
  264. {
  265. // scale endpoints
  266. int a, b; // really need a IntVector3...
  267. a = Utils::unquantize(endpts.A[0], prec);
  268. b = Utils::unquantize(endpts.B[0], prec);
  269. // interpolate
  270. for (int i = 0; i < NINDICES; ++i)
  271. palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
  272. a = Utils::unquantize(endpts.A[1], prec);
  273. b = Utils::unquantize(endpts.B[1], prec);
  274. // interpolate
  275. for (int i = 0; i < NINDICES; ++i)
  276. palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
  277. a = Utils::unquantize(endpts.A[2], prec);
  278. b = Utils::unquantize(endpts.B[2], prec);
  279. // interpolate
  280. for (int i = 0; i < NINDICES; ++i)
  281. palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
  282. }
  283. // position 0 was compressed
  284. static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
  285. {
  286. for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
  287. {
  288. int x = POS_TO_X(pos);
  289. int y = POS_TO_Y(pos);
  290. indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0));
  291. }
  292. }
  293. void ZOH::decompressone(const char *block, Tile &t)
  294. {
  295. Bits in(block, ZOH::BITSIZE);
  296. Pattern p;
  297. IntEndpts endpts[NREGIONS_ONE];
  298. ComprEndpts compr_endpts[NREGIONS_ONE];
  299. read_header(in, compr_endpts, p);
  300. int shapeindex = 0; // only one shape
  301. decompress_endpts(compr_endpts, endpts, p);
  302. Vector3 palette[NREGIONS_ONE][NINDICES];
  303. for (int r = 0; r < NREGIONS_ONE; ++r)
  304. generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
  305. // read indices
  306. int indices[Tile::TILE_H][Tile::TILE_W];
  307. read_indices(in, shapeindex, indices);
  308. nvDebugCheck(in.getptr() == ZOH::BITSIZE);
  309. // lookup
  310. for (int y = 0; y < Tile::TILE_H; y++)
  311. for (int x = 0; x < Tile::TILE_W; x++)
  312. t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
  313. }
  314. // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
  315. static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
  316. {
  317. Vector3 palette[NINDICES];
  318. float toterr = 0;
  319. Vector3 err;
  320. generate_palette_quantized(endpts, prec, palette);
  321. for (int i = 0; i < np; ++i)
  322. {
  323. float err, besterr;
  324. besterr = Utils::norm(colors[i], palette[0]) * importance[i];
  325. for (int j = 1; j < NINDICES && besterr > 0; ++j)
  326. {
  327. err = Utils::norm(colors[i], palette[j]) * importance[i];
  328. if (err > besterr) // error increased, so we're done searching
  329. break;
  330. if (err < besterr)
  331. besterr = err;
  332. }
  333. toterr += besterr;
  334. }
  335. return toterr;
  336. }
  337. // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
  338. static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec,
  339. int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_ONE])
  340. {
  341. // build list of possibles
  342. Vector3 palette[NREGIONS_ONE][NINDICES];
  343. for (int region = 0; region < NREGIONS_ONE; ++region)
  344. {
  345. generate_palette_quantized(endpts[region], prec, &palette[region][0]);
  346. toterr[region] = 0;
  347. }
  348. Vector3 err;
  349. for (int y = 0; y < tile.size_y; y++)
  350. for (int x = 0; x < tile.size_x; x++)
  351. {
  352. int region = REGION(x,y,shapeindex);
  353. float err, besterr;
  354. besterr = Utils::norm(tile.data[y][x], palette[region][0]);
  355. indices[y][x] = 0;
  356. for (int i = 1; i < NINDICES && besterr > 0; ++i)
  357. {
  358. err = Utils::norm(tile.data[y][x], palette[region][i]);
  359. if (err > besterr) // error increased, so we're done searching
  360. break;
  361. if (err < besterr)
  362. {
  363. besterr = err;
  364. indices[y][x] = i;
  365. }
  366. }
  367. toterr[region] += besterr;
  368. }
  369. }
  370. static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
  371. float old_err, int do_b)
  372. {
  373. // we have the old endpoints: old_endpts
  374. // we have the perturbed endpoints: new_endpts
  375. // we have the temporary endpoints: temp_endpts
  376. IntEndpts temp_endpts;
  377. float min_err = old_err; // start with the best current error
  378. int beststep;
  379. // copy real endpoints so we can perturb them
  380. for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
  381. // do a logarithmic search for the best error for this endpoint (which)
  382. for (int step = 1 << (prec-1); step; step >>= 1)
  383. {
  384. bool improved = false;
  385. for (int sign = -1; sign <= 1; sign += 2)
  386. {
  387. if (do_b == 0)
  388. {
  389. temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
  390. if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
  391. continue;
  392. }
  393. else
  394. {
  395. temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
  396. if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
  397. continue;
  398. }
  399. float err = map_colors(colors, importance, np, temp_endpts, prec);
  400. if (err < min_err)
  401. {
  402. improved = true;
  403. min_err = err;
  404. beststep = sign * step;
  405. }
  406. }
  407. // if this was an improvement, move the endpoint and continue search from there
  408. if (improved)
  409. {
  410. if (do_b == 0)
  411. new_endpts.A[ch] += beststep;
  412. else
  413. new_endpts.B[ch] += beststep;
  414. }
  415. }
  416. return min_err;
  417. }
  418. static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
  419. {
  420. float opt_err = orig_err;
  421. for (int ch = 0; ch < NCHANNELS; ++ch)
  422. {
  423. opt_endpts.A[ch] = orig_endpts.A[ch];
  424. opt_endpts.B[ch] = orig_endpts.B[ch];
  425. }
  426. /*
  427. err0 = perturb(rgb0, delta0)
  428. err1 = perturb(rgb1, delta1)
  429. if (err0 < err1)
  430. if (err0 >= initial_error) break
  431. rgb0 += delta0
  432. next = 1
  433. else
  434. if (err1 >= initial_error) break
  435. rgb1 += delta1
  436. next = 0
  437. initial_err = map()
  438. for (;;)
  439. err = perturb(next ? rgb1:rgb0, delta)
  440. if (err >= initial_err) break
  441. next? rgb1 : rgb0 += delta
  442. initial_err = err
  443. */
  444. IntEndpts new_a, new_b;
  445. IntEndpts new_endpt;
  446. int do_b;
  447. // now optimize each channel separately
  448. for (int ch = 0; ch < NCHANNELS; ++ch)
  449. {
  450. // figure out which endpoint when perturbed gives the most improvement and start there
  451. // if we just alternate, we can easily end up in a local minima
  452. float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A
  453. float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B
  454. if (err0 < err1)
  455. {
  456. if (err0 >= opt_err)
  457. continue;
  458. opt_endpts.A[ch] = new_a.A[ch];
  459. opt_err = err0;
  460. do_b = 1; // do B next
  461. }
  462. else
  463. {
  464. if (err1 >= opt_err)
  465. continue;
  466. opt_endpts.B[ch] = new_b.B[ch];
  467. opt_err = err1;
  468. do_b = 0; // do A next
  469. }
  470. // now alternate endpoints and keep trying until there is no improvement
  471. for (;;)
  472. {
  473. float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
  474. if (err >= opt_err)
  475. break;
  476. if (do_b == 0)
  477. opt_endpts.A[ch] = new_endpt.A[ch];
  478. else
  479. opt_endpts.B[ch] = new_endpt.B[ch];
  480. opt_err = err;
  481. do_b = 1 - do_b; // now move the other endpoint
  482. }
  483. }
  484. }
  485. static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_ONE],
  486. const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE])
  487. {
  488. Vector3 pixels[Tile::TILE_TOTAL];
  489. float importance[Tile::TILE_TOTAL];
  490. float err = 0;
  491. for (int region=0; region<NREGIONS_ONE; ++region)
  492. {
  493. // collect the pixels in the region
  494. int np = 0;
  495. for (int y = 0; y < tile.size_y; y++) {
  496. for (int x = 0; x < tile.size_x; x++) {
  497. if (REGION(x, y, shapeindex) == region) {
  498. pixels[np] = tile.data[y][x];
  499. importance[np] = tile.importance_map[y][x];
  500. ++np;
  501. }
  502. }
  503. }
  504. optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
  505. }
  506. }
  507. /* optimization algorithm
  508. for each pattern
  509. convert endpoints using pattern precision
  510. assign indices and get initial error
  511. compress indices (and possibly reorder endpoints)
  512. transform endpoints
  513. if transformed endpoints fit pattern
  514. get original endpoints back
  515. optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
  516. compress new indices
  517. transform new endpoints
  518. if new endpoints fit pattern AND if error is improved
  519. emit compressed block with new data
  520. else
  521. emit compressed block with original data // to try to preserve maximum endpoint precision
  522. */
  523. float ZOH::refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block)
  524. {
  525. float orig_err[NREGIONS_ONE], opt_err[NREGIONS_ONE], orig_toterr, opt_toterr;
  526. IntEndpts orig_endpts[NREGIONS_ONE], opt_endpts[NREGIONS_ONE];
  527. ComprEndpts compr_orig[NREGIONS_ONE], compr_opt[NREGIONS_ONE];
  528. int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
  529. for (int sp = 0; sp < NPATTERNS; ++sp)
  530. {
  531. // precisions for all channels need to be the same
  532. for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
  533. quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
  534. assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
  535. swap_indices(orig_endpts, orig_indices, shapeindex_best);
  536. compress_endpts(orig_endpts, compr_orig, patterns[sp]);
  537. if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
  538. {
  539. optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
  540. assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
  541. swap_indices(opt_endpts, opt_indices, shapeindex_best);
  542. compress_endpts(opt_endpts, compr_opt, patterns[sp]);
  543. orig_toterr = opt_toterr = 0;
  544. for (int i=0; i < NREGIONS_ONE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
  545. if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
  546. {
  547. emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
  548. return opt_toterr;
  549. }
  550. else
  551. {
  552. // either it stopped fitting when we optimized it, or there was no improvement
  553. // so go back to the unoptimized endpoints which we know will fit
  554. emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
  555. return orig_toterr;
  556. }
  557. }
  558. }
  559. nvAssert (false); // "No candidate found, should never happen (refineone.)";
  560. return FLT_MAX;
  561. }
  562. static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])
  563. {
  564. for (int region = 0; region < NREGIONS_ONE; ++region)
  565. for (int i = 0; i < NINDICES; ++i)
  566. palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
  567. }
  568. // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
  569. static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_ONE])
  570. {
  571. // build list of possibles
  572. Vector3 palette[NREGIONS_ONE][NINDICES];
  573. generate_palette_unquantized(endpts, palette);
  574. float toterr = 0;
  575. Vector3 err;
  576. for (int y = 0; y < tile.size_y; y++)
  577. for (int x = 0; x < tile.size_x; x++)
  578. {
  579. int region = REGION(x,y,shapeindex);
  580. float err, besterr;
  581. besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
  582. for (int i = 1; i < NINDICES && besterr > 0; ++i)
  583. {
  584. err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
  585. if (err > besterr) // error increased, so we're done searching
  586. break;
  587. if (err < besterr)
  588. besterr = err;
  589. }
  590. toterr += besterr;
  591. }
  592. return toterr;
  593. }
  594. float ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE])
  595. {
  596. for (int region=0; region<NREGIONS_ONE; ++region)
  597. {
  598. int np = 0;
  599. Vector3 colors[Tile::TILE_TOTAL];
  600. Vector3 mean(0,0,0);
  601. for (int y = 0; y < tile.size_y; y++) {
  602. for (int x = 0; x < tile.size_x; x++) {
  603. if (REGION(x,y,shapeindex) == region)
  604. {
  605. colors[np] = tile.data[y][x];
  606. mean += tile.data[y][x];
  607. ++np;
  608. }
  609. }
  610. }
  611. // handle simple cases
  612. if (np == 0)
  613. {
  614. Vector3 zero(0,0,0);
  615. endpts[region].A = zero;
  616. endpts[region].B = zero;
  617. continue;
  618. }
  619. else if (np == 1)
  620. {
  621. endpts[region].A = colors[0];
  622. endpts[region].B = colors[0];
  623. continue;
  624. }
  625. else if (np == 2)
  626. {
  627. endpts[region].A = colors[0];
  628. endpts[region].B = colors[1];
  629. continue;
  630. }
  631. mean /= float(np);
  632. Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
  633. // project each pixel value along the principal direction
  634. float minp = FLT_MAX, maxp = -FLT_MAX;
  635. for (int i = 0; i < np; i++)
  636. {
  637. float dp = dot(colors[i]-mean, direction);
  638. if (dp < minp) minp = dp;
  639. if (dp > maxp) maxp = dp;
  640. }
  641. // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
  642. endpts[region].A = mean + minp*direction;
  643. endpts[region].B = mean + maxp*direction;
  644. // clamp endpoints
  645. // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
  646. // shape based on endpoints being clamped
  647. Utils::clamp(endpts[region].A);
  648. Utils::clamp(endpts[region].B);
  649. }
  650. return map_colors(tile, shapeindex, endpts);
  651. }
  652. float ZOH::compressone(const Tile &t, char *block)
  653. {
  654. int shapeindex_best = 0;
  655. FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE];
  656. float msebest = FLT_MAX;
  657. /*
  658. collect the mse values that are within 5% of the best values
  659. optimize each one and choose the best
  660. */
  661. // hack for now -- just use the best value WORK
  662. for (int i=0; i<NSHAPES && msebest>0.0; ++i)
  663. {
  664. float mse = roughone(t, i, tempendpts);
  665. if (mse < msebest)
  666. {
  667. msebest = mse;
  668. shapeindex_best = i;
  669. memcpy(endptsbest, tempendpts, sizeof(endptsbest));
  670. }
  671. }
  672. return refineone(t, shapeindex_best, endptsbest, block);
  673. }