| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883 |
- /*
- Copyright 2007 nVidia, Inc.
- Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
- You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and limitations under the License.
- */
- // two regions zoh compress/decompress code
- // Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
- /* optimization algorithm
- get initial float endpoints
- convert endpoints using 16 bit precision, transform, and get bit delta. choose likely endpoint compression candidates.
- note that there will be 1 or 2 candidates; 2 will be chosen when the delta values are close to the max possible.
- for each EC candidate in order from max precision to smaller precision
- convert endpoints using the appropriate precision.
- optimize the endpoints and minimize square error. save the error and index assignments. apply index compression as well.
- (thus the endpoints and indices are in final form.)
- transform and get bit delta.
- if the bit delta fits, exit
- if we ended up with no candidates somehow, choose the tail set of EC candidates and retry. this should happen hardly ever.
- add a state variable to nvDebugCheck we only do this once.
- convert to bit stream.
- return the error.
- Global optimization
- order all tiles based on their errors
- do something special for high-error tiles
- the goal here is to try to avoid tiling artifacts. but I think this is a research problem. let's just generate an error image...
- display an image that shows partitioning and precision selected for each tile
- */
- #include "bits.h"
- #include "tile.h"
- #include "zoh.h"
- #include "zoh_utils.h"
- #include "nvmath/Fitting.h"
- #include "nvmath/Vector.inl"
- #include <string.h> // strlen
- #include <float.h> // FLT_MAX
- using namespace nv;
- using namespace ZOH;
- #define NINDICES 8
- #define INDEXBITS 3
- #define HIGH_INDEXBIT (1<<(INDEXBITS-1))
- #define DENOM (NINDICES-1)
- // WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
- // i.e. can we search shapes in a particular order so we can see the global error minima easily and
- // stop without having to touch all shapes?
- #include "shapes_two.h"
- // use only the first 32 available shapes
- #undef NSHAPES
- #undef SHAPEBITS
- #define NSHAPES 32
- #define SHAPEBITS 5
- #define POS_TO_X(pos) ((pos)&3)
- #define POS_TO_Y(pos) (((pos)>>2)&3)
- #define NDELTA 4
- struct Chanpat
- {
- int prec[NDELTA]; // precision pattern for one channel
- };
- struct Pattern
- {
- Chanpat chan[NCHANNELS]; // allow different bit patterns per channel -- but we still want constant precision per channel
- int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
- int mode; // associated mode value
- int modebits; // number of mode bits
- const char *encoding; // verilog description of encoding for this mode
- };
- #define MAXMODEBITS 5
- #define MAXMODES (1<<MAXMODEBITS)
- #define NPATTERNS 10
- static const Pattern patterns[NPATTERNS] =
- {
- 11,5,5,5, 11,4,4,4, 11,4,4,4, 1, 0x02, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],rw[10],rx[4:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
- 11,4,4,4, 11,5,5,5, 11,4,4,4, 1, 0x06, 5, "d[4:0],bz[3],gy[4],rz[3:0],bz[2],bz[0],ry[3:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],gw[10],gx[4:0],gy[3:0],gz[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
- 11,4,4,4, 11,4,4,4, 11,5,5,5, 1, 0x0a, 5, "d[4:0],bz[3],bz[4],rz[3:0],bz[2:1],ry[3:0],by[3:0],bw[10],bx[4:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],by[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
- 10,5,5,5, 10,5,5,5, 10,5,5,5, 1, 0x00, 2, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bw[9:0],gw[9:0],rw[9:0],bz[4],by[4],gy[4],m[1:0]",
- 9,5,5,5, 9,5,5,5, 9,5,5,5, 1, 0x0e, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bw[8:0],gy[4],gw[8:0],by[4],rw[8:0],m[4:0]",
- 8,6,6,6, 8,5,5,5, 8,5,5,5, 1, 0x12, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],rx[5:0],bz[4:3],bw[7:0],gy[4],bz[2],gw[7:0],by[4],gz[4],rw[7:0],m[4:0]",
- 8,5,5,5, 8,6,6,6, 8,5,5,5, 1, 0x16, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],gx[5:0],gy[3:0],gz[4],rx[4:0],bz[4],gz[5],bw[7:0],gy[4],gy[5],gw[7:0],by[4],bz[0],rw[7:0],m[4:0]",
- 8,5,5,5, 8,5,5,5, 8,6,6,6, 1, 0x1a, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bx[5:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bz[5],bw[7:0],gy[4],by[5],gw[7:0],by[4],bz[1],rw[7:0],m[4:0]",
- 7,6,6,6, 7,6,6,6, 7,6,6,6, 1, 0x01, 2, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],bw[6:0],gy[4],bz[2],by[5],gw[6:0],by[4],bz[1:0],rw[6:0],gz[5:4],gy[5],m[1:0]",
- 6,6,6,6, 6,6,6,6, 6,6,6,6, 0, 0x1e, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],gz[5],bw[5:0],gy[4],bz[2],by[5],gy[5],gw[5:0],by[4],bz[1:0],gz[4],rw[5:0],m[4:0]",
- };
- // mapping of mode to the corresponding index in pattern
- // UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f -- return -2 for these
- static const int mode_to_pat[MAXMODES] = {
- 3, // 0x00
- 8, // 0x01
- 0, // 0x02
- -1,-1,-1,
- 1, // 0x06
- -1,-1,-1,
- 2, // 0x0a
- -1,-1,-1,
- 4, // 0x0e
- -1,-1,-1,
- 5, // 0x12
- -2,-1,-1,
- 6, // 0x16
- -2,-1,-1,
- 7, // 0x1a
- -2,-1,-1,
- 9, // 0x1e
- -2
- };
- #define R_0(ep) (ep)[0].A[i]
- #define R_1(ep) (ep)[0].B[i]
- #define R_2(ep) (ep)[1].A[i]
- #define R_3(ep) (ep)[1].B[i]
- #define MASK(n) ((1<<(n))-1)
- // compress endpoints
- static void compress_endpts(const IntEndpts in[NREGIONS_TWO], ComprEndpts out[NREGIONS_TWO], const Pattern &p)
- {
- if (p.transformed)
- {
- for (int i=0; i<NCHANNELS; ++i)
- {
- R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
- R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
- R_2(out) = (R_2(in) - R_0(in)) & MASK(p.chan[i].prec[2]);
- R_3(out) = (R_3(in) - R_0(in)) & MASK(p.chan[i].prec[3]);
- }
- }
- else
- {
- for (int i=0; i<NCHANNELS; ++i)
- {
- R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
- R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
- R_2(out) = R_2(in) & MASK(p.chan[i].prec[2]);
- R_3(out) = R_3(in) & MASK(p.chan[i].prec[3]);
- }
- }
- }
- // decompress endpoints
- static void decompress_endpts(const ComprEndpts in[NREGIONS_TWO], IntEndpts out[NREGIONS_TWO], const Pattern &p)
- {
- bool issigned = Utils::FORMAT == SIGNED_F16;
- if (p.transformed)
- {
- for (int i=0; i<NCHANNELS; ++i)
- {
- R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
- int t;
- t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
- t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
- R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
- t = SIGN_EXTEND(R_2(in), p.chan[i].prec[2]);
- t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
- R_2(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
- t = SIGN_EXTEND(R_3(in), p.chan[i].prec[3]);
- t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
- R_3(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
- }
- }
- else
- {
- for (int i=0; i<NCHANNELS; ++i)
- {
- R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
- R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
- R_2(out) = issigned ? SIGN_EXTEND(R_2(in),p.chan[i].prec[2]) : R_2(in);
- R_3(out) = issigned ? SIGN_EXTEND(R_3(in),p.chan[i].prec[3]) : R_3(in);
- }
- }
- }
- static void quantize_endpts(const FltEndpts endpts[NREGIONS_TWO], int prec, IntEndpts q_endpts[NREGIONS_TWO])
- {
- for (int region = 0; region < NREGIONS_TWO; ++region)
- {
- q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
- q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
- q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
- q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
- q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
- q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
- }
- }
- // swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
- static void swap_indices(IntEndpts endpts[NREGIONS_TWO], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
- {
- for (int region = 0; region < NREGIONS_TWO; ++region)
- {
- int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
- int x = POS_TO_X(position);
- int y = POS_TO_Y(position);
- nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table
- if (indices[y][x] & HIGH_INDEXBIT)
- {
- // high bit is set, swap the endpts and indices for this region
- int t;
- for (int i=0; i<NCHANNELS; ++i)
- {
- t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
- }
- for (int y = 0; y < Tile::TILE_H; y++)
- for (int x = 0; x < Tile::TILE_W; x++)
- if (REGION(x,y,shapeindex) == region)
- indices[y][x] = NINDICES - 1 - indices[y][x];
- }
- }
- }
- // endpoints fit only if the compression was lossless
- static bool endpts_fit(const IntEndpts orig[NREGIONS_TWO], const ComprEndpts compressed[NREGIONS_TWO], const Pattern &p)
- {
- IntEndpts uncompressed[NREGIONS_TWO];
- decompress_endpts(compressed, uncompressed, p);
- for (int j=0; j<NREGIONS_TWO; ++j)
- {
- for (int i=0; i<NCHANNELS; ++i)
- {
- if (orig[j].A[i] != uncompressed[j].A[i]) return false;
- if (orig[j].B[i] != uncompressed[j].B[i]) return false;
- }
- }
- return true;
- }
- static void write_header(const ComprEndpts endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, Bits &out)
- {
- // interpret the verilog backwards and process it
- int m = p.mode;
- int d = shapeindex;
- int rw = endpts[0].A[0], rx = endpts[0].B[0], ry = endpts[1].A[0], rz = endpts[1].B[0];
- int gw = endpts[0].A[1], gx = endpts[0].B[1], gy = endpts[1].A[1], gz = endpts[1].B[1];
- int bw = endpts[0].A[2], bx = endpts[0].B[2], by = endpts[1].A[2], bz = endpts[1].B[2];
- int ptr = int(strlen(p.encoding));
- while (ptr)
- {
- Field field;
- int endbit, len;
- // !!!UNDONE: get rid of string parsing!!!
- Utils::parse(p.encoding, ptr, field, endbit, len);
- switch(field)
- {
- case FIELD_M: out.write( m >> endbit, len); break;
- case FIELD_D: out.write( d >> endbit, len); break;
- case FIELD_RW: out.write(rw >> endbit, len); break;
- case FIELD_RX: out.write(rx >> endbit, len); break;
- case FIELD_RY: out.write(ry >> endbit, len); break;
- case FIELD_RZ: out.write(rz >> endbit, len); break;
- case FIELD_GW: out.write(gw >> endbit, len); break;
- case FIELD_GX: out.write(gx >> endbit, len); break;
- case FIELD_GY: out.write(gy >> endbit, len); break;
- case FIELD_GZ: out.write(gz >> endbit, len); break;
- case FIELD_BW: out.write(bw >> endbit, len); break;
- case FIELD_BX: out.write(bx >> endbit, len); break;
- case FIELD_BY: out.write(by >> endbit, len); break;
- case FIELD_BZ: out.write(bz >> endbit, len); break;
- default: nvUnreachable();
- }
- }
- }
- static bool read_header(Bits &in, ComprEndpts endpts[NREGIONS_TWO], int &shapeindex, Pattern &p)
- {
- // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
- int mode = in.read(2);
- if (mode != 0x00 && mode != 0x01)
- mode = (in.read(3) << 2) | mode;
- int pat_index = mode_to_pat[mode];
- if (pat_index == -2)
- return false; // reserved mode found
- nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
- nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
- p = patterns[pat_index];
- int d;
- int rw, rx, ry, rz;
- int gw, gx, gy, gz;
- int bw, bx, by, bz;
- d = 0;
- rw = rx = ry = rz = 0;
- gw = gx = gy = gz = 0;
- bw = bx = by = bz = 0;
- int ptr = int(strlen(p.encoding));
- while (ptr)
- {
- Field field;
- int endbit, len;
- // !!!UNDONE: get rid of string parsing!!!
- Utils::parse(p.encoding, ptr, field, endbit, len);
- switch(field)
- {
- case FIELD_M: break; // already processed so ignore
- case FIELD_D: d |= in.read(len) << endbit; break;
- case FIELD_RW: rw |= in.read(len) << endbit; break;
- case FIELD_RX: rx |= in.read(len) << endbit; break;
- case FIELD_RY: ry |= in.read(len) << endbit; break;
- case FIELD_RZ: rz |= in.read(len) << endbit; break;
- case FIELD_GW: gw |= in.read(len) << endbit; break;
- case FIELD_GX: gx |= in.read(len) << endbit; break;
- case FIELD_GY: gy |= in.read(len) << endbit; break;
- case FIELD_GZ: gz |= in.read(len) << endbit; break;
- case FIELD_BW: bw |= in.read(len) << endbit; break;
- case FIELD_BX: bx |= in.read(len) << endbit; break;
- case FIELD_BY: by |= in.read(len) << endbit; break;
- case FIELD_BZ: bz |= in.read(len) << endbit; break;
- default: nvUnreachable();
- }
- }
- nvDebugCheck (in.getptr() == 128 - 46);
- shapeindex = d;
- endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz;
- endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz;
- endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz;
- return true;
- }
- static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
- {
- int positions[NREGIONS_TWO];
- for (int r = 0; r < NREGIONS_TWO; ++r)
- positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
- for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
- {
- int x = POS_TO_X(pos);
- int y = POS_TO_Y(pos);
- bool match = false;
- for (int r = 0; r < NREGIONS_TWO; ++r)
- if (positions[r] == pos) { match = true; break; }
- out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
- }
- }
- static void emit_block(const ComprEndpts compr_endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
- {
- Bits out(block, ZOH::BITSIZE);
- write_header(compr_endpts, shapeindex, p, out);
- write_indices(indices, shapeindex, out);
- nvDebugCheck(out.getptr() == ZOH::BITSIZE);
- }
- static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
- {
- // scale endpoints
- int a, b; // really need a IntVector3...
- a = Utils::unquantize(endpts.A[0], prec);
- b = Utils::unquantize(endpts.B[0], prec);
- // interpolate
- for (int i = 0; i < NINDICES; ++i)
- palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
- a = Utils::unquantize(endpts.A[1], prec);
- b = Utils::unquantize(endpts.B[1], prec);
- // interpolate
- for (int i = 0; i < NINDICES; ++i)
- palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
- a = Utils::unquantize(endpts.A[2], prec);
- b = Utils::unquantize(endpts.B[2], prec);
- // interpolate
- for (int i = 0; i < NINDICES; ++i)
- palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
- }
- static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
- {
- int positions[NREGIONS_TWO];
- for (int r = 0; r < NREGIONS_TWO; ++r)
- positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
- for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
- {
- int x = POS_TO_X(pos);
- int y = POS_TO_Y(pos);
- bool match = false;
- for (int r = 0; r < NREGIONS_TWO; ++r)
- if (positions[r] == pos) { match = true; break; }
- indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
- }
- }
- void ZOH::decompresstwo(const char *block, Tile &t)
- {
- Bits in(block, ZOH::BITSIZE);
- Pattern p;
- IntEndpts endpts[NREGIONS_TWO];
- ComprEndpts compr_endpts[NREGIONS_TWO];
- int shapeindex;
- if (!read_header(in, compr_endpts, shapeindex, p))
- {
- // reserved mode, return all zeroes
- for (int y = 0; y < Tile::TILE_H; y++)
- for (int x = 0; x < Tile::TILE_W; x++)
- t.data[y][x] = Vector3(0.0f);
- return;
- }
- decompress_endpts(compr_endpts, endpts, p);
- Vector3 palette[NREGIONS_TWO][NINDICES];
- for (int r = 0; r < NREGIONS_TWO; ++r)
- generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
- int indices[Tile::TILE_H][Tile::TILE_W];
- read_indices(in, shapeindex, indices);
- nvDebugCheck(in.getptr() == ZOH::BITSIZE);
- // lookup
- for (int y = 0; y < Tile::TILE_H; y++)
- for (int x = 0; x < Tile::TILE_W; x++)
- t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
- }
- // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
- static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
- {
- Vector3 palette[NINDICES];
- float toterr = 0;
- Vector3 err;
- generate_palette_quantized(endpts, prec, palette);
- for (int i = 0; i < np; ++i)
- {
- float err, besterr;
- besterr = Utils::norm(colors[i], palette[0]) * importance[i];
- for (int j = 1; j < NINDICES && besterr > 0; ++j)
- {
- err = Utils::norm(colors[i], palette[j]) * importance[i];
- if (err > besterr) // error increased, so we're done searching
- break;
- if (err < besterr)
- besterr = err;
- }
- toterr += besterr;
- }
- return toterr;
- }
- // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
- static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_TWO], int prec,
- int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_TWO])
- {
- // build list of possibles
- Vector3 palette[NREGIONS_TWO][NINDICES];
- for (int region = 0; region < NREGIONS_TWO; ++region)
- {
- generate_palette_quantized(endpts[region], prec, &palette[region][0]);
- toterr[region] = 0;
- }
- Vector3 err;
- for (int y = 0; y < tile.size_y; y++)
- for (int x = 0; x < tile.size_x; x++)
- {
- int region = REGION(x,y,shapeindex);
- float err, besterr;
- besterr = Utils::norm(tile.data[y][x], palette[region][0]);
- indices[y][x] = 0;
- for (int i = 1; i < NINDICES && besterr > 0; ++i)
- {
- err = Utils::norm(tile.data[y][x], palette[region][i]);
- if (err > besterr) // error increased, so we're done searching
- break;
- if (err < besterr)
- {
- besterr = err;
- indices[y][x] = i;
- }
- }
- toterr[region] += besterr;
- }
- }
- static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
- float old_err, int do_b)
- {
- // we have the old endpoints: old_endpts
- // we have the perturbed endpoints: new_endpts
- // we have the temporary endpoints: temp_endpts
- IntEndpts temp_endpts;
- float min_err = old_err; // start with the best current error
- int beststep;
- // copy real endpoints so we can perturb them
- for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
- // do a logarithmic search for the best error for this endpoint (which)
- for (int step = 1 << (prec-1); step; step >>= 1)
- {
- bool improved = false;
- for (int sign = -1; sign <= 1; sign += 2)
- {
- if (do_b == 0)
- {
- temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
- if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
- continue;
- }
- else
- {
- temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
- if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
- continue;
- }
- float err = map_colors(colors, importance, np, temp_endpts, prec);
- if (err < min_err)
- {
- improved = true;
- min_err = err;
- beststep = sign * step;
- }
- }
- // if this was an improvement, move the endpoint and continue search from there
- if (improved)
- {
- if (do_b == 0)
- new_endpts.A[ch] += beststep;
- else
- new_endpts.B[ch] += beststep;
- }
- }
- return min_err;
- }
- static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
- {
- float opt_err = orig_err;
- for (int ch = 0; ch < NCHANNELS; ++ch)
- {
- opt_endpts.A[ch] = orig_endpts.A[ch];
- opt_endpts.B[ch] = orig_endpts.B[ch];
- }
- /*
- err0 = perturb(rgb0, delta0)
- err1 = perturb(rgb1, delta1)
- if (err0 < err1)
- if (err0 >= initial_error) break
- rgb0 += delta0
- next = 1
- else
- if (err1 >= initial_error) break
- rgb1 += delta1
- next = 0
- initial_err = map()
- for (;;)
- err = perturb(next ? rgb1:rgb0, delta)
- if (err >= initial_err) break
- next? rgb1 : rgb0 += delta
- initial_err = err
- */
- IntEndpts new_a, new_b;
- IntEndpts new_endpt;
- int do_b;
- // now optimize each channel separately
- for (int ch = 0; ch < NCHANNELS; ++ch)
- {
- // figure out which endpoint when perturbed gives the most improvement and start there
- // if we just alternate, we can easily end up in a local minima
- float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A
- float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B
- if (err0 < err1)
- {
- if (err0 >= opt_err)
- continue;
- opt_endpts.A[ch] = new_a.A[ch];
- opt_err = err0;
- do_b = 1; // do B next
- }
- else
- {
- if (err1 >= opt_err)
- continue;
- opt_endpts.B[ch] = new_b.B[ch];
- opt_err = err1;
- do_b = 0; // do A next
- }
- // now alternate endpoints and keep trying until there is no improvement
- for (;;)
- {
- float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
- if (err >= opt_err)
- break;
- if (do_b == 0)
- opt_endpts.A[ch] = new_endpt.A[ch];
- else
- opt_endpts.B[ch] = new_endpt.B[ch];
- opt_err = err;
- do_b = 1 - do_b; // now move the other endpoint
- }
- }
- }
- static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_TWO],
- const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO])
- {
- Vector3 pixels[Tile::TILE_TOTAL];
- float importance[Tile::TILE_TOTAL];
- float err = 0;
- for (int region=0; region<NREGIONS_TWO; ++region)
- {
- // collect the pixels in the region
- int np = 0;
- for (int y = 0; y < tile.size_y; y++)
- for (int x = 0; x < tile.size_x; x++)
- if (REGION(x,y,shapeindex) == region)
- {
- pixels[np] = tile.data[y][x];
- importance[np] = tile.importance_map[y][x];
- ++np;
- }
- optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
- }
- }
- /* optimization algorithm
- for each pattern
- convert endpoints using pattern precision
- assign indices and get initial error
- compress indices (and possibly reorder endpoints)
- transform endpoints
- if transformed endpoints fit pattern
- get original endpoints back
- optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
- compress new indices
- transform new endpoints
- if new endpoints fit pattern AND if error is improved
- emit compressed block with new data
- else
- emit compressed block with original data // to try to preserve maximum endpoint precision
- */
- float ZOH::refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block)
- {
- float orig_err[NREGIONS_TWO], opt_err[NREGIONS_TWO], orig_toterr, opt_toterr;
- IntEndpts orig_endpts[NREGIONS_TWO], opt_endpts[NREGIONS_TWO];
- ComprEndpts compr_orig[NREGIONS_TWO], compr_opt[NREGIONS_TWO];
- int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
- for (int sp = 0; sp < NPATTERNS; ++sp)
- {
- // precisions for all channels need to be the same
- for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
- quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
- assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
- swap_indices(orig_endpts, orig_indices, shapeindex_best);
- compress_endpts(orig_endpts, compr_orig, patterns[sp]);
- if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
- {
- optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
- assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
- swap_indices(opt_endpts, opt_indices, shapeindex_best);
- compress_endpts(opt_endpts, compr_opt, patterns[sp]);
- orig_toterr = opt_toterr = 0;
- for (int i=0; i < NREGIONS_TWO; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
- if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
- {
- emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
- return opt_toterr;
- }
- else
- {
- // either it stopped fitting when we optimized it, or there was no improvement
- // so go back to the unoptimized endpoints which we know will fit
- emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
- return orig_toterr;
- }
- }
- }
- nvAssert(false); //throw "No candidate found, should never happen (refinetwo.)";
- return FLT_MAX;
- }
- static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], Vector3 palette[NREGIONS_TWO][NINDICES])
- {
- for (int region = 0; region < NREGIONS_TWO; ++region)
- for (int i = 0; i < NINDICES; ++i)
- palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
- }
- // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
- static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_TWO])
- {
- // build list of possibles
- Vector3 palette[NREGIONS_TWO][NINDICES];
- generate_palette_unquantized(endpts, palette);
- float toterr = 0;
- Vector3 err;
- for (int y = 0; y < tile.size_y; y++)
- for (int x = 0; x < tile.size_x; x++)
- {
- int region = REGION(x,y,shapeindex);
- float err, besterr;
- besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
- for (int i = 1; i < NINDICES && besterr > 0; ++i)
- {
- err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
- if (err > besterr) // error increased, so we're done searching
- break;
- if (err < besterr)
- besterr = err;
- }
- toterr += besterr;
- }
- return toterr;
- }
- float ZOH::roughtwo(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_TWO])
- {
- for (int region=0; region<NREGIONS_TWO; ++region)
- {
- int np = 0;
- Vector3 colors[Tile::TILE_TOTAL];
- Vector3 mean(0,0,0);
- for (int y = 0; y < tile.size_y; y++)
- for (int x = 0; x < tile.size_x; x++)
- if (REGION(x,y,shapeindex) == region)
- {
- colors[np] = tile.data[y][x];
- mean += tile.data[y][x];
- ++np;
- }
- // handle simple cases
- if (np == 0)
- {
- Vector3 zero(0,0,0);
- endpts[region].A = zero;
- endpts[region].B = zero;
- continue;
- }
- else if (np == 1)
- {
- endpts[region].A = colors[0];
- endpts[region].B = colors[0];
- continue;
- }
- else if (np == 2)
- {
- endpts[region].A = colors[0];
- endpts[region].B = colors[1];
- continue;
- }
- mean /= float(np);
- Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
- // project each pixel value along the principal direction
- float minp = FLT_MAX, maxp = -FLT_MAX;
- for (int i = 0; i < np; i++)
- {
- float dp = dot(colors[i]-mean, direction);
- if (dp < minp) minp = dp;
- if (dp > maxp) maxp = dp;
- }
- // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
- endpts[region].A = mean + minp*direction;
- endpts[region].B = mean + maxp*direction;
- // clamp endpoints
- // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
- // shape based on endpoints being clamped
- Utils::clamp(endpts[region].A);
- Utils::clamp(endpts[region].B);
- }
- return map_colors(tile, shapeindex, endpts);
- }
- float ZOH::compresstwo(const Tile &t, char *block)
- {
- int shapeindex_best = 0;
- FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO];
- float msebest = FLT_MAX;
- /*
- collect the mse values that are within 5% of the best values
- optimize each one and choose the best
- */
- // hack for now -- just use the best value WORK
- for (int i=0; i<NSHAPES && msebest>0.0; ++i)
- {
- float mse = roughtwo(t, i, tempendpts);
- if (mse < msebest)
- {
- msebest = mse;
- shapeindex_best = i;
- memcpy(endptsbest, tempendpts, sizeof(endptsbest));
- }
- }
- return refinetwo(t, shapeindex_best, endptsbest, block);
- }
|