indexcodec.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
  2. #include "meshoptimizer.h"
  3. #include <assert.h>
  4. #include <string.h>
  5. #ifndef TRACE
  6. #define TRACE 0
  7. #endif
  8. #if TRACE
  9. #include <stdio.h>
  10. #endif
  11. // This work is based on:
  12. // Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013
  13. // Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014
  14. namespace meshopt
  15. {
  16. const unsigned char kIndexHeader = 0xe0;
  17. typedef unsigned int VertexFifo[16];
  18. typedef unsigned int EdgeFifo[16][2];
  19. static const unsigned int kTriangleIndexOrder[3][3] = {
  20. {0, 1, 2},
  21. {1, 2, 0},
  22. {2, 0, 1},
  23. };
  24. static const unsigned char kCodeAuxEncodingTable[16] = {
  25. 0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
  26. 0, 0, // last two entries aren't used for encoding
  27. };
  28. static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next)
  29. {
  30. (void)a;
  31. return (b == next) ? 1 : (c == next) ? 2 : 0;
  32. }
  33. static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset)
  34. {
  35. for (int i = 0; i < 16; ++i)
  36. {
  37. size_t index = (offset - 1 - i) & 15;
  38. unsigned int e0 = fifo[index][0];
  39. unsigned int e1 = fifo[index][1];
  40. if (e0 == a && e1 == b)
  41. return (i << 2) | 0;
  42. if (e0 == b && e1 == c)
  43. return (i << 2) | 1;
  44. if (e0 == c && e1 == a)
  45. return (i << 2) | 2;
  46. }
  47. return -1;
  48. }
  49. static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset)
  50. {
  51. fifo[offset][0] = a;
  52. fifo[offset][1] = b;
  53. offset = (offset + 1) & 15;
  54. }
  55. static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset)
  56. {
  57. for (int i = 0; i < 16; ++i)
  58. {
  59. size_t index = (offset - 1 - i) & 15;
  60. if (fifo[index] == v)
  61. return i;
  62. }
  63. return -1;
  64. }
  65. static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1)
  66. {
  67. fifo[offset] = v;
  68. offset = (offset + cond) & 15;
  69. }
  70. static void encodeVByte(unsigned char*& data, unsigned int v)
  71. {
  72. // encode 32-bit value in up to 5 7-bit groups
  73. do
  74. {
  75. *data++ = (v & 127) | (v > 127 ? 128 : 0);
  76. v >>= 7;
  77. } while (v);
  78. }
  79. static unsigned int decodeVByte(const unsigned char*& data)
  80. {
  81. unsigned char lead = *data++;
  82. // fast path: single byte
  83. if (lead < 128)
  84. return lead;
  85. // slow path: up to 4 extra bytes
  86. // note that this loop always terminates, which is important for malformed data
  87. unsigned int result = lead & 127;
  88. unsigned int shift = 7;
  89. for (int i = 0; i < 4; ++i)
  90. {
  91. unsigned char group = *data++;
  92. result |= (group & 127) << shift;
  93. shift += 7;
  94. if (group < 128)
  95. break;
  96. }
  97. return result;
  98. }
  99. static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int next, unsigned int last)
  100. {
  101. (void)next;
  102. unsigned int d = index - last;
  103. unsigned int v = (d << 1) ^ (int(d) >> 31);
  104. encodeVByte(data, v);
  105. }
  106. static unsigned int decodeIndex(const unsigned char*& data, unsigned int next, unsigned int last)
  107. {
  108. (void)next;
  109. unsigned int v = decodeVByte(data);
  110. unsigned int d = (v >> 1) ^ -int(v & 1);
  111. return last + d;
  112. }
  113. static int getCodeAuxIndex(unsigned char v, const unsigned char* table)
  114. {
  115. for (int i = 0; i < 16; ++i)
  116. if (table[i] == v)
  117. return i;
  118. return -1;
  119. }
  120. static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c)
  121. {
  122. if (index_size == 2)
  123. {
  124. static_cast<unsigned short*>(destination)[offset + 0] = (unsigned short)(a);
  125. static_cast<unsigned short*>(destination)[offset + 1] = (unsigned short)(b);
  126. static_cast<unsigned short*>(destination)[offset + 2] = (unsigned short)(c);
  127. }
  128. else
  129. {
  130. static_cast<unsigned int*>(destination)[offset + 0] = a;
  131. static_cast<unsigned int*>(destination)[offset + 1] = b;
  132. static_cast<unsigned int*>(destination)[offset + 2] = c;
  133. }
  134. }
  135. #if TRACE
  136. static size_t sortTop16(unsigned char dest[16], size_t stats[256])
  137. {
  138. size_t destsize = 0;
  139. for (size_t i = 0; i < 256; ++i)
  140. {
  141. size_t j = 0;
  142. for (; j < destsize; ++j)
  143. {
  144. if (stats[i] >= stats[dest[j]])
  145. {
  146. if (destsize < 16)
  147. destsize++;
  148. memmove(&dest[j + 1], &dest[j], destsize - 1 - j);
  149. dest[j] = (unsigned char)i;
  150. break;
  151. }
  152. }
  153. if (j == destsize && destsize < 16)
  154. {
  155. dest[destsize] = (unsigned char)i;
  156. destsize++;
  157. }
  158. }
  159. return destsize;
  160. }
  161. #endif
  162. } // namespace meshopt
  163. size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
  164. {
  165. using namespace meshopt;
  166. assert(index_count % 3 == 0);
  167. #if TRACE
  168. size_t codestats[256] = {};
  169. size_t codeauxstats[256] = {};
  170. #endif
  171. // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
  172. if (buffer_size < 1 + index_count / 3 + 16)
  173. return 0;
  174. buffer[0] = kIndexHeader;
  175. EdgeFifo edgefifo;
  176. memset(edgefifo, -1, sizeof(edgefifo));
  177. VertexFifo vertexfifo;
  178. memset(vertexfifo, -1, sizeof(vertexfifo));
  179. size_t edgefifooffset = 0;
  180. size_t vertexfifooffset = 0;
  181. unsigned int next = 0;
  182. unsigned int last = 0;
  183. unsigned char* code = buffer + 1;
  184. unsigned char* data = code + index_count / 3;
  185. unsigned char* data_safe_end = buffer + buffer_size - 16;
  186. // use static encoding table; it's possible to pack the result and then build an optimal table and repack
  187. // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set
  188. const unsigned char* codeaux_table = kCodeAuxEncodingTable;
  189. for (size_t i = 0; i < index_count; i += 3)
  190. {
  191. // make sure we have enough space to write a triangle
  192. // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index
  193. // after this we can be sure we can write without extra bounds checks
  194. if (data > data_safe_end)
  195. return 0;
  196. int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset);
  197. if (fer >= 0 && (fer >> 2) < 15)
  198. {
  199. const unsigned int* order = kTriangleIndexOrder[fer & 3];
  200. unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
  201. // encode edge index and vertex fifo index, next or free index
  202. int fe = fer >> 2;
  203. int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
  204. int fec = (fc >= 1 && fc < 15) ? fc : (c == next) ? (next++, 0) : 15;
  205. *code++ = (unsigned char)((fe << 4) | fec);
  206. #if TRACE
  207. codestats[code[-1]]++;
  208. #endif
  209. // note that we need to update the last index since free indices are delta-encoded
  210. if (fec == 15)
  211. encodeIndex(data, c, next, last), last = c;
  212. // we only need to push third vertex since first two are likely already in the vertex fifo
  213. if (fec == 0 || fec == 15)
  214. pushVertexFifo(vertexfifo, c, vertexfifooffset);
  215. // we only need to push two new edges to edge fifo since the third one is already there
  216. pushEdgeFifo(edgefifo, c, b, edgefifooffset);
  217. pushEdgeFifo(edgefifo, a, c, edgefifooffset);
  218. }
  219. else
  220. {
  221. int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next);
  222. const unsigned int* order = kTriangleIndexOrder[rotation];
  223. unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
  224. int fb = getVertexFifo(vertexfifo, b, vertexfifooffset);
  225. int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
  226. // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a
  227. int fea = (a == next) ? (next++, 0) : 15;
  228. int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15;
  229. int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? (next++, 0) : 15;
  230. // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise
  231. unsigned char codeaux = (unsigned char)((feb << 4) | fec);
  232. int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table);
  233. // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15
  234. if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14)
  235. {
  236. *code++ = (unsigned char)((15 << 4) | codeauxindex);
  237. }
  238. else
  239. {
  240. *code++ = (unsigned char)((15 << 4) | 14 | fea);
  241. *data++ = codeaux;
  242. }
  243. #if TRACE
  244. codestats[code[-1]]++;
  245. codeauxstats[codeaux]++;
  246. #endif
  247. // note that we need to update the last index since free indices are delta-encoded
  248. if (fea == 15)
  249. encodeIndex(data, a, next, last), last = a;
  250. if (feb == 15)
  251. encodeIndex(data, b, next, last), last = b;
  252. if (fec == 15)
  253. encodeIndex(data, c, next, last), last = c;
  254. // only push vertices that weren't already in fifo
  255. if (fea == 0 || fea == 15)
  256. pushVertexFifo(vertexfifo, a, vertexfifooffset);
  257. if (feb == 0 || feb == 15)
  258. pushVertexFifo(vertexfifo, b, vertexfifooffset);
  259. if (fec == 0 || fec == 15)
  260. pushVertexFifo(vertexfifo, c, vertexfifooffset);
  261. // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles
  262. pushEdgeFifo(edgefifo, b, a, edgefifooffset);
  263. pushEdgeFifo(edgefifo, c, b, edgefifooffset);
  264. pushEdgeFifo(edgefifo, a, c, edgefifooffset);
  265. }
  266. }
  267. // make sure we have enough space to write codeaux table
  268. if (data > data_safe_end)
  269. return 0;
  270. // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding
  271. // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data
  272. // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input
  273. for (size_t i = 0; i < 16; ++i)
  274. {
  275. // decoder assumes that table entries never refer to separately encoded indices
  276. assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf);
  277. *data++ = codeaux_table[i];
  278. }
  279. assert(data >= buffer + index_count / 3 + 16);
  280. assert(data <= buffer + buffer_size);
  281. #if TRACE
  282. unsigned char codetop[16], codeauxtop[16];
  283. size_t codetopsize = sortTop16(codetop, codestats);
  284. size_t codeauxtopsize = sortTop16(codeauxtop, codeauxstats);
  285. size_t sumcode = 0, sumcodeaux = 0;
  286. for (size_t i = 0; i < 256; ++i)
  287. sumcode += codestats[i], sumcodeaux += codeauxstats[i];
  288. size_t acccode = 0, acccodeaux = 0;
  289. printf("code\t\t\t\t\tcodeaux\n");
  290. for (size_t i = 0; i < codetopsize && i < codeauxtopsize; ++i)
  291. {
  292. acccode += codestats[codetop[i]];
  293. acccodeaux += codeauxstats[codeauxtop[i]];
  294. printf("%2d: %02x = %d (%.1f%% ..%.1f%%)\t\t%2d: %02x = %d (%.1f%% ..%.1f%%)\n",
  295. int(i), codetop[i], int(codestats[codetop[i]]), double(codestats[codetop[i]]) / double(sumcode) * 100, double(acccode) / double(sumcode) * 100,
  296. int(i), codeauxtop[i], int(codeauxstats[codeauxtop[i]]), double(codeauxstats[codeauxtop[i]]) / double(sumcodeaux) * 100, double(acccodeaux) / double(sumcodeaux) * 100);
  297. }
  298. #endif
  299. return data - buffer;
  300. }
  301. size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count)
  302. {
  303. assert(index_count % 3 == 0);
  304. // compute number of bits required for each index
  305. unsigned int vertex_bits = 1;
  306. while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
  307. vertex_bits++;
  308. // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas
  309. unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7;
  310. return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16;
  311. }
  312. int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
  313. {
  314. using namespace meshopt;
  315. assert(index_count % 3 == 0);
  316. assert(index_size == 2 || index_size == 4);
  317. // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
  318. if (buffer_size < 1 + index_count / 3 + 16)
  319. return -2;
  320. if (buffer[0] != kIndexHeader)
  321. return -1;
  322. EdgeFifo edgefifo;
  323. memset(edgefifo, -1, sizeof(edgefifo));
  324. VertexFifo vertexfifo;
  325. memset(vertexfifo, -1, sizeof(vertexfifo));
  326. size_t edgefifooffset = 0;
  327. size_t vertexfifooffset = 0;
  328. unsigned int next = 0;
  329. unsigned int last = 0;
  330. // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
  331. const unsigned char* code = buffer + 1;
  332. const unsigned char* data = code + index_count / 3;
  333. const unsigned char* data_safe_end = buffer + buffer_size - 16;
  334. const unsigned char* codeaux_table = data_safe_end;
  335. for (size_t i = 0; i < index_count; i += 3)
  336. {
  337. // make sure we have enough data to read for a triangle
  338. // each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
  339. // after this we can be sure we can read without extra bounds checks
  340. if (data > data_safe_end)
  341. return -2;
  342. unsigned char codetri = *code++;
  343. if (codetri < 0xf0)
  344. {
  345. int fe = codetri >> 4;
  346. // fifo reads are wrapped around 16 entry buffer
  347. unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0];
  348. unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1];
  349. int fec = codetri & 15;
  350. // note: this is the most common path in the entire decoder
  351. // inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable
  352. if (fec != 15)
  353. {
  354. // fifo reads are wrapped around 16 entry buffer
  355. unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15];
  356. unsigned int c = (fec == 0) ? next : cf;
  357. int fec0 = fec == 0;
  358. next += fec0;
  359. // output triangle
  360. writeTriangle(destination, i, index_size, a, b, c);
  361. // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
  362. pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
  363. pushEdgeFifo(edgefifo, c, b, edgefifooffset);
  364. pushEdgeFifo(edgefifo, a, c, edgefifooffset);
  365. }
  366. else
  367. {
  368. unsigned int c = 0;
  369. // note that we need to update the last index since free indices are delta-encoded
  370. last = c = decodeIndex(data, next, last);
  371. // output triangle
  372. writeTriangle(destination, i, index_size, a, b, c);
  373. // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
  374. pushVertexFifo(vertexfifo, c, vertexfifooffset);
  375. pushEdgeFifo(edgefifo, c, b, edgefifooffset);
  376. pushEdgeFifo(edgefifo, a, c, edgefifooffset);
  377. }
  378. }
  379. else
  380. {
  381. // fast path: read codeaux from the table
  382. if (codetri < 0xfe)
  383. {
  384. unsigned char codeaux = codeaux_table[codetri & 15];
  385. // note: table can't contain feb/fec=15
  386. int feb = codeaux >> 4;
  387. int fec = codeaux & 15;
  388. // fifo reads are wrapped around 16 entry buffer
  389. // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
  390. unsigned int a = next++;
  391. unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15];
  392. unsigned int b = (feb == 0) ? next : bf;
  393. int feb0 = feb == 0;
  394. next += feb0;
  395. unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15];
  396. unsigned int c = (fec == 0) ? next : cf;
  397. int fec0 = fec == 0;
  398. next += fec0;
  399. // output triangle
  400. writeTriangle(destination, i, index_size, a, b, c);
  401. // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
  402. pushVertexFifo(vertexfifo, a, vertexfifooffset);
  403. pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0);
  404. pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
  405. pushEdgeFifo(edgefifo, b, a, edgefifooffset);
  406. pushEdgeFifo(edgefifo, c, b, edgefifooffset);
  407. pushEdgeFifo(edgefifo, a, c, edgefifooffset);
  408. }
  409. else
  410. {
  411. // slow path: read a full byte for codeaux instead of using a table lookup
  412. unsigned char codeaux = *data++;
  413. int fea = codetri == 0xfe ? 0 : 15;
  414. int feb = codeaux >> 4;
  415. int fec = codeaux & 15;
  416. // fifo reads are wrapped around 16 entry buffer
  417. // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
  418. unsigned int a = (fea == 0) ? next++ : 0;
  419. unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15];
  420. unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15];
  421. // note that we need to update the last index since free indices are delta-encoded
  422. if (fea == 15)
  423. last = a = decodeIndex(data, next, last);
  424. if (feb == 15)
  425. last = b = decodeIndex(data, next, last);
  426. if (fec == 15)
  427. last = c = decodeIndex(data, next, last);
  428. // output triangle
  429. writeTriangle(destination, i, index_size, a, b, c);
  430. // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
  431. pushVertexFifo(vertexfifo, a, vertexfifooffset);
  432. pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15));
  433. pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15));
  434. pushEdgeFifo(edgefifo, b, a, edgefifooffset);
  435. pushEdgeFifo(edgefifo, c, b, edgefifooffset);
  436. pushEdgeFifo(edgefifo, a, c, edgefifooffset);
  437. }
  438. }
  439. }
  440. // we should've read all data bytes and stopped at the boundary between data and codeaux table
  441. if (data != data_safe_end)
  442. return -3;
  443. return 0;
  444. }