vcacheoptimizer.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
// This file is part of the meshoptimizer library; see meshoptimizer.h for version/license details
  2. #include "meshoptimizer.h"
  3. #include <assert.h>
  4. #include <string.h>
// This work is based on:
// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
  8. namespace meshopt
  9. {
  10. const size_t kCacheSizeMax = 16;
  11. const size_t kValenceMax = 8;
  12. static const float kVertexScoreTableCache[1 + kCacheSizeMax] = {
  13. 0.f,
  14. 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f};
  15. static const float kVertexScoreTableLive[1 + kValenceMax] = {
  16. 0.f,
  17. 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f};
  18. struct TriangleAdjacency
  19. {
  20. unsigned int* counts;
  21. unsigned int* offsets;
  22. unsigned int* data;
  23. };
  24. static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
  25. {
  26. size_t face_count = index_count / 3;
  27. // allocate arrays
  28. adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
  29. adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
  30. adjacency.data = allocator.allocate<unsigned int>(index_count);
  31. // fill triangle counts
  32. memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
  33. for (size_t i = 0; i < index_count; ++i)
  34. {
  35. assert(indices[i] < vertex_count);
  36. adjacency.counts[indices[i]]++;
  37. }
  38. // fill offset table
  39. unsigned int offset = 0;
  40. for (size_t i = 0; i < vertex_count; ++i)
  41. {
  42. adjacency.offsets[i] = offset;
  43. offset += adjacency.counts[i];
  44. }
  45. assert(offset == index_count);
  46. // fill triangle data
  47. for (size_t i = 0; i < face_count; ++i)
  48. {
  49. unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
  50. adjacency.data[adjacency.offsets[a]++] = unsigned(i);
  51. adjacency.data[adjacency.offsets[b]++] = unsigned(i);
  52. adjacency.data[adjacency.offsets[c]++] = unsigned(i);
  53. }
  54. // fix offsets that have been disturbed by the previous pass
  55. for (size_t i = 0; i < vertex_count; ++i)
  56. {
  57. assert(adjacency.offsets[i] >= adjacency.counts[i]);
  58. adjacency.offsets[i] -= adjacency.counts[i];
  59. }
  60. }
  61. static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)
  62. {
  63. // check dead-end stack
  64. while (dead_end_top)
  65. {
  66. unsigned int vertex = dead_end[--dead_end_top];
  67. if (live_triangles[vertex] > 0)
  68. return vertex;
  69. }
  70. // input order
  71. while (input_cursor < vertex_count)
  72. {
  73. if (live_triangles[input_cursor] > 0)
  74. return input_cursor;
  75. ++input_cursor;
  76. }
  77. return ~0u;
  78. }
  79. static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
  80. {
  81. unsigned int best_candidate = ~0u;
  82. int best_priority = -1;
  83. for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)
  84. {
  85. unsigned int vertex = *next_candidate;
  86. // otherwise we don't need to process it
  87. if (live_triangles[vertex] > 0)
  88. {
  89. int priority = 0;
  90. // will it be in cache after fanning?
  91. if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
  92. {
  93. priority = timestamp - cache_timestamps[vertex]; // position in cache
  94. }
  95. if (priority > best_priority)
  96. {
  97. best_candidate = vertex;
  98. best_priority = priority;
  99. }
  100. }
  101. }
  102. return best_candidate;
  103. }
  104. static float vertexScore(int cache_position, unsigned int live_triangles)
  105. {
  106. assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));
  107. unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;
  108. return kVertexScoreTableCache[1 + cache_position] + kVertexScoreTableLive[live_triangles_clamped];
  109. }
  110. static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
  111. {
  112. // input order
  113. while (input_cursor < face_count)
  114. {
  115. if (!emitted_flags[input_cursor])
  116. return input_cursor;
  117. ++input_cursor;
  118. }
  119. return ~0u;
  120. }
  121. } // namespace meshopt
  122. void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
  123. {
  124. using namespace meshopt;
  125. assert(index_count % 3 == 0);
  126. meshopt_Allocator allocator;
  127. // guard for empty meshes
  128. if (index_count == 0 || vertex_count == 0)
  129. return;
  130. // support in-place optimization
  131. if (destination == indices)
  132. {
  133. unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
  134. memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
  135. indices = indices_copy;
  136. }
  137. unsigned int cache_size = 16;
  138. assert(cache_size <= kCacheSizeMax);
  139. size_t face_count = index_count / 3;
  140. // build adjacency information
  141. TriangleAdjacency adjacency = {};
  142. buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
  143. // live triangle counts
  144. unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
  145. memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
  146. // emitted flags
  147. unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
  148. memset(emitted_flags, 0, face_count);
  149. // compute initial vertex scores
  150. float* vertex_scores = allocator.allocate<float>(vertex_count);
  151. for (size_t i = 0; i < vertex_count; ++i)
  152. vertex_scores[i] = vertexScore(-1, live_triangles[i]);
  153. // compute triangle scores
  154. float* triangle_scores = allocator.allocate<float>(face_count);
  155. for (size_t i = 0; i < face_count; ++i)
  156. {
  157. unsigned int a = indices[i * 3 + 0];
  158. unsigned int b = indices[i * 3 + 1];
  159. unsigned int c = indices[i * 3 + 2];
  160. triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
  161. }
  162. unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
  163. unsigned int* cache = cache_holder;
  164. unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
  165. size_t cache_count = 0;
  166. unsigned int current_triangle = 0;
  167. unsigned int input_cursor = 1;
  168. unsigned int output_triangle = 0;
  169. while (current_triangle != ~0u)
  170. {
  171. assert(output_triangle < face_count);
  172. unsigned int a = indices[current_triangle * 3 + 0];
  173. unsigned int b = indices[current_triangle * 3 + 1];
  174. unsigned int c = indices[current_triangle * 3 + 2];
  175. // output indices
  176. destination[output_triangle * 3 + 0] = a;
  177. destination[output_triangle * 3 + 1] = b;
  178. destination[output_triangle * 3 + 2] = c;
  179. output_triangle++;
  180. // update emitted flags
  181. emitted_flags[current_triangle] = true;
  182. triangle_scores[current_triangle] = 0;
  183. // new triangle
  184. size_t cache_write = 0;
  185. cache_new[cache_write++] = a;
  186. cache_new[cache_write++] = b;
  187. cache_new[cache_write++] = c;
  188. // old triangles
  189. for (size_t i = 0; i < cache_count; ++i)
  190. {
  191. unsigned int index = cache[i];
  192. if (index != a && index != b && index != c)
  193. {
  194. cache_new[cache_write++] = index;
  195. }
  196. }
  197. unsigned int* cache_temp = cache;
  198. cache = cache_new, cache_new = cache_temp;
  199. cache_count = cache_write > cache_size ? cache_size : cache_write;
  200. // update live triangle counts
  201. live_triangles[a]--;
  202. live_triangles[b]--;
  203. live_triangles[c]--;
  204. // remove emitted triangle from adjacency data
  205. // this makes sure that we spend less time traversing these lists on subsequent iterations
  206. for (size_t k = 0; k < 3; ++k)
  207. {
  208. unsigned int index = indices[current_triangle * 3 + k];
  209. unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
  210. size_t neighbours_size = adjacency.counts[index];
  211. for (size_t i = 0; i < neighbours_size; ++i)
  212. {
  213. unsigned int tri = neighbours[i];
  214. if (tri == current_triangle)
  215. {
  216. neighbours[i] = neighbours[neighbours_size - 1];
  217. adjacency.counts[index]--;
  218. break;
  219. }
  220. }
  221. }
  222. unsigned int best_triangle = ~0u;
  223. float best_score = 0;
  224. // update cache positions, vertex scores and triangle scores, and find next best triangle
  225. for (size_t i = 0; i < cache_write; ++i)
  226. {
  227. unsigned int index = cache[i];
  228. int cache_position = i >= cache_size ? -1 : int(i);
  229. // update vertex score
  230. float score = vertexScore(cache_position, live_triangles[index]);
  231. float score_diff = score - vertex_scores[index];
  232. vertex_scores[index] = score;
  233. // update scores of vertex triangles
  234. const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index];
  235. const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index];
  236. for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
  237. {
  238. unsigned int tri = *it;
  239. assert(!emitted_flags[tri]);
  240. float tri_score = triangle_scores[tri] + score_diff;
  241. assert(tri_score > 0);
  242. if (best_score < tri_score)
  243. {
  244. best_triangle = tri;
  245. best_score = tri_score;
  246. }
  247. triangle_scores[tri] = tri_score;
  248. }
  249. }
  250. // step through input triangles in order if we hit a dead-end
  251. current_triangle = best_triangle;
  252. if (current_triangle == ~0u)
  253. {
  254. current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
  255. }
  256. }
  257. assert(input_cursor == face_count);
  258. assert(output_triangle == face_count);
  259. }
  260. void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
  261. {
  262. using namespace meshopt;
  263. assert(index_count % 3 == 0);
  264. assert(cache_size >= 3);
  265. meshopt_Allocator allocator;
  266. // guard for empty meshes
  267. if (index_count == 0 || vertex_count == 0)
  268. return;
  269. // support in-place optimization
  270. if (destination == indices)
  271. {
  272. unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
  273. memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
  274. indices = indices_copy;
  275. }
  276. size_t face_count = index_count / 3;
  277. // build adjacency information
  278. TriangleAdjacency adjacency = {};
  279. buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
  280. // live triangle counts
  281. unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
  282. memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
  283. // cache time stamps
  284. unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
  285. memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
  286. // dead-end stack
  287. unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
  288. unsigned int dead_end_top = 0;
  289. // emitted flags
  290. unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
  291. memset(emitted_flags, 0, face_count);
  292. unsigned int current_vertex = 0;
  293. unsigned int timestamp = cache_size + 1;
  294. unsigned int input_cursor = 1; // vertex to restart from in case of dead-end
  295. unsigned int output_triangle = 0;
  296. while (current_vertex != ~0u)
  297. {
  298. const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;
  299. // emit all vertex neighbours
  300. const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
  301. const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex];
  302. for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
  303. {
  304. unsigned int triangle = *it;
  305. if (!emitted_flags[triangle])
  306. {
  307. unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
  308. // output indices
  309. destination[output_triangle * 3 + 0] = a;
  310. destination[output_triangle * 3 + 1] = b;
  311. destination[output_triangle * 3 + 2] = c;
  312. output_triangle++;
  313. // update dead-end stack
  314. dead_end[dead_end_top + 0] = a;
  315. dead_end[dead_end_top + 1] = b;
  316. dead_end[dead_end_top + 2] = c;
  317. dead_end_top += 3;
  318. // update live triangle counts
  319. live_triangles[a]--;
  320. live_triangles[b]--;
  321. live_triangles[c]--;
  322. // update cache info
  323. // if vertex is not in cache, put it in cache
  324. if (timestamp - cache_timestamps[a] > cache_size)
  325. cache_timestamps[a] = timestamp++;
  326. if (timestamp - cache_timestamps[b] > cache_size)
  327. cache_timestamps[b] = timestamp++;
  328. if (timestamp - cache_timestamps[c] > cache_size)
  329. cache_timestamps[c] = timestamp++;
  330. // update emitted flags
  331. emitted_flags[triangle] = true;
  332. }
  333. }
  334. // next candidates are the ones we pushed to dead-end stack just now
  335. const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
  336. // get next vertex
  337. current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
  338. if (current_vertex == ~0u)
  339. {
  340. current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
  341. }
  342. }
  343. assert(output_triangle == face_count);
  344. }