MeshPrimitiveOptimizer.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. #ifndef GUL_MESH_PRIMITIVE_OPTIMIZER_H
  2. #define GUL_MESH_PRIMITIVE_OPTIMIZER_H
  3. #include "MeshPrimitive2.h"
  4. #include <meshoptimizer.h>
  5. namespace gul
  6. {
  7. /**
  8. * @brief OptimizeMeshPrimitive
  9. * @param V
  10. *
  11. * Uses Arseny Kapoulkine's MeshOptimizer library to generate Level of Details
  12. * for the mesh primitive. ( https://github.com/zeux/meshoptimizer )
  13. *
  14. * Each level of detail is a set of different indices which are stored
  15. * as submeshes.
  16. *
  17. * Requirements:
  18. * The submesh array in the MeshPrimitive must be cleared
  19. * The mesh MUST have index buffers
  20. * The index buffer must be uint32
  21. *
  22. * GenerateMeshLOD(M, 10, 0.7f, 0.01f);
  23. * assert(M.subMeshes.size() > 0);
  24. *
  25. */
  26. inline std::vector<DrawCall> GenerateMeshLOD(MeshPrimitive & V, size_t lod_count = 10, float thresholdPowerBase=0.7f, float target_error = 1e-2)
  27. {
  28. assert( V.subMeshes.size() == 0);
  29. assert( V.indexCount() > 0);
  30. assert( V.INDEX.getAttributeSize() == 4);
  31. const size_t kCacheSize = 16;
  32. uint32_t vertexStride = V.POSITION.getAttributeSize();
  33. uint32_t vertexCount = V.POSITION.attributeCount();
  34. auto vertexData = static_cast<float const*>(V.POSITION.data());
  35. // generate 4 LOD levels (1-4), with each subsequent LOD using 70% triangles
  36. // note that each LOD uses the same (shared) vertex buffer
  37. std::vector<std::vector<uint32_t>> lods(lod_count);
  38. auto indices = V.INDEX.toVector<uint32_t>();
  39. lods[0] = indices;
  40. for (size_t i = 1; i < lod_count; ++i)
  41. {
  42. std::vector<unsigned int>& lod = lods[i];
  43. float threshold = powf(thresholdPowerBase, float(i));
  44. size_t target_index_count = size_t(indices.size() * threshold) / 3 * 3;
  45. //float target_error = 1e-2f;
  46. // we can simplify all the way from base level or from the last result
  47. // simplifying from the base level sometimes produces better results, but simplifying from last level is faster
  48. const std::vector<unsigned int>& source = lods[i - 1];
  49. //const std::vector<unsigned int>& source = lods[0];
  50. if (source.size() < target_index_count)
  51. target_index_count = source.size();
  52. lod.resize(source.size());
  53. lod.resize(meshopt_simplify(&lod[0], &source[0], source.size(), vertexData, vertexCount, vertexStride, target_index_count, target_error));
  54. }
  55. // optimize each individual LOD for vertex cache & overdraw
  56. for (size_t i = 0; i < lod_count; ++i)
  57. {
  58. std::vector<unsigned int>& lod = lods[i];
  59. meshopt_optimizeVertexCache(&lod[0], &lod[0], lod.size(), vertexCount);
  60. meshopt_optimizeOverdraw(&lod[0], &lod[0], lod.size(), vertexData, vertexCount, vertexStride, 1.0f);
  61. }
  62. // concatenate all LODs into one IB
  63. // note: the order of concatenation is important - since we optimize the entire IB for vertex fetch,
  64. // putting coarse LODs first makes sure that the vertex range referenced by them is as small as possible
  65. // some GPUs process the entire range referenced by the index buffer region so doing this optimizes the vertex transform
  66. // cost for coarse LODs
  67. // this order also produces much better vertex fetch cache coherency for coarse LODs (since they're essentially optimized first)
  68. // somewhat surprisingly, the vertex fetch cache coherency for fine LODs doesn't seem to suffer that much.
  69. auto lod_index_offsets = std::vector<size_t>(lod_count, 0);//[lod_count] = {};
  70. auto lod_index_counts = std::vector<size_t>(lod_count, 0);//[lod_count] = {};
  71. size_t total_index_count = 0;
  72. for (int i = lod_count - 1; i >= 0; --i)
  73. {
  74. lod_index_offsets[i] = total_index_count;
  75. lod_index_counts[i] = lods[i].size();
  76. total_index_count += lods[i].size();
  77. }
  78. indices.resize(total_index_count);
  79. V.subMeshes.clear();
  80. std::vector<DrawCall> _out;
  81. for (size_t i = 0; i < lod_count; ++i)
  82. {
  83. memcpy(&indices[lod_index_offsets[i]], &lods[i][0], lods[i].size() * sizeof(lods[i][0]));
  84. DrawCall dc;
  85. dc.indexCount = uint32_t(lods[i].size());
  86. dc.indexOffset = lod_index_offsets[i];
  87. dc.vertexCount = 0;
  88. dc.vertexOffset = 0;
  89. V.subMeshes.push_back(dc);
  90. _out.push_back(dc);
  91. }
  92. V.INDEX = indices;
  93. return _out;
  94. #if 0
  95. return lods;
  96. std::vector<Vertex> vertices = mesh.vertices;
  97. // vertex fetch optimization should go last as it depends on the final index order
  98. // note that the order of LODs above affects vertex fetch results
  99. meshopt_optimizeVertexFetch(&vertices[0], &indices[0], indices.size(), &vertices[0], vertices.size(), vertexStride);
  100. double end = timestamp();
  101. printf("%-9s: %d triangles => %d LOD levels down to %d triangles in %.2f msec, optimized in %.2f msec\n",
  102. "SimplifyC",
  103. int(lod_index_counts[0]) / 3, int(lod_count), int(lod_index_counts[lod_count - 1]) / 3,
  104. (middle - start) * 1000, (end - middle) * 1000);
  105. // for using LOD data at runtime, in addition to vertices and indices you have to save lod_index_offsets/lod_index_counts.
  106. {
  107. meshopt_VertexCacheStatistics vcs0 = meshopt_analyzeVertexCache(&indices[lod_index_offsets[0]], lod_index_counts[0], vertices.size(), kCacheSize, 0, 0);
  108. meshopt_VertexFetchStatistics vfs0 = meshopt_analyzeVertexFetch(&indices[lod_index_offsets[0]], lod_index_counts[0], vertices.size(), vertexStride);
  109. meshopt_VertexCacheStatistics vcsN = meshopt_analyzeVertexCache(&indices[lod_index_offsets[lod_count - 1]], lod_index_counts[lod_count - 1], vertices.size(), kCacheSize, 0, 0);
  110. meshopt_VertexFetchStatistics vfsN = meshopt_analyzeVertexFetch(&indices[lod_index_offsets[lod_count - 1]], lod_index_counts[lod_count - 1], vertices.size(), vertexStride);
  111. typedef PackedVertexOct PV;
  112. std::vector<PV> pv(vertices.size());
  113. packMesh(pv, vertices);
  114. std::vector<unsigned char> vbuf(meshopt_encodeVertexBufferBound(vertices.size(), sizeof(PV)));
  115. vbuf.resize(meshopt_encodeVertexBuffer(&vbuf[0], vbuf.size(), &pv[0], vertices.size(), sizeof(PV)));
  116. std::vector<unsigned char> ibuf(meshopt_encodeIndexBufferBound(indices.size(), vertices.size()));
  117. ibuf.resize(meshopt_encodeIndexBuffer(&ibuf[0], ibuf.size(), &indices[0], indices.size()));
  118. printf("%-9s ACMR %f...%f Overfetch %f..%f Codec VB %.1f bits/vertex IB %.1f bits/triangle\n",
  119. "",
  120. vcs0.acmr, vcsN.acmr, vfs0.overfetch, vfsN.overfetch,
  121. double(vbuf.size()) / double(vertices.size()) * 8,
  122. double(ibuf.size()) / double(indices.size() / 3) * 8);
  123. }
  124. #endif
  125. }
  126. inline std::vector<DrawCall> GenerateMeshLOD(MeshPrimitive & V, size_t lod_count = 10, float thresholdPowerBase=0.7f, float target_error = 1e-2)
  127. {
  128. assert( V.subMeshes.size() == 0);
  129. assert( V.indexCount() > 0);
  130. assert( V.INDEX.getAttributeSize() == 4);
  131. const size_t kCacheSize = 16;
  132. uint32_t vertexStride = V.POSITION.getAttributeSize();
  133. uint32_t vertexCount = V.POSITION.attributeCount();
  134. auto vertexData = static_cast<float const*>(V.POSITION.data());
  135. // generate 4 LOD levels (1-4), with each subsequent LOD using 70% triangles
  136. // note that each LOD uses the same (shared) vertex buffer
  137. std::vector<std::vector<uint32_t>> lods(lod_count);
  138. auto indices = V.INDEX.toVector<uint32_t>();
  139. lods[0] = indices;
  140. for (size_t i = 1; i < lod_count; ++i)
  141. {
  142. std::vector<unsigned int>& lod = lods[i];
  143. float threshold = powf(thresholdPowerBase, float(i));
  144. size_t target_index_count = size_t(indices.size() * threshold) / 3 * 3;
  145. //float target_error = 1e-2f;
  146. // we can simplify all the way from base level or from the last result
  147. // simplifying from the base level sometimes produces better results, but simplifying from last level is faster
  148. const std::vector<unsigned int>& source = lods[i - 1];
  149. //const std::vector<unsigned int>& source = lods[0];
  150. if (source.size() < target_index_count)
  151. target_index_count = source.size();
  152. lod.resize(source.size());
  153. lod.resize(meshopt_simplify(&lod[0], &source[0], source.size(), vertexData, vertexCount, vertexStride, target_index_count, target_error));
  154. }
  155. // optimize each individual LOD for vertex cache & overdraw
  156. for (size_t i = 0; i < lod_count; ++i)
  157. {
  158. std::vector<unsigned int>& lod = lods[i];
  159. meshopt_optimizeVertexCache(&lod[0], &lod[0], lod.size(), vertexCount);
  160. meshopt_optimizeOverdraw(&lod[0], &lod[0], lod.size(), vertexData, vertexCount, vertexStride, 1.0f);
  161. }
  162. // concatenate all LODs into one IB
  163. // note: the order of concatenation is important - since we optimize the entire IB for vertex fetch,
  164. // putting coarse LODs first makes sure that the vertex range referenced by them is as small as possible
  165. // some GPUs process the entire range referenced by the index buffer region so doing this optimizes the vertex transform
  166. // cost for coarse LODs
  167. // this order also produces much better vertex fetch cache coherency for coarse LODs (since they're essentially optimized first)
  168. // somewhat surprisingly, the vertex fetch cache coherency for fine LODs doesn't seem to suffer that much.
  169. auto lod_index_offsets = std::vector<size_t>(lod_count, 0);//[lod_count] = {};
  170. auto lod_index_counts = std::vector<size_t>(lod_count, 0);//[lod_count] = {};
  171. size_t total_index_count = 0;
  172. for (int i = lod_count - 1; i >= 0; --i)
  173. {
  174. lod_index_offsets[i] = total_index_count;
  175. lod_index_counts[i] = lods[i].size();
  176. total_index_count += lods[i].size();
  177. }
  178. indices.resize(total_index_count);
  179. V.subMeshes.clear();
  180. std::vector<DrawCall> _out;
  181. for (size_t i = 0; i < lod_count; ++i)
  182. {
  183. memcpy(&indices[lod_index_offsets[i]], &lods[i][0], lods[i].size() * sizeof(lods[i][0]));
  184. DrawCall dc;
  185. dc.indexCount = uint32_t(lods[i].size());
  186. dc.indexOffset = lod_index_offsets[i];
  187. dc.vertexCount = 0;
  188. dc.vertexOffset = 0;
  189. V.subMeshes.push_back(dc);
  190. _out.push_back(dc);
  191. }
  192. V.INDEX = indices;
  193. return _out;
  194. }
  195. }
  196. #endif