MeshPrimitiveOptimizer.h 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. #ifndef GUL_MESH_PRIMITIVE_OPTIMIZER_H
  2. #define GUL_MESH_PRIMITIVE_OPTIMIZER_H
  3. #include "MeshPrimitive2.h"
  4. #include <meshoptimizer.h>
  5. namespace gul
  6. {
  7. /**
  8. * @brief OptimizeMeshPrimitive
  9. * @param V
  10. *
  11. * Uses Zeux's MeshOptimizer library to generate Level of Details
  12. * for the mesh.
  13. *
  14. * Each level of detail is a set of different indices which are stored
  15. * as submeshes.
  16. *
  17. * Requireemnts:
  18. * The submesh array in the MeshPrimitive must be cleared
  19. * The mesh MUST have index buffers
  20. * The index buffer must be uint32
  21. *
  22. * GenerateMeshLOD(M, 10, 0.7f, 0.01f);
  23. * assert(M.subMeshes.size() > 0);
  24. *
  25. */
  26. inline void GenerateMeshLOD(MeshPrimitive & V, size_t lod_count = 10, float thresholdPowerBase=0.7f, float target_error = 1e-2)
  27. {
  28. assert( V.subMeshes.size() == 0);
  29. assert( V.indexCount() > 0);
  30. assert( V.INDEX.getAttributeSize() == 4);
  31. const size_t kCacheSize = 16;
  32. uint32_t vertexStride = V.POSITION.getAttributeSize();
  33. uint32_t vertexCount = V.POSITION.attributeCount();
  34. auto vertexData = static_cast<float const*>(V.POSITION.data());
  35. // generate 4 LOD levels (1-4), with each subsequent LOD using 70% triangles
  36. // note that each LOD uses the same (shared) vertex buffer
  37. std::vector<std::vector<uint32_t>> lods(lod_count);
  38. auto indices = V.INDEX.toVector<uint32_t>();
  39. lods[0] = indices;
  40. for (size_t i = 1; i < lod_count; ++i)
  41. {
  42. std::vector<unsigned int>& lod = lods[i];
  43. float threshold = powf(thresholdPowerBase, float(i));
  44. size_t target_index_count = size_t(indices.size() * threshold) / 3 * 3;
  45. //float target_error = 1e-2f;
  46. // we can simplify all the way from base level or from the last result
  47. // simplifying from the base level sometimes produces better results, but simplifying from last level is faster
  48. const std::vector<unsigned int>& source = lods[i - 1];
  49. //const std::vector<unsigned int>& source = lods[0];
  50. if (source.size() < target_index_count)
  51. target_index_count = source.size();
  52. lod.resize(source.size());
  53. lod.resize(meshopt_simplify(&lod[0], &source[0], source.size(), vertexData, vertexCount, vertexStride, target_index_count, target_error));
  54. }
  55. // optimize each individual LOD for vertex cache & overdraw
  56. for (size_t i = 0; i < lod_count; ++i)
  57. {
  58. std::vector<unsigned int>& lod = lods[i];
  59. meshopt_optimizeVertexCache(&lod[0], &lod[0], lod.size(), vertexCount);
  60. meshopt_optimizeOverdraw(&lod[0], &lod[0], lod.size(), vertexData, vertexCount, vertexStride, 1.0f);
  61. }
  62. // concatenate all LODs into one IB
  63. // note: the order of concatenation is important - since we optimize the entire IB for vertex fetch,
  64. // putting coarse LODs first makes sure that the vertex range referenced by them is as small as possible
  65. // some GPUs process the entire range referenced by the index buffer region so doing this optimizes the vertex transform
  66. // cost for coarse LODs
  67. // this order also produces much better vertex fetch cache coherency for coarse LODs (since they're essentially optimized first)
  68. // somewhat surprisingly, the vertex fetch cache coherency for fine LODs doesn't seem to suffer that much.
  69. auto lod_index_offsets = std::vector<size_t>(lod_count, 0);//[lod_count] = {};
  70. auto lod_index_counts = std::vector<size_t>(lod_count, 0);//[lod_count] = {};
  71. size_t total_index_count = 0;
  72. for (int i = lod_count - 1; i >= 0; --i)
  73. {
  74. lod_index_offsets[i] = total_index_count;
  75. lod_index_counts[i] = lods[i].size();
  76. total_index_count += lods[i].size();
  77. }
  78. indices.resize(total_index_count);
  79. V.subMeshes.clear();
  80. for (size_t i = 0; i < lod_count; ++i)
  81. {
  82. memcpy(&indices[lod_index_offsets[i]], &lods[i][0], lods[i].size() * sizeof(lods[i][0]));
  83. DrawCall dc;
  84. dc.indexCount = uint32_t(lods[i].size());
  85. dc.indexOffset = lod_index_offsets[i];
  86. dc.vertexCount = 0;
  87. dc.vertexOffset = 0;
  88. V.subMeshes.push_back(dc);
  89. }
  90. V.INDEX = indices;
  91. #if 0
  92. return lods;
  93. std::vector<Vertex> vertices = mesh.vertices;
  94. // vertex fetch optimization should go last as it depends on the final index order
  95. // note that the order of LODs above affects vertex fetch results
  96. meshopt_optimizeVertexFetch(&vertices[0], &indices[0], indices.size(), &vertices[0], vertices.size(), vertexStride);
  97. double end = timestamp();
  98. printf("%-9s: %d triangles => %d LOD levels down to %d triangles in %.2f msec, optimized in %.2f msec\n",
  99. "SimplifyC",
  100. int(lod_index_counts[0]) / 3, int(lod_count), int(lod_index_counts[lod_count - 1]) / 3,
  101. (middle - start) * 1000, (end - middle) * 1000);
  102. // for using LOD data at runtime, in addition to vertices and indices you have to save lod_index_offsets/lod_index_counts.
  103. {
  104. meshopt_VertexCacheStatistics vcs0 = meshopt_analyzeVertexCache(&indices[lod_index_offsets[0]], lod_index_counts[0], vertices.size(), kCacheSize, 0, 0);
  105. meshopt_VertexFetchStatistics vfs0 = meshopt_analyzeVertexFetch(&indices[lod_index_offsets[0]], lod_index_counts[0], vertices.size(), vertexStride);
  106. meshopt_VertexCacheStatistics vcsN = meshopt_analyzeVertexCache(&indices[lod_index_offsets[lod_count - 1]], lod_index_counts[lod_count - 1], vertices.size(), kCacheSize, 0, 0);
  107. meshopt_VertexFetchStatistics vfsN = meshopt_analyzeVertexFetch(&indices[lod_index_offsets[lod_count - 1]], lod_index_counts[lod_count - 1], vertices.size(), vertexStride);
  108. typedef PackedVertexOct PV;
  109. std::vector<PV> pv(vertices.size());
  110. packMesh(pv, vertices);
  111. std::vector<unsigned char> vbuf(meshopt_encodeVertexBufferBound(vertices.size(), sizeof(PV)));
  112. vbuf.resize(meshopt_encodeVertexBuffer(&vbuf[0], vbuf.size(), &pv[0], vertices.size(), sizeof(PV)));
  113. std::vector<unsigned char> ibuf(meshopt_encodeIndexBufferBound(indices.size(), vertices.size()));
  114. ibuf.resize(meshopt_encodeIndexBuffer(&ibuf[0], ibuf.size(), &indices[0], indices.size()));
  115. printf("%-9s ACMR %f...%f Overfetch %f..%f Codec VB %.1f bits/vertex IB %.1f bits/triangle\n",
  116. "",
  117. vcs0.acmr, vcsN.acmr, vfs0.overfetch, vfsN.overfetch,
  118. double(vbuf.size()) / double(vertices.size()) * 8,
  119. double(ibuf.size()) / double(indices.size() / 3) * 8);
  120. }
  121. #endif
  122. }
  123. }
  124. #endif