2
0

clusterizer.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
  2. #include "meshoptimizer.h"
  3. #include <assert.h>
  4. #include <math.h>
  5. #include <string.h>
  6. // This work is based on:
  7. // Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016
  8. // Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016
  9. // Jack Ritter. An Efficient Bounding Sphere. 1990
  10. namespace meshopt
  11. {
  12. static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
  13. {
  14. assert(count > 0);
  15. // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates
  16. size_t pmin[3] = {0, 0, 0};
  17. size_t pmax[3] = {0, 0, 0};
  18. for (size_t i = 0; i < count; ++i)
  19. {
  20. const float* p = points[i];
  21. for (int axis = 0; axis < 3; ++axis)
  22. {
  23. pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis];
  24. pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? i : pmax[axis];
  25. }
  26. }
  27. // find the pair of points with largest distance
  28. float paxisd2 = 0;
  29. int paxis = 0;
  30. for (int axis = 0; axis < 3; ++axis)
  31. {
  32. const float* p1 = points[pmin[axis]];
  33. const float* p2 = points[pmax[axis]];
  34. float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]);
  35. if (d2 > paxisd2)
  36. {
  37. paxisd2 = d2;
  38. paxis = axis;
  39. }
  40. }
  41. // use the longest segment as the initial sphere diameter
  42. const float* p1 = points[pmin[paxis]];
  43. const float* p2 = points[pmax[paxis]];
  44. float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2};
  45. float radius = sqrtf(paxisd2) / 2;
  46. // iteratively adjust the sphere up until all points fit
  47. for (size_t i = 0; i < count; ++i)
  48. {
  49. const float* p = points[i];
  50. float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);
  51. if (d2 > radius * radius)
  52. {
  53. float d = sqrtf(d2);
  54. assert(d > 0);
  55. float k = 0.5f + (radius / d) / 2;
  56. center[0] = center[0] * k + p[0] * (1 - k);
  57. center[1] = center[1] * k + p[1] * (1 - k);
  58. center[2] = center[2] * k + p[2] * (1 - k);
  59. radius = (radius + d) / 2;
  60. }
  61. }
  62. result[0] = center[0];
  63. result[1] = center[1];
  64. result[2] = center[2];
  65. result[3] = radius;
  66. }
  67. } // namespace meshopt
  68. size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
  69. {
  70. assert(index_count % 3 == 0);
  71. assert(max_vertices >= 3);
  72. assert(max_triangles >= 1);
  73. // meshlet construction is limited by max vertices and max triangles per meshlet
  74. // the worst case is that the input is an unindexed stream since this equally stresses both limits
  75. // note that we assume that in the worst case, we leave 2 vertices unpacked in each meshlet - if we have space for 3 we can pack any triangle
  76. size_t max_vertices_conservative = max_vertices - 2;
  77. size_t meshlet_limit_vertices = (index_count + max_vertices_conservative - 1) / max_vertices_conservative;
  78. size_t meshlet_limit_triangles = (index_count / 3 + max_triangles - 1) / max_triangles;
  79. return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles;
  80. }
  81. size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
  82. {
  83. assert(index_count % 3 == 0);
  84. assert(max_vertices >= 3);
  85. assert(max_triangles >= 1);
  86. meshopt_Allocator allocator;
  87. meshopt_Meshlet meshlet;
  88. memset(&meshlet, 0, sizeof(meshlet));
  89. assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0]));
  90. assert(max_triangles <= sizeof(meshlet.indices) / 3);
  91. // index of the vertex in the meshlet, 0xff if the vertex isn't used
  92. unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
  93. memset(used, -1, vertex_count);
  94. size_t offset = 0;
  95. for (size_t i = 0; i < index_count; i += 3)
  96. {
  97. unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
  98. assert(a < vertex_count && b < vertex_count && c < vertex_count);
  99. unsigned char& av = used[a];
  100. unsigned char& bv = used[b];
  101. unsigned char& cv = used[c];
  102. unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
  103. if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
  104. {
  105. destination[offset++] = meshlet;
  106. for (size_t j = 0; j < meshlet.vertex_count; ++j)
  107. used[meshlet.vertices[j]] = 0xff;
  108. memset(&meshlet, 0, sizeof(meshlet));
  109. }
  110. if (av == 0xff)
  111. {
  112. av = meshlet.vertex_count;
  113. meshlet.vertices[meshlet.vertex_count++] = a;
  114. }
  115. if (bv == 0xff)
  116. {
  117. bv = meshlet.vertex_count;
  118. meshlet.vertices[meshlet.vertex_count++] = b;
  119. }
  120. if (cv == 0xff)
  121. {
  122. cv = meshlet.vertex_count;
  123. meshlet.vertices[meshlet.vertex_count++] = c;
  124. }
  125. meshlet.indices[meshlet.triangle_count][0] = av;
  126. meshlet.indices[meshlet.triangle_count][1] = bv;
  127. meshlet.indices[meshlet.triangle_count][2] = cv;
  128. meshlet.triangle_count++;
  129. }
  130. if (meshlet.triangle_count)
  131. destination[offset++] = meshlet;
  132. assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
  133. return offset;
  134. }
  135. meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
  136. {
  137. using namespace meshopt;
  138. assert(index_count % 3 == 0);
  139. assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
  140. assert(vertex_positions_stride % sizeof(float) == 0);
  141. assert(index_count / 3 <= 256);
  142. (void)vertex_count;
  143. size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
  144. // compute triangle normals and gather triangle corners
  145. float normals[256][3];
  146. float corners[256][3][3];
  147. size_t triangles = 0;
  148. for (size_t i = 0; i < index_count; i += 3)
  149. {
  150. unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
  151. assert(a < vertex_count && b < vertex_count && c < vertex_count);
  152. const float* p0 = vertex_positions + vertex_stride_float * a;
  153. const float* p1 = vertex_positions + vertex_stride_float * b;
  154. const float* p2 = vertex_positions + vertex_stride_float * c;
  155. float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
  156. float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
  157. float normalx = p10[1] * p20[2] - p10[2] * p20[1];
  158. float normaly = p10[2] * p20[0] - p10[0] * p20[2];
  159. float normalz = p10[0] * p20[1] - p10[1] * p20[0];
  160. float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
  161. // no need to include degenerate triangles - they will be invisible anyway
  162. if (area == 0.f)
  163. continue;
  164. // record triangle normals & corners for future use; normal and corner 0 define a plane equation
  165. normals[triangles][0] = normalx / area;
  166. normals[triangles][1] = normaly / area;
  167. normals[triangles][2] = normalz / area;
  168. memcpy(corners[triangles][0], p0, 3 * sizeof(float));
  169. memcpy(corners[triangles][1], p1, 3 * sizeof(float));
  170. memcpy(corners[triangles][2], p2, 3 * sizeof(float));
  171. triangles++;
  172. }
  173. meshopt_Bounds bounds = {};
  174. // degenerate cluster, no valid triangles => trivial reject (cone data is 0)
  175. if (triangles == 0)
  176. return bounds;
  177. // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well
  178. float psphere[4] = {};
  179. computeBoundingSphere(psphere, corners[0], triangles * 3);
  180. float center[3] = {psphere[0], psphere[1], psphere[2]};
  181. // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis
  182. float nsphere[4] = {};
  183. computeBoundingSphere(nsphere, normals, triangles);
  184. float axis[3] = {nsphere[0], nsphere[1], nsphere[2]};
  185. float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]);
  186. float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength;
  187. axis[0] *= invaxislength;
  188. axis[1] *= invaxislength;
  189. axis[2] *= invaxislength;
  190. // compute a tight cone around all normals, mindp = cos(angle/2)
  191. float mindp = 1.f;
  192. for (size_t i = 0; i < triangles; ++i)
  193. {
  194. float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2];
  195. mindp = (dp < mindp) ? dp : mindp;
  196. }
  197. // fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones
  198. bounds.center[0] = center[0];
  199. bounds.center[1] = center[1];
  200. bounds.center[2] = center[2];
  201. bounds.radius = psphere[3];
  202. // degenerate cluster, normal cone is larger than a hemisphere => trivial accept
  203. // note that if mindp is positive but close to 0, the triangle intersection code below gets less stable
  204. // we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful
  205. if (mindp <= 0.1f)
  206. {
  207. bounds.cone_cutoff = 1;
  208. bounds.cone_cutoff_s8 = 127;
  209. return bounds;
  210. }
  211. float maxt = 0;
  212. // we need to find the point on center-t*axis ray that lies in negative half-space of all triangles
  213. for (size_t i = 0; i < triangles; ++i)
  214. {
  215. // dot(center-t*axis-corner, trinormal) = 0
  216. // dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0
  217. float cx = center[0] - corners[i][0][0];
  218. float cy = center[1] - corners[i][0][1];
  219. float cz = center[2] - corners[i][0][2];
  220. float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2];
  221. float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2];
  222. // dn should be larger than mindp cutoff above
  223. assert(dn > 0.f);
  224. float t = dc / dn;
  225. maxt = (t > maxt) ? t : maxt;
  226. }
  227. // cone apex should be in the negative half-space of all cluster triangles by construction
  228. bounds.cone_apex[0] = center[0] - axis[0] * maxt;
  229. bounds.cone_apex[1] = center[1] - axis[1] * maxt;
  230. bounds.cone_apex[2] = center[2] - axis[2] * maxt;
  231. // note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis
  232. bounds.cone_axis[0] = axis[0];
  233. bounds.cone_axis[1] = axis[1];
  234. bounds.cone_axis[2] = axis[2];
  235. // cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone
  236. // which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a))
  237. bounds.cone_cutoff = sqrtf(1 - mindp * mindp);
  238. // quantize axis & cutoff to 8-bit SNORM format
  239. bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8));
  240. bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8));
  241. bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8));
  242. // for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error
  243. float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]);
  244. float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]);
  245. float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]);
  246. // note that we need to round this up instead of rounding to nearest, hence +1
  247. int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1);
  248. bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 127 : (signed char)(cone_cutoff_s8);
  249. return bounds;
  250. }
  251. meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
  252. {
  253. assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
  254. assert(vertex_positions_stride % sizeof(float) == 0);
  255. unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])];
  256. for (size_t i = 0; i < meshlet->triangle_count; ++i)
  257. {
  258. unsigned int a = meshlet->vertices[meshlet->indices[i][0]];
  259. unsigned int b = meshlet->vertices[meshlet->indices[i][1]];
  260. unsigned int c = meshlet->vertices[meshlet->indices[i][2]];
  261. assert(a < vertex_count && b < vertex_count && c < vertex_count);
  262. indices[i * 3 + 0] = a;
  263. indices[i * 3 + 1] = b;
  264. indices[i * 3 + 2] = c;
  265. }
  266. return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
  267. }