partition.cpp

// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <math.h>
#include <string.h>

// This work is based on:
// Takio Kurita. An efficient agglomerative clustering algorithm using a heap. 1991
namespace meshopt
{

// To avoid excessive recursion for malformed inputs, we switch to bisection after some depth
const int kMergeDepthCutoff = 40;

struct ClusterAdjacency
{
	unsigned int* offsets;
	unsigned int* clusters;
	unsigned int* shared;
};

static void filterClusterIndices(unsigned int* data, unsigned int* offsets, const unsigned int* cluster_indices, const unsigned int* cluster_index_counts, size_t cluster_count, unsigned char* used, size_t vertex_count, size_t total_index_count)
{
	(void)vertex_count;
	(void)total_index_count;

	size_t cluster_start = 0;
	size_t cluster_write = 0;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		offsets[i] = unsigned(cluster_write);

		// copy cluster indices, skipping duplicates
		for (size_t j = 0; j < cluster_index_counts[i]; ++j)
		{
			unsigned int v = cluster_indices[cluster_start + j];
			assert(v < vertex_count);
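			// branchless dedup: the write cursor only advances when this vertex has not been seen in the current cluster yet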
			data[cluster_write] = v;
			cluster_write += 1 - used[v];
			used[v] = 1;
		}

		// reset used flags for the next cluster
		for (size_t j = offsets[i]; j < cluster_write; ++j)
			used[data[j]] = 0;

		cluster_start += cluster_index_counts[i];
	}

	assert(cluster_start == total_index_count);
	assert(cluster_write <= total_index_count);
	offsets[cluster_count] = unsigned(cluster_write);
}
static float computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, float* out_center)
{
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

	float center[3] = {0, 0, 0};

	// approximate center of the cluster by averaging all vertex positions
	for (size_t j = 0; j < index_count; ++j)
	{
		const float* p = vertex_positions + indices[j] * vertex_stride_float;

		center[0] += p[0];
		center[1] += p[1];
		center[2] += p[2];
	}

	// note: technically clusters can't be empty per meshopt_partitionClusters but we check for a division by zero in case that changes
	if (index_count)
	{
		center[0] /= float(index_count);
		center[1] /= float(index_count);
		center[2] /= float(index_count);
	}

	// compute radius of the bounding sphere for each cluster
	float radiussq = 0;

	for (size_t j = 0; j < index_count; ++j)
	{
		const float* p = vertex_positions + indices[j] * vertex_stride_float;

		float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);
		radiussq = radiussq < d2 ? d2 : radiussq;
	}

	memcpy(out_center, center, sizeof(center));
	return sqrtf(radiussq);
}
static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, size_t vertex_count, meshopt_Allocator& allocator)
{
	unsigned int* ref_offsets = allocator.allocate<unsigned int>(vertex_count + 1);

	// compute number of clusters referenced by each vertex
	memset(ref_offsets, 0, vertex_count * sizeof(unsigned int));

	for (size_t i = 0; i < cluster_count; ++i)
	{
		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
			ref_offsets[cluster_indices[j]]++;
	}

	// compute (worst-case) number of adjacent clusters for each cluster
	size_t total_adjacency = 0;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		size_t count = 0;

		// worst case is every vertex has a disjoint cluster list
		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
			count += ref_offsets[cluster_indices[j]] - 1;

		// ... but only every other cluster can be adjacent in the end
		total_adjacency += count < cluster_count - 1 ? count : cluster_count - 1;
	}

	// we can now allocate adjacency buffers
	adjacency.offsets = allocator.allocate<unsigned int>(cluster_count + 1);
	adjacency.clusters = allocator.allocate<unsigned int>(total_adjacency);
	adjacency.shared = allocator.allocate<unsigned int>(total_adjacency);

	// convert ref counts to offsets
	size_t total_refs = 0;

	for (size_t i = 0; i < vertex_count; ++i)
	{
		size_t count = ref_offsets[i];
		ref_offsets[i] = unsigned(total_refs);
		total_refs += count;
	}

	unsigned int* ref_data = allocator.allocate<unsigned int>(total_refs);

	// fill cluster refs for each vertex
	for (size_t i = 0; i < cluster_count; ++i)
	{
		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
			ref_data[ref_offsets[cluster_indices[j]]++] = unsigned(i);
	}

	// after the previous pass, ref_offsets contain the end of the data for each vertex; shift it forward to get the start
	memmove(ref_offsets + 1, ref_offsets, vertex_count * sizeof(unsigned int));
	ref_offsets[0] = 0;

	// fill cluster adjacency for each cluster...
	adjacency.offsets[0] = 0;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		unsigned int* adj = adjacency.clusters + adjacency.offsets[i];
		unsigned int* shd = adjacency.shared + adjacency.offsets[i];
		size_t count = 0;

		for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j)
		{
			unsigned int v = cluster_indices[j];

			// merge the entire cluster list of each vertex into the current list
			for (size_t k = ref_offsets[v]; k < ref_offsets[v + 1]; ++k)
			{
				unsigned int c = ref_data[k];
				assert(c < cluster_count);

				if (c == unsigned(i))
					continue;

				// if the cluster is already in the list, increment the shared count
				bool found = false;
				for (size_t l = 0; l < count; ++l)
					if (adj[l] == c)
					{
						found = true;
						shd[l]++;
						break;
					}

				// ... or append a new cluster
				if (!found)
				{
					adj[count] = c;
					shd[count] = 1;
					count++;
				}
			}
		}

		// mark the end of the adjacency list; the next cluster will start there as well
		adjacency.offsets[i + 1] = adjacency.offsets[i] + unsigned(count);
	}

	assert(adjacency.offsets[cluster_count] <= total_adjacency);

	// ref_offsets can't be deallocated as it was allocated before adjacency
	allocator.deallocate(ref_data);
}
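// each cluster is a node in an intrusive linked list (next) that forms a group; size, vertices and bounds
// are only maintained on the root cluster of a group, while group points every member cluster at its root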
struct ClusterGroup
{
	int group;
	int next;
	unsigned int size; // 0 unless root
	unsigned int vertices;
	float center[3];
	float radius;
};

struct GroupOrder
{
	unsigned int id;
	int order;
};
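// heapPush/heapPop maintain a binary min-heap over GroupOrder::order so that the entry with the smallest key is always at heap[0]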
static void heapPush(GroupOrder* heap, size_t size, GroupOrder item)
{
	// insert a new element at the end (breaks heap invariant)
	heap[size++] = item;

	// bubble up the new element to its correct position
	size_t i = size - 1;
	while (i > 0 && heap[i].order < heap[(i - 1) / 2].order)
	{
		size_t p = (i - 1) / 2;

		GroupOrder temp = heap[i];
		heap[i] = heap[p];
		heap[p] = temp;
		i = p;
	}
}

static GroupOrder heapPop(GroupOrder* heap, size_t size)
{
	assert(size > 0);

	GroupOrder top = heap[0];

	// move the last element to the top (breaks heap invariant)
	heap[0] = heap[--size];

	// bubble down the new top element to its correct position
	size_t i = 0;
	while (i * 2 + 1 < size)
	{
		// find the smallest child
		size_t j = i * 2 + 1;
		j += (j + 1 < size && heap[j + 1].order < heap[j].order);

		// if the parent is already smaller than both children, we're done
		if (heap[j].order >= heap[i].order)
			break;

		// otherwise, swap the parent and child and continue
		GroupOrder temp = heap[i];
		heap[i] = heap[j];
		heap[j] = temp;
		i = j;
	}

	return top;
}
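// counts vertices shared between two groups by summing the precomputed shared-vertex weights over all pairs of their clusters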
static unsigned int countShared(const ClusterGroup* groups, int group1, int group2, const ClusterAdjacency& adjacency)
{
	unsigned int total = 0;

	for (int i1 = group1; i1 >= 0; i1 = groups[i1].next)
		for (int i2 = group2; i2 >= 0; i2 = groups[i2].next)
		{
			for (unsigned int adj = adjacency.offsets[i1]; adj < adjacency.offsets[i1 + 1]; ++adj)
				if (adjacency.clusters[adj] == unsigned(i2))
				{
					total += adjacency.shared[adj];
					break;
				}
		}

	return total;
}
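// grows target's bounding sphere in place so that it encloses both spheres (approximate sphere union)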
static void mergeBounds(ClusterGroup& target, const ClusterGroup& source)
{
	float r1 = target.radius, r2 = source.radius;
	float dx = source.center[0] - target.center[0], dy = source.center[1] - target.center[1], dz = source.center[2] - target.center[2];
	float d = sqrtf(dx * dx + dy * dy + dz * dz);

	if (d + r1 < r2)
	{
		target.center[0] = source.center[0];
		target.center[1] = source.center[1];
		target.center[2] = source.center[2];
		target.radius = source.radius;
		return;
	}

	if (d + r2 > r1)
	{
		float k = d > 0 ? (d + r2 - r1) / (2 * d) : 0.f;

		target.center[0] += dx * k;
		target.center[1] += dy * k;
		target.center[2] += dz * k;
		target.radius = (d + r2 + r1) / 2;
	}
}
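// ratio of target's radius to the radius of the union of both spheres; 1 when merging source would not grow target's bounds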
static float boundsScore(const ClusterGroup& target, const ClusterGroup& source)
{
	float r1 = target.radius, r2 = source.radius;
	float dx = source.center[0] - target.center[0], dy = source.center[1] - target.center[1], dz = source.center[2] - target.center[2];
	float d = sqrtf(dx * dx + dy * dy + dz * dz);

	float mr = d + r1 < r2 ? r2 : (d + r2 < r1 ? r1 : (d + r2 + r1) / 2);

	return mr > 0 ? r1 / mr : 0.f;
}
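// scans all clusters adjacent to the group's clusters and returns the neighboring group with the best merge score, or -1 if no candidate fits under max_partition_size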
static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdjacency& adjacency, size_t max_partition_size, bool use_bounds)
{
	assert(groups[id].size > 0);

	float group_rsqrt = 1.f / sqrtf(float(int(groups[id].vertices)));

	int best_group = -1;
	float best_score = 0;

	for (int ci = id; ci >= 0; ci = groups[ci].next)
	{
		for (unsigned int adj = adjacency.offsets[ci]; adj != adjacency.offsets[ci + 1]; ++adj)
		{
			int other = groups[adjacency.clusters[adj]].group;
			if (other < 0)
				continue;

			assert(groups[other].size > 0);

			if (groups[id].size + groups[other].size > max_partition_size)
				continue;

			unsigned int shared = countShared(groups, id, other, adjacency);
			float other_rsqrt = 1.f / sqrtf(float(int(groups[other].vertices)));

			// normalize shared count by the expected boundary of each group (+ keeps scoring symmetric)
			float score = float(int(shared)) * (group_rsqrt + other_rsqrt);

			// incorporate spatial score to favor merging nearby groups
			if (use_bounds)
				score *= 1.f + 0.4f * boundsScore(groups[id], groups[other]);

			if (score > best_score)
			{
				best_group = other;
				best_score = score;
			}
		}
	}

	return best_group;
}
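// fallback for small subtrees: greedily merge each undersized group into the group with the best bounds score (favoring nearby groups), subject to the size limit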
static void mergeLeaf(ClusterGroup* groups, unsigned int* order, size_t count, size_t target_partition_size, size_t max_partition_size)
{
	for (size_t i = 0; i < count; ++i)
	{
		unsigned int id = order[i];

		if (groups[id].size == 0 || groups[id].size >= target_partition_size)
			continue;

		float best_score = -1.f;
		int best_group = -1;

		for (size_t j = 0; j < count; ++j)
		{
			unsigned int other = order[j];
			if (id == other || groups[other].size == 0)
				continue;

			if (groups[id].size + groups[other].size > max_partition_size)
				continue;

			// favor merging nearby groups
			float score = boundsScore(groups[id], groups[other]);

			if (score > best_score)
			{
				best_score = score;
				best_group = other;
			}
		}

		// merge id *into* best_group; that way, we may merge more groups into the same best_group, maximizing the chance of reaching the target size
		if (best_group != -1)
		{
			// combine groups by linking them together
			unsigned int tail = best_group;
			while (groups[tail].next >= 0)
				tail = groups[tail].next;

			groups[tail].next = id;

			// update group sizes; note, we omit the vertices update for simplicity as it's not used for spatial merge
			groups[best_group].size += groups[id].size;
			groups[id].size = 0;

			// merge bounding spheres
			mergeBounds(groups[best_group], groups[id]);
			groups[id].radius = 0.f;
		}
	}
}
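// in-place partition of order[] around the pivot along the given axis; returns the number of groups whose center is below the pivot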
static size_t mergePartition(unsigned int* order, size_t count, const ClusterGroup* groups, int axis, float pivot)
{
	size_t m = 0;

	// invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot
	for (size_t i = 0; i < count; ++i)
	{
		float v = groups[order[i]].center[axis];

		// swap(m, i) unconditionally
		unsigned int t = order[m];
		order[m] = order[i];
		order[i] = t;

		// when v >= pivot, we swap i with m without advancing it, preserving invariants
		m += v < pivot;
	}

	return m;
}
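// recursively splits groups at the mean along the axis of largest variance, and merges undersized groups once a subtree is small enough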
static void mergeSpatial(ClusterGroup* groups, unsigned int* order, size_t count, size_t target_partition_size, size_t max_partition_size, size_t leaf_size, int depth)
{
	size_t total = 0;
	for (size_t i = 0; i < count; ++i)
		total += groups[order[i]].size;

	if (total <= max_partition_size || count <= leaf_size)
		return mergeLeaf(groups, order, count, target_partition_size, max_partition_size);

	float mean[3] = {};
	float vars[3] = {};
	float runc = 1, runs = 1;

	// gather statistics on the points in the subtree using Welford's algorithm
	for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc)
	{
		const float* point = groups[order[i]].center;

		for (int k = 0; k < 3; ++k)
		{
			float delta = point[k] - mean[k];
			mean[k] += delta * runs;
			vars[k] += delta * (point[k] - mean[k]);
		}
	}

	// split axis is the one where the variance is largest
	int axis = (vars[0] >= vars[1] && vars[0] >= vars[2]) ? 0 : (vars[1] >= vars[2] ? 1 : 2);

	float split = mean[axis];
	size_t middle = mergePartition(order, count, groups, axis, split);

	// enforce balance for degenerate partitions
	// this also ensures recursion depth is bounded on pathological inputs
	if (middle <= leaf_size / 2 || count - middle <= leaf_size / 2 || depth >= kMergeDepthCutoff)
		middle = count / 2;

	// recursion depth is logarithmic and bounded due to max depth check above
	mergeSpatial(groups, order, middle, target_partition_size, max_partition_size, leaf_size, depth + 1);
	mergeSpatial(groups, order + middle, count - middle, target_partition_size, max_partition_size, leaf_size, depth + 1);
}

} // namespace meshopt

size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size)
{
	using namespace meshopt;

	assert((vertex_positions == NULL || vertex_positions_stride >= 12) && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);
	assert(target_partition_size > 0);
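	// allow partitions to overshoot the target size by up to a third while merging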
	size_t max_partition_size = target_partition_size + target_partition_size / 3;

	meshopt_Allocator allocator;

	unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
	memset(used, 0, vertex_count);

	unsigned int* cluster_newindices = allocator.allocate<unsigned int>(total_index_count);
	unsigned int* cluster_offsets = allocator.allocate<unsigned int>(cluster_count + 1);

	// make new cluster index list that filters out duplicate indices
	filterClusterIndices(cluster_newindices, cluster_offsets, cluster_indices, cluster_index_counts, cluster_count, used, vertex_count, total_index_count);
	cluster_indices = cluster_newindices;

	// build cluster adjacency along with edge weights (shared vertex count)
	ClusterAdjacency adjacency = {};
	buildClusterAdjacency(adjacency, cluster_indices, cluster_offsets, cluster_count, vertex_count, allocator);

	ClusterGroup* groups = allocator.allocate<ClusterGroup>(cluster_count);
	memset(groups, 0, sizeof(ClusterGroup) * cluster_count);

	GroupOrder* order = allocator.allocate<GroupOrder>(cluster_count);
	size_t pending = 0;

	// create a singleton group for each cluster and order them by priority
	for (size_t i = 0; i < cluster_count; ++i)
	{
		groups[i].group = int(i);
		groups[i].next = -1;
		groups[i].size = 1;
		groups[i].vertices = cluster_offsets[i + 1] - cluster_offsets[i];
		assert(groups[i].vertices > 0);

		// compute bounding sphere for each cluster if positions are provided
		if (vertex_positions)
			groups[i].radius = computeClusterBounds(cluster_indices + cluster_offsets[i], cluster_offsets[i + 1] - cluster_offsets[i], vertex_positions, vertex_positions_stride, groups[i].center);

		GroupOrder item = {};
		item.id = unsigned(i);
		item.order = groups[i].vertices;

		heapPush(order, pending++, item);
	}

	// iteratively merge the smallest group with the best group
	while (pending)
	{
		GroupOrder top = heapPop(order, pending--);

		// this group was merged into another group earlier
		if (groups[top.id].size == 0)
			continue;

		// disassociate clusters from the group to prevent them from being merged again; we will re-associate them if the group is reinserted
		for (int i = top.id; i >= 0; i = groups[i].next)
		{
			assert(groups[i].group == int(top.id));
			groups[i].group = -1;
		}

		// the group is large enough, emit as is
		if (groups[top.id].size >= target_partition_size)
			continue;

		int best_group = pickGroupToMerge(groups, top.id, adjacency, max_partition_size, /* use_bounds= */ vertex_positions);

		// we can't grow the group any more, emit as is
		if (best_group == -1)
			continue;

		// compute shared vertices to adjust the total vertices estimate after merging
		unsigned int shared = countShared(groups, top.id, best_group, adjacency);

		// combine groups by linking them together
		unsigned int tail = top.id;
		while (groups[tail].next >= 0)
			tail = groups[tail].next;

		groups[tail].next = best_group;

		// update group sizes; note, the vertex update is an O(1) approximation which avoids recomputing the true size
		groups[top.id].size += groups[best_group].size;
		groups[top.id].vertices += groups[best_group].vertices;
		groups[top.id].vertices = (groups[top.id].vertices > shared) ? groups[top.id].vertices - shared : 1;

		groups[best_group].size = 0;
		groups[best_group].vertices = 0;

		// merge bounding spheres if bounds are available
		if (vertex_positions)
		{
			mergeBounds(groups[top.id], groups[best_group]);
			groups[best_group].radius = 0;
		}

		// re-associate all clusters back to the merged group
		for (int i = top.id; i >= 0; i = groups[i].next)
			groups[i].group = int(top.id);

		top.order = groups[top.id].vertices;
		heapPush(order, pending++, top);
	}

	// if vertex positions are provided, we do a final pass to see if we can merge small groups based on spatial locality alone
	if (vertex_positions)
	{
		unsigned int* merge_order = reinterpret_cast<unsigned int*>(order);
		size_t merge_offset = 0;

		for (size_t i = 0; i < cluster_count; ++i)
			if (groups[i].size)
				merge_order[merge_offset++] = unsigned(i);

		mergeSpatial(groups, merge_order, merge_offset, target_partition_size, max_partition_size, /* leaf_size= */ 8, 0);
	}

	// output each remaining group
	size_t next_group = 0;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		if (groups[i].size == 0)
			continue;

		for (int j = int(i); j >= 0; j = groups[j].next)
			destination[j] = unsigned(next_group);

		next_group++;
	}

	assert(next_group <= cluster_count);
	return next_group;
}