indexanalyzer.cpp 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
  2. #include "meshoptimizer.h"
  3. #include <assert.h>
  4. #include <string.h>
  5. meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size)
  6. {
  7. assert(index_count % 3 == 0);
  8. assert(cache_size >= 3);
  9. assert(warp_size == 0 || warp_size >= 3);
  10. meshopt_Allocator allocator;
  11. meshopt_VertexCacheStatistics result = {};
  12. unsigned int warp_offset = 0;
  13. unsigned int primgroup_offset = 0;
  14. unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
  15. memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
  16. unsigned int timestamp = cache_size + 1;
  17. for (size_t i = 0; i < index_count; i += 3)
  18. {
  19. unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
  20. assert(a < vertex_count && b < vertex_count && c < vertex_count);
  21. bool ac = (timestamp - cache_timestamps[a]) > cache_size;
  22. bool bc = (timestamp - cache_timestamps[b]) > cache_size;
  23. bool cc = (timestamp - cache_timestamps[c]) > cache_size;
  24. // flush cache if triangle doesn't fit into warp or into the primitive buffer
  25. if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size))
  26. {
  27. result.warps_executed += warp_offset > 0;
  28. warp_offset = 0;
  29. primgroup_offset = 0;
  30. // reset cache
  31. timestamp += cache_size + 1;
  32. }
  33. // update cache and add vertices to warp
  34. for (int j = 0; j < 3; ++j)
  35. {
  36. unsigned int index = indices[i + j];
  37. if (timestamp - cache_timestamps[index] > cache_size)
  38. {
  39. cache_timestamps[index] = timestamp++;
  40. result.vertices_transformed++;
  41. warp_offset++;
  42. }
  43. }
  44. primgroup_offset++;
  45. }
  46. size_t unique_vertex_count = 0;
  47. for (size_t i = 0; i < vertex_count; ++i)
  48. unique_vertex_count += cache_timestamps[i] > 0;
  49. result.warps_executed += warp_offset > 0;
  50. result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3);
  51. result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count);
  52. return result;
  53. }
  54. meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
  55. {
  56. assert(index_count % 3 == 0);
  57. assert(vertex_size > 0 && vertex_size <= 256);
  58. meshopt_Allocator allocator;
  59. meshopt_VertexFetchStatistics result = {};
  60. unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count);
  61. memset(vertex_visited, 0, vertex_count);
  62. const size_t kCacheLine = 64;
  63. const size_t kCacheSize = 128 * 1024;
  64. // simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
  65. size_t cache[kCacheSize / kCacheLine] = {};
  66. for (size_t i = 0; i < index_count; ++i)
  67. {
  68. unsigned int index = indices[i];
  69. assert(index < vertex_count);
  70. vertex_visited[index] = 1;
  71. size_t start_address = index * vertex_size;
  72. size_t end_address = start_address + vertex_size;
  73. size_t start_tag = start_address / kCacheLine;
  74. size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine;
  75. assert(start_tag < end_tag);
  76. for (size_t tag = start_tag; tag < end_tag; ++tag)
  77. {
  78. size_t line = tag % (sizeof(cache) / sizeof(cache[0]));
  79. // we store +1 since cache is filled with 0 by default
  80. result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine;
  81. cache[line] = tag + 1;
  82. }
  83. }
  84. size_t unique_vertex_count = 0;
  85. for (size_t i = 0; i < vertex_count; ++i)
  86. unique_vertex_count += vertex_visited[i];
  87. result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size);
  88. return result;
  89. }