vcachetuner.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. #include "../src/meshoptimizer.h"
  2. #include "fast_obj.h"
  3. #include "../demo/miniz.h"
  4. #include <algorithm>
  5. #include <functional>
  6. #include <vector>
  7. #include <cmath>
  8. #include <cstdint>
  9. #include <cstdio>
  10. #include <cstring>
  11. const int kCacheSizeMax = 16;
  12. const int kValenceMax = 8;
  13. namespace meshopt
  14. {
  15. extern thread_local float kVertexScoreTableCache[1 + kCacheSizeMax];
  16. extern thread_local float kVertexScoreTableLive[1 + kValenceMax];
  17. } // namespace meshopt
  18. struct Profile
  19. {
  20. float weight;
  21. int cache, warp, triangle; // vcache tuning parameters
  22. };
  23. Profile profiles[] =
  24. {
  25. {1.f, 0, 0, 0}, // Compression
  26. {1.f, 14, 64, 128}, // AMD GCN
  27. {1.f, 32, 32, 32}, // NVidia Pascal
  28. // {1.f, 16, 32, 32}, // NVidia Kepler, Maxwell
  29. // {1.f, 128, 0, 0}, // Intel
  30. };
  31. const int Profile_Count = sizeof(profiles) / sizeof(profiles[0]);
  32. struct pcg32_random_t
  33. {
  34. uint64_t state;
  35. uint64_t inc;
  36. };
  37. #define PCG32_INITIALIZER { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL }
  38. uint32_t pcg32_random_r(pcg32_random_t* rng)
  39. {
  40. uint64_t oldstate = rng->state;
  41. // Advance internal state
  42. rng->state = oldstate * 6364136223846793005ULL + (rng->inc | 1);
  43. // Calculate output function (XSH RR), uses old state for max ILP
  44. uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
  45. uint32_t rot = oldstate >> 59u;
  46. return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
  47. }
  48. pcg32_random_t rngstate = PCG32_INITIALIZER;
  49. float rand01()
  50. {
  51. return pcg32_random_r(&rngstate) / float(1ull << 32);
  52. }
  53. uint32_t rand32()
  54. {
  55. return pcg32_random_r(&rngstate);
  56. }
  57. struct State
  58. {
  59. float cache[kCacheSizeMax];
  60. float live[kValenceMax];
  61. float fitness;
  62. };
  63. struct Mesh
  64. {
  65. const char* name;
  66. size_t vertex_count;
  67. std::vector<unsigned int> indices;
  68. float metric_base[Profile_Count];
  69. };
  70. Mesh gridmesh(unsigned int N)
  71. {
  72. Mesh result;
  73. result.name = "grid";
  74. result.vertex_count = (N + 1) * (N + 1);
  75. result.indices.reserve(N * N * 6);
  76. for (unsigned int y = 0; y < N; ++y)
  77. for (unsigned int x = 0; x < N; ++x)
  78. {
  79. result.indices.push_back((y + 0) * (N + 1) + (x + 0));
  80. result.indices.push_back((y + 0) * (N + 1) + (x + 1));
  81. result.indices.push_back((y + 1) * (N + 1) + (x + 0));
  82. result.indices.push_back((y + 1) * (N + 1) + (x + 0));
  83. result.indices.push_back((y + 0) * (N + 1) + (x + 1));
  84. result.indices.push_back((y + 1) * (N + 1) + (x + 1));
  85. }
  86. return result;
  87. }
  88. Mesh objmesh(const char* path)
  89. {
  90. fastObjMesh* obj = fast_obj_read(path);
  91. if (!obj)
  92. {
  93. printf("Error loading %s: file not found\n", path);
  94. return Mesh();
  95. }
  96. size_t total_indices = 0;
  97. for (unsigned int i = 0; i < obj->face_count; ++i)
  98. total_indices += 3 * (obj->face_vertices[i] - 2);
  99. struct Vertex
  100. {
  101. float px, py, pz;
  102. float nx, ny, nz;
  103. float tx, ty;
  104. };
  105. std::vector<Vertex> vertices(total_indices);
  106. size_t vertex_offset = 0;
  107. size_t index_offset = 0;
  108. for (unsigned int i = 0; i < obj->face_count; ++i)
  109. {
  110. for (unsigned int j = 0; j < obj->face_vertices[i]; ++j)
  111. {
  112. fastObjIndex gi = obj->indices[index_offset + j];
  113. Vertex v =
  114. {
  115. obj->positions[gi.p * 3 + 0],
  116. obj->positions[gi.p * 3 + 1],
  117. obj->positions[gi.p * 3 + 2],
  118. obj->normals[gi.n * 3 + 0],
  119. obj->normals[gi.n * 3 + 1],
  120. obj->normals[gi.n * 3 + 2],
  121. obj->texcoords[gi.t * 2 + 0],
  122. obj->texcoords[gi.t * 2 + 1],
  123. };
  124. // triangulate polygon on the fly; offset-3 is always the first polygon vertex
  125. if (j >= 3)
  126. {
  127. vertices[vertex_offset + 0] = vertices[vertex_offset - 3];
  128. vertices[vertex_offset + 1] = vertices[vertex_offset - 1];
  129. vertex_offset += 2;
  130. }
  131. vertices[vertex_offset] = v;
  132. vertex_offset++;
  133. }
  134. index_offset += obj->face_vertices[i];
  135. }
  136. fast_obj_destroy(obj);
  137. Mesh result;
  138. result.name = path;
  139. std::vector<unsigned int> remap(total_indices);
  140. size_t total_vertices = meshopt_generateVertexRemap(&remap[0], NULL, total_indices, &vertices[0], total_indices, sizeof(Vertex));
  141. result.indices.resize(total_indices);
  142. meshopt_remapIndexBuffer(&result.indices[0], NULL, total_indices, &remap[0]);
  143. result.vertex_count = total_vertices;
  144. return result;
  145. }
  146. template <typename T>
  147. size_t compress(const std::vector<T>& data)
  148. {
  149. std::vector<unsigned char> cbuf(tdefl_compress_bound(data.size() * sizeof(T)));
  150. unsigned int flags = tdefl_create_comp_flags_from_zip_params(MZ_DEFAULT_LEVEL, 15, MZ_DEFAULT_STRATEGY);
  151. return tdefl_compress_mem_to_mem(&cbuf[0], cbuf.size(), &data[0], data.size() * sizeof(T), flags);
  152. }
  153. void compute_metric(const State& state, const Mesh& mesh, float result[Profile_Count])
  154. {
  155. memcpy(meshopt::kVertexScoreTableCache + 1, state.cache, kCacheSizeMax * sizeof(float));
  156. memcpy(meshopt::kVertexScoreTableLive + 1, state.live, kValenceMax * sizeof(float));
  157. std::vector<unsigned int> indices(mesh.indices.size());
  158. meshopt_optimizeVertexCache(&indices[0], &mesh.indices[0], mesh.indices.size(), mesh.vertex_count);
  159. meshopt_optimizeVertexFetch(NULL, &indices[0], indices.size(), NULL, mesh.vertex_count, 0);
  160. for (int profile = 0; profile < Profile_Count; ++profile)
  161. {
  162. if (profiles[profile].cache)
  163. {
  164. meshopt_VertexCacheStatistics stats = meshopt_analyzeVertexCache(&indices[0], indices.size(), mesh.vertex_count, profiles[profile].cache, profiles[profile].warp, profiles[profile].triangle);
  165. result[profile] = stats.atvr;
  166. }
  167. else
  168. {
  169. std::vector<unsigned char> ibuf(meshopt_encodeIndexBufferBound(indices.size(), mesh.vertex_count));
  170. ibuf.resize(meshopt_encodeIndexBuffer(&ibuf[0], ibuf.size(), &indices[0], indices.size()));
  171. size_t csize = compress(ibuf);
  172. result[profile] = double(csize) / double(indices.size() / 3);
  173. }
  174. }
  175. }
  176. float fitness_score(const State& state, const std::vector<Mesh>& meshes)
  177. {
  178. float result = 0;
  179. float count = 0;
  180. for (auto& mesh : meshes)
  181. {
  182. float metric[Profile_Count];
  183. compute_metric(state, mesh, metric);
  184. for (int profile = 0; profile < Profile_Count; ++profile)
  185. {
  186. result += mesh.metric_base[profile] / metric[profile] * profiles[profile].weight;
  187. count += profiles[profile].weight;
  188. }
  189. }
  190. return result / count;
  191. }
  192. std::vector<State> gen0(size_t count, const std::vector<Mesh>& meshes)
  193. {
  194. std::vector<State> result;
  195. for (size_t i = 0; i < count; ++i)
  196. {
  197. State state = {};
  198. for (int j = 0; j < kCacheSizeMax; ++j)
  199. state.cache[j] = rand01();
  200. for (int j = 0; j < kValenceMax; ++j)
  201. state.live[j] = rand01();
  202. state.fitness = fitness_score(state, meshes);
  203. result.push_back(state);
  204. }
  205. return result;
  206. }
  207. // https://en.wikipedia.org/wiki/Differential_evolution
  208. // Good Parameters for Differential Evolution. Magnus Erik Hvass Pedersen, 2010
  209. std::pair<State, float> genN(std::vector<State>& seed, const std::vector<Mesh>& meshes, float crossover = 0.8803f, float weight = 0.4717f)
  210. {
  211. std::vector<State> result(seed.size());
  212. for (size_t i = 0; i < seed.size(); ++i)
  213. {
  214. for (;;)
  215. {
  216. int a = rand32() % seed.size();
  217. int b = rand32() % seed.size();
  218. int c = rand32() % seed.size();
  219. if (a == b || a == c || b == c || a == int(i) || b == int(i) || c == int(i))
  220. continue;
  221. int rc = rand32() % kCacheSizeMax;
  222. int rl = rand32() % kValenceMax;
  223. for (int j = 0; j < kCacheSizeMax; ++j)
  224. {
  225. float r = rand01();
  226. if (r < crossover || j == rc)
  227. result[i].cache[j] = std::max(0.f, std::min(1.f, seed[a].cache[j] + weight * (seed[b].cache[j] - seed[c].cache[j])));
  228. else
  229. result[i].cache[j] = seed[i].cache[j];
  230. }
  231. for (int j = 0; j < kValenceMax; ++j)
  232. {
  233. float r = rand01();
  234. if (r < crossover || j == rl)
  235. result[i].live[j] = std::max(0.f, std::min(1.f, seed[a].live[j] + weight * (seed[b].live[j] - seed[c].live[j])));
  236. else
  237. result[i].live[j] = seed[i].live[j];
  238. }
  239. break;
  240. }
  241. }
  242. #pragma omp parallel for
  243. for (size_t i = 0; i < seed.size(); ++i)
  244. {
  245. result[i].fitness = fitness_score(result[i], meshes);
  246. }
  247. State best = {};
  248. float bestfit = 0;
  249. for (size_t i = 0; i < seed.size(); ++i)
  250. {
  251. if (result[i].fitness > seed[i].fitness)
  252. seed[i] = result[i];
  253. if (seed[i].fitness > bestfit)
  254. {
  255. best = seed[i];
  256. bestfit = seed[i].fitness;
  257. }
  258. }
  259. return std::make_pair(best, bestfit);
  260. }
  261. bool load_state(const char* path, std::vector<State>& result)
  262. {
  263. FILE* file = fopen(path, "rb");
  264. if (!file)
  265. return false;
  266. State state;
  267. result.clear();
  268. while (fread(&state, sizeof(State), 1, file) == 1)
  269. result.push_back(state);
  270. fclose(file);
  271. return true;
  272. }
  273. bool save_state(const char* path, const std::vector<State>& result)
  274. {
  275. FILE* file = fopen(path, "wb");
  276. if (!file)
  277. return false;
  278. for (auto& state : result)
  279. {
  280. if (fwrite(&state, sizeof(State), 1, file) != 1)
  281. {
  282. fclose(file);
  283. return false;
  284. }
  285. }
  286. return fclose(file) == 0;
  287. }
  288. void dump_state(const State& state)
  289. {
  290. printf("cache:");
  291. for (int i = 0; i < kCacheSizeMax; ++i)
  292. {
  293. printf(" %.3f", state.cache[i]);
  294. }
  295. printf("\n");
  296. printf("live:");
  297. for (int i = 0; i < kValenceMax; ++i)
  298. {
  299. printf(" %.3f", state.live[i]);
  300. }
  301. printf("\n");
  302. }
  303. void dump_stats(const State& state, const std::vector<Mesh>& meshes)
  304. {
  305. float improvement[Profile_Count] = {};
  306. for (size_t i = 0; i < meshes.size(); ++i)
  307. {
  308. float metric[Profile_Count];
  309. compute_metric(state, meshes[i], metric);
  310. printf(" %s", meshes[i].name);
  311. for (int profile = 0; profile < Profile_Count; ++profile)
  312. printf(" %f", metric[profile]);
  313. for (int profile = 0; profile < Profile_Count; ++profile)
  314. improvement[profile] += meshes[i].metric_base[profile] / metric[profile];
  315. }
  316. printf("; improvement");
  317. for (int profile = 0; profile < Profile_Count; ++profile)
  318. printf(" %f", improvement[profile] / float(meshes.size()));
  319. printf("\n");
  320. }
  321. int main(int argc, char** argv)
  322. {
  323. State baseline;
  324. memcpy(baseline.cache, meshopt::kVertexScoreTableCache + 1, kCacheSizeMax * sizeof(float));
  325. memcpy(baseline.live, meshopt::kVertexScoreTableLive + 1, kValenceMax * sizeof(float));
  326. std::vector<Mesh> meshes;
  327. meshes.push_back(gridmesh(50));
  328. for (int i = 1; i < argc; ++i)
  329. meshes.push_back(objmesh(argv[i]));
  330. size_t total_triangles = 0;
  331. for (auto& mesh : meshes)
  332. {
  333. compute_metric(baseline, mesh, mesh.metric_base);
  334. total_triangles += mesh.indices.size() / 3;
  335. }
  336. std::vector<State> pop;
  337. size_t gen = 0;
  338. if (load_state("mutator.state", pop))
  339. {
  340. printf("Loaded %d state vectors\n", int(pop.size()));
  341. }
  342. else
  343. {
  344. pop = gen0(95, meshes);
  345. }
  346. printf("%d meshes, %.1fM triangles\n", int(meshes.size()), double(total_triangles) / 1e6);
  347. printf("baseline:");
  348. dump_stats(baseline, meshes);
  349. for (;;)
  350. {
  351. auto best = genN(pop, meshes);
  352. gen++;
  353. if (gen % 10 == 0)
  354. {
  355. printf("%d: fitness %f;", int(gen), best.second);
  356. dump_stats(best.first, meshes);
  357. }
  358. else
  359. {
  360. printf("%d: fitness %f\n", int(gen), best.second);
  361. }
  362. dump_state(best.first);
  363. if (save_state("mutator.state-temp", pop) && rename("mutator.state-temp", "mutator.state") == 0)
  364. {
  365. }
  366. else
  367. {
  368. printf("ERROR: Can't save state\n");
  369. }
  370. }
  371. }