vcachetuner.cpp 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. #include "../src/meshoptimizer.h"
  2. #include "fast_obj.h"
  3. #include <algorithm>
  4. #include <functional>
  5. #include <vector>
  6. #include <cmath>
  7. #include <cstdint>
  8. #include <cstdio>
  9. #include <cstring>
  10. const int kCacheSizeMax = 16;
  11. const int kValenceMax = 8;
  12. namespace meshopt
  13. {
  14. extern thread_local float kVertexScoreTableCache[1 + kCacheSizeMax];
  15. extern thread_local float kVertexScoreTableLive[1 + kValenceMax];
  16. } // namespace meshopt
  17. struct { int cache, warp, triangle; } profiles[] =
  18. {
  19. {14, 64, 128}, // AMD GCN
  20. {32, 32, 32}, // NVidia Pascal
  21. // { 16, 32, 32 }, // NVidia Kepler, Maxwell
  22. // { 128, 0, 0 }, // Intel
  23. };
  24. const int Profile_Count = sizeof(profiles) / sizeof(profiles[0]);
  25. struct pcg32_random_t
  26. {
  27. uint64_t state;
  28. uint64_t inc;
  29. };
  30. #define PCG32_INITIALIZER { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL }
  31. uint32_t pcg32_random_r(pcg32_random_t* rng)
  32. {
  33. uint64_t oldstate = rng->state;
  34. // Advance internal state
  35. rng->state = oldstate * 6364136223846793005ULL + (rng->inc | 1);
  36. // Calculate output function (XSH RR), uses old state for max ILP
  37. uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
  38. uint32_t rot = oldstate >> 59u;
  39. return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
  40. }
  41. pcg32_random_t rngstate = PCG32_INITIALIZER;
  42. float rand01()
  43. {
  44. return pcg32_random_r(&rngstate) / float(1ull << 32);
  45. }
  46. uint32_t rand32()
  47. {
  48. return pcg32_random_r(&rngstate);
  49. }
  50. struct State
  51. {
  52. float cache[kCacheSizeMax];
  53. float live[kValenceMax];
  54. float fitness;
  55. };
  56. struct Mesh
  57. {
  58. size_t vertex_count;
  59. std::vector<unsigned int> indices;
  60. float atvr_base[Profile_Count];
  61. };
  62. Mesh gridmesh(unsigned int N)
  63. {
  64. Mesh result;
  65. result.vertex_count = (N + 1) * (N + 1);
  66. result.indices.reserve(N * N * 6);
  67. for (unsigned int y = 0; y < N; ++y)
  68. for (unsigned int x = 0; x < N; ++x)
  69. {
  70. result.indices.push_back((y + 0) * (N + 1) + (x + 0));
  71. result.indices.push_back((y + 0) * (N + 1) + (x + 1));
  72. result.indices.push_back((y + 1) * (N + 1) + (x + 0));
  73. result.indices.push_back((y + 1) * (N + 1) + (x + 0));
  74. result.indices.push_back((y + 0) * (N + 1) + (x + 1));
  75. result.indices.push_back((y + 1) * (N + 1) + (x + 1));
  76. }
  77. return result;
  78. }
  79. Mesh objmesh(const char* path)
  80. {
  81. fastObjMesh* obj = fast_obj_read(path);
  82. if (!obj)
  83. {
  84. printf("Error loading %s: file not found\n", path);
  85. return Mesh();
  86. }
  87. size_t total_indices = 0;
  88. for (unsigned int i = 0; i < obj->face_count; ++i)
  89. total_indices += 3 * (obj->face_vertices[i] - 2);
  90. struct Vertex
  91. {
  92. float px, py, pz;
  93. float nx, ny, nz;
  94. float tx, ty;
  95. };
  96. std::vector<Vertex> vertices(total_indices);
  97. size_t vertex_offset = 0;
  98. size_t index_offset = 0;
  99. for (unsigned int i = 0; i < obj->face_count; ++i)
  100. {
  101. for (unsigned int j = 0; j < obj->face_vertices[i]; ++j)
  102. {
  103. fastObjIndex gi = obj->indices[index_offset + j];
  104. Vertex v =
  105. {
  106. obj->positions[gi.p * 3 + 0],
  107. obj->positions[gi.p * 3 + 1],
  108. obj->positions[gi.p * 3 + 2],
  109. obj->normals[gi.n * 3 + 0],
  110. obj->normals[gi.n * 3 + 1],
  111. obj->normals[gi.n * 3 + 2],
  112. obj->texcoords[gi.t * 2 + 0],
  113. obj->texcoords[gi.t * 2 + 1],
  114. };
  115. // triangulate polygon on the fly; offset-3 is always the first polygon vertex
  116. if (j >= 3)
  117. {
  118. vertices[vertex_offset + 0] = vertices[vertex_offset - 3];
  119. vertices[vertex_offset + 1] = vertices[vertex_offset - 1];
  120. vertex_offset += 2;
  121. }
  122. vertices[vertex_offset] = v;
  123. vertex_offset++;
  124. }
  125. index_offset += obj->face_vertices[i];
  126. }
  127. fast_obj_destroy(obj);
  128. Mesh result;
  129. std::vector<unsigned int> remap(total_indices);
  130. size_t total_vertices = meshopt_generateVertexRemap(&remap[0], NULL, total_indices, &vertices[0], total_indices, sizeof(Vertex));
  131. result.indices.resize(total_indices);
  132. meshopt_remapIndexBuffer(&result.indices[0], NULL, total_indices, &remap[0]);
  133. result.vertex_count = total_vertices;
  134. return result;
  135. }
  136. void compute_atvr(const State& state, const Mesh& mesh, float result[Profile_Count])
  137. {
  138. memcpy(meshopt::kVertexScoreTableCache + 1, state.cache, kCacheSizeMax * sizeof(float));
  139. memcpy(meshopt::kVertexScoreTableLive + 1, state.live, kValenceMax * sizeof(float));
  140. std::vector<unsigned int> indices(mesh.indices.size());
  141. meshopt_optimizeVertexCache(&indices[0], &mesh.indices[0], mesh.indices.size(), mesh.vertex_count);
  142. for (int profile = 0; profile < Profile_Count; ++profile)
  143. result[profile] = meshopt_analyzeVertexCache(&indices[0], indices.size(), mesh.vertex_count, profiles[profile].cache, profiles[profile].warp, profiles[profile].triangle).atvr;
  144. }
  145. float fitness_score(const State& state, const std::vector<Mesh>& meshes)
  146. {
  147. float result = 0;
  148. float count = 0;
  149. for (auto& mesh : meshes)
  150. {
  151. float atvr[Profile_Count];
  152. compute_atvr(state, mesh, atvr);
  153. for (int profile = 0; profile < Profile_Count; ++profile)
  154. {
  155. result += mesh.atvr_base[profile] / atvr[profile];
  156. count += 1;
  157. }
  158. }
  159. return result / count;
  160. }
  161. std::vector<State> gen0(size_t count, const std::vector<Mesh>& meshes)
  162. {
  163. std::vector<State> result;
  164. for (size_t i = 0; i < count; ++i)
  165. {
  166. State state = {};
  167. for (int j = 0; j < kCacheSizeMax; ++j)
  168. state.cache[j] = rand01();
  169. for (int j = 0; j < kValenceMax; ++j)
  170. state.live[j] = rand01();
  171. state.fitness = fitness_score(state, meshes);
  172. result.push_back(state);
  173. }
  174. return result;
  175. }
  176. // https://en.wikipedia.org/wiki/Differential_evolution
  177. // Good Parameters for Differential Evolution. Magnus Erik Hvass Pedersen, 2010
  178. std::pair<State, float> genN(std::vector<State>& seed, const std::vector<Mesh>& meshes, float crossover = 0.8803f, float weight = 0.4717f)
  179. {
  180. std::vector<State> result(seed.size());
  181. for (size_t i = 0; i < seed.size(); ++i)
  182. {
  183. for (;;)
  184. {
  185. int a = rand32() % seed.size();
  186. int b = rand32() % seed.size();
  187. int c = rand32() % seed.size();
  188. if (a == b || a == c || b == c || a == int(i) || b == int(i) || c == int(i))
  189. continue;
  190. int rc = rand32() % kCacheSizeMax;
  191. int rl = rand32() % kValenceMax;
  192. for (int j = 0; j < kCacheSizeMax; ++j)
  193. {
  194. float r = rand01();
  195. if (r < crossover || j == rc)
  196. result[i].cache[j] = std::max(0.f, std::min(1.f, seed[a].cache[j] + weight * (seed[b].cache[j] - seed[c].cache[j])));
  197. else
  198. result[i].cache[j] = seed[i].cache[j];
  199. }
  200. for (int j = 0; j < kValenceMax; ++j)
  201. {
  202. float r = rand01();
  203. if (r < crossover || j == rl)
  204. result[i].live[j] = std::max(0.f, std::min(1.f, seed[a].live[j] + weight * (seed[b].live[j] - seed[c].live[j])));
  205. else
  206. result[i].live[j] = seed[i].live[j];
  207. }
  208. break;
  209. }
  210. }
  211. #pragma omp parallel for
  212. for (size_t i = 0; i < seed.size(); ++i)
  213. {
  214. result[i].fitness = fitness_score(result[i], meshes);
  215. }
  216. State best = {};
  217. float bestfit = 0;
  218. for (size_t i = 0; i < seed.size(); ++i)
  219. {
  220. if (result[i].fitness > seed[i].fitness)
  221. seed[i] = result[i];
  222. if (seed[i].fitness > bestfit)
  223. {
  224. best = seed[i];
  225. bestfit = seed[i].fitness;
  226. }
  227. }
  228. return std::make_pair(best, bestfit);
  229. }
  230. bool load_state(const char* path, std::vector<State>& result)
  231. {
  232. FILE* file = fopen(path, "rb");
  233. if (!file)
  234. return false;
  235. State state;
  236. result.clear();
  237. while (fread(&state, sizeof(State), 1, file) == 1)
  238. result.push_back(state);
  239. fclose(file);
  240. return true;
  241. }
  242. bool save_state(const char* path, const std::vector<State>& result)
  243. {
  244. FILE* file = fopen(path, "wb");
  245. if (!file)
  246. return false;
  247. for (auto& state : result)
  248. {
  249. if (fwrite(&state, sizeof(State), 1, file) != 1)
  250. {
  251. fclose(file);
  252. return false;
  253. }
  254. }
  255. return fclose(file) == 0;
  256. }
  257. void dump_state(const State& state)
  258. {
  259. printf("cache:");
  260. for (int i = 0; i < kCacheSizeMax; ++i)
  261. {
  262. printf(" %.3f", state.cache[i]);
  263. }
  264. printf("\n");
  265. printf("live:");
  266. for (int i = 0; i < kValenceMax; ++i)
  267. {
  268. printf(" %.3f", state.live[i]);
  269. }
  270. printf("\n");
  271. }
  272. int main(int argc, char** argv)
  273. {
  274. bool annealing = false;
  275. State baseline;
  276. memcpy(baseline.cache, meshopt::kVertexScoreTableCache + 1, kCacheSizeMax * sizeof(float));
  277. memcpy(baseline.live, meshopt::kVertexScoreTableLive + 1, kValenceMax * sizeof(float));
  278. std::vector<Mesh> meshes;
  279. meshes.push_back(gridmesh(50));
  280. for (int i = 1; i < argc; ++i)
  281. meshes.push_back(objmesh(argv[i]));
  282. size_t total_triangles = 0;
  283. for (auto& mesh : meshes)
  284. {
  285. compute_atvr(baseline, mesh, mesh.atvr_base);
  286. total_triangles += mesh.indices.size() / 3;
  287. }
  288. std::vector<State> pop;
  289. size_t gen = 0;
  290. if (load_state("mutator.state", pop))
  291. {
  292. printf("Loaded %d state vectors\n", int(pop.size()));
  293. }
  294. else
  295. {
  296. pop = gen0(95, meshes);
  297. }
  298. printf("%d meshes, %.1fM triangles\n", int(meshes.size()), double(total_triangles) / 1e6);
  299. float atvr_0[Profile_Count];
  300. float atvr_N[Profile_Count];
  301. compute_atvr(baseline, meshes[0], atvr_0);
  302. compute_atvr(baseline, meshes.back(), atvr_N);
  303. printf("baseline: grid %f %f %s %f %f\n", atvr_0[0], atvr_0[1], argv[argc - 1], atvr_N[0], atvr_N[1]);
  304. for (;;)
  305. {
  306. auto best = genN(pop, meshes);
  307. gen++;
  308. compute_atvr(best.first, meshes[0], atvr_0);
  309. compute_atvr(best.first, meshes.back(), atvr_N);
  310. printf("%d: fitness %f; grid %f %f %s %f %f\n", int(gen), best.second, atvr_0[0], atvr_0[1], argv[argc - 1], atvr_N[0], atvr_N[1]);
  311. if (gen % 100 == 0)
  312. {
  313. char buf[128];
  314. sprintf(buf, "gcloud logging write vcache-log \"fitness %f; grid %f %f %s %f %f\"", best.second, atvr_0[0], atvr_0[1], argv[argc - 1], atvr_N[0], atvr_N[1]);
  315. int rc = system(buf);
  316. (void)rc;
  317. }
  318. dump_state(best.first);
  319. if (save_state("mutator.state-temp", pop) && rename("mutator.state-temp", "mutator.state") == 0)
  320. {
  321. }
  322. else
  323. {
  324. printf("ERROR: Can't save state\n");
  325. }
  326. }
  327. }