Mesh Optimize.cpp 54 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429
  1. /******************************************************************************/
  2. #include "stdafx.h"
  3. /******************************************************************************
  4. -----FPS------ optimizeTimeSeconds
  5. Method GL DX9 DX10+
  6. DirectX 31.1 25.6 0.014
  7. Forsyth 29.1 23.6 0.028
  8. ForsythSorted 23.4 0.030
  9. Forsyth2 23.2 0.022
  10. Tipsify 20.9 0.005
  11. /******************************************************************************/
  12. #define VTX_CACHE_SIZE 64
  13. #define METHOD_DIRECTX 0
  14. #define METHOD_FORSYTH 1
  15. #define METHOD_FORSYTH_SORTED 2
  16. #define METHOD_FORSYTH2 3
  17. #define METHOD_TIPSIFY 4
  18. #define METHOD_TEST 5
  19. #define METHOD METHOD_DIRECTX
  20. #if METHOD==METHOD_DIRECTX || METHOD==METHOD_TEST
  21. #if DEBUG
  22. #undef NDEBUG
  23. #endif
  24. #include "../../../../ThirdPartyLibs/begin.h"
  25. #include "../../../../ThirdPartyLibs/DirectXMath/include.h"
  26. #include "../../../../ThirdPartyLibs/DirectXMesh/DirectXMeshOptimize.cpp"
  27. #include "../../../../ThirdPartyLibs/end.h"
  28. #endif
  29. /******************************************************************************/
  30. #if METHOD==METHOD_FORSYTH || METHOD==METHOD_TEST
  31. namespace Forsyth
  32. {
  33. // This is an implementation of Tom Forsyth's "Linear-Speed Vertex Cache Optimization" algorithm as described here:
  34. // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html
  35. // This code was authored and released into the public domain by Adrian Stone ([email protected])
  36. typedef Int Index;
  37. static const Index IndexMax=INT_MAX;
  38. // code for computing vertex score was taken, as much as possible directly from the original publication
  39. static Flt ComputeVertexCacheScore(int cachePosition, UInt vertexCacheSize)
  40. {
  41. const Flt FindVertexScore_CacheDecayPower=1.5f;
  42. const Flt FindVertexScore_LastTriScore =0.75f;
  43. Flt score=0;
  44. if(cachePosition>=0) // <0 Vertex is not in FIFO cache-no score, otherwise:
  45. {
  46. if(cachePosition<3)
  47. {
  48. // This vertex was used in the last triangle, so it has a fixed score, whichever of the three it's in.
  49. // Otherwise, you can get very different answers depending on whether you add the triangle 1,2,3 or 3,1,2-which is silly.
  50. score=FindVertexScore_LastTriScore;
  51. }else
  52. {
  53. DEBUG_ASSERT(cachePosition<vertexCacheSize, "cache opt");
  54. // Points for being high in the cache.
  55. const Flt scaler=1.0f/(vertexCacheSize-3);
  56. score=1.0f-(cachePosition-3)*scaler;
  57. score=Pow(score, FindVertexScore_CacheDecayPower);
  58. }
  59. }
  60. return score;
  61. }
  62. static Flt ComputeVertexValenceScore(UInt numActiveFaces)
  63. {
  64. const Flt FindVertexScore_ValenceBoostScale=2.0f;
  65. const Flt FindVertexScore_ValenceBoostPower=0.5f;
  66. // Bonus points for having a low number of tris still to use the vert, so we get rid of lone verts quickly.
  67. Flt valenceBoost=Pow((Flt)numActiveFaces, -FindVertexScore_ValenceBoostPower);
  68. return FindVertexScore_ValenceBoostScale*valenceBoost;
  69. }
  70. static const UInt kMaxVertexCacheSize=VTX_CACHE_SIZE;
  71. static const UInt kMaxPrecomputedVertexValenceScores=VTX_CACHE_SIZE;
  72. static Flt s_vertexCacheScores [kMaxVertexCacheSize+1][kMaxVertexCacheSize];
  73. static Flt s_vertexValenceScores[kMaxPrecomputedVertexValenceScores];
  74. static Bool ComputeVertexScores()
  75. {
  76. for(UInt cacheSize=0; cacheSize<=kMaxVertexCacheSize; ++cacheSize)
  77. for(UInt cachePos =0; cachePos < cacheSize; ++cachePos )
  78. s_vertexCacheScores[cacheSize][cachePos]=ComputeVertexCacheScore(cachePos, cacheSize);
  79. for(UInt valence=0; valence<kMaxPrecomputedVertexValenceScores; ++valence)
  80. s_vertexValenceScores[valence]=ComputeVertexValenceScore(valence);
  81. return true;
  82. }
  83. //static inline Flt FindVertexCacheScore (UInt cachePosition, UInt maxSizeVertexCache) {return s_vertexCacheScores [maxSizeVertexCache][cachePosition];}
  84. //static inline Flt FindVertexValenceScore(UInt numActiveTris ) {return s_vertexValenceScores[numActiveTris];}
  85. static Flt FindVertexScore(UInt numActiveFaces, UInt cachePosition, UInt vertexCacheSize)
  86. {
  87. if(!numActiveFaces)return -1; // No tri needs this vertex
  88. Flt score=0;
  89. if(cachePosition<vertexCacheSize)score+=s_vertexCacheScores[vertexCacheSize][cachePosition];
  90. if(numActiveFaces<kMaxPrecomputedVertexValenceScores)score+=s_vertexValenceScores [numActiveFaces];
  91. else score+=ComputeVertexValenceScore(numActiveFaces);
  92. return score;
  93. }
  94. struct OptimizeVertexData
  95. {
  96. Flt score;
  97. UInt activeFaceListStart, activeFaceListSize;
  98. Index cachePos0, cachePos1;
  99. OptimizeVertexData() : score(0), activeFaceListStart(0), activeFaceListSize(0), cachePos0(0), cachePos1(0) {}
  100. };
  101. static void OptimizeFaces(const Index *indexList, UInt indexCount, UInt vertexCount, Index *newIndexList, UInt lruCacheSize)
  102. {
  103. DEBUG_ASSERT(lruCacheSize<=kMaxVertexCacheSize, "cache opt"); // these codes assume that cache size is no greater than kMaxVertexCacheSize
  104. Mems<OptimizeVertexData> vertexDataList; vertexDataList.setNum(vertexCount);
  105. // compute face count per vertex
  106. for(UInt i=0; i<indexCount; ++i)
  107. {
  108. Index index=indexList[i];
  109. DEBUG_ASSERT(index<vertexCount, "cache opt");
  110. OptimizeVertexData &vertexData=vertexDataList[index];
  111. vertexData.activeFaceListSize++;
  112. }
  113. Memc<UInt> activeFaceList;
  114. const Index kEvictedCacheIndex=IndexMax;
  115. {
  116. // allocate face list per vertex
  117. UInt curActiveFaceListPos=0;
  118. for(UInt i=0; i<vertexCount; ++i)
  119. {
  120. OptimizeVertexData &vertexData=vertexDataList[i];
  121. vertexData.cachePos0=kEvictedCacheIndex;
  122. vertexData.cachePos1=kEvictedCacheIndex;
  123. vertexData.activeFaceListStart=curActiveFaceListPos;
  124. curActiveFaceListPos+=vertexData.activeFaceListSize;
  125. vertexData.score=FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos0, lruCacheSize);
  126. vertexData.activeFaceListSize=0;
  127. }
  128. activeFaceList.setNum(curActiveFaceListPos);
  129. }
  130. // fill out face list per vertex
  131. for(UInt i=0; i<indexCount; i+=3)
  132. for(UInt j=0; j<3; ++j)
  133. {
  134. Index index=indexList[i+j];
  135. OptimizeVertexData &vertexData=vertexDataList[index];
  136. activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize]=i;
  137. vertexData.activeFaceListSize++;
  138. }
  139. Mems<Bool> processedFaceList; processedFaceList.setNumZero(indexCount);
  140. Index vertexCacheBuffer[(kMaxVertexCacheSize+3)*2],
  141. *cache0=vertexCacheBuffer,
  142. *cache1=vertexCacheBuffer+(kMaxVertexCacheSize+3),
  143. entriesInCache0=0;
  144. UInt bestFace = 0;
  145. Flt bestScore=-1;
  146. const Flt maxValenceScore=FindVertexScore(1, kEvictedCacheIndex, lruCacheSize)*3;
  147. for(UInt i=0; i<indexCount; i+=3)
  148. {
  149. if(bestScore<0)
  150. {
  151. // no verts in the cache are used by any unprocessed faces so search all unprocessed faces for a new starting point
  152. for(UInt j=0; j<indexCount; j+=3)
  153. {
  154. if(!processedFaceList[j])
  155. {
  156. UInt face=j;
  157. Flt faceScore=0;
  158. for(UInt k=0; k<3; ++k)
  159. {
  160. Index index=indexList[face+k];
  161. OptimizeVertexData &vertexData=vertexDataList[index];
  162. DEBUG_ASSERT(vertexData.activeFaceListSize>0, "cache opt");
  163. DEBUG_ASSERT(vertexData.cachePos0>=lruCacheSize, "cache opt");
  164. faceScore+=vertexData.score;
  165. }
  166. if(faceScore>bestScore)
  167. {
  168. bestScore=faceScore;
  169. bestFace =face;
  170. DEBUG_ASSERT(bestScore <= maxValenceScore, "cache opt");
  171. if(bestScore>=maxValenceScore)break;
  172. }
  173. }
  174. }
  175. DEBUG_ASSERT(bestScore>=0, "cache opt");
  176. }
  177. processedFaceList[bestFace]=1;
  178. Index entriesInCache1=0;
  179. // add bestFace to LRU cache and to newIndexList
  180. for(UInt v=0; v<3; ++v)
  181. {
  182. Index index=indexList[bestFace+v];
  183. newIndexList[i+v]=index;
  184. OptimizeVertexData &vertexData=vertexDataList[index];
  185. if(vertexData.cachePos1>=entriesInCache1)
  186. {
  187. vertexData.cachePos1=entriesInCache1;
  188. cache1[entriesInCache1++]=index;
  189. if(vertexData.activeFaceListSize==1)
  190. {
  191. --vertexData.activeFaceListSize;
  192. continue;
  193. }
  194. }
  195. DEBUG_ASSERT(vertexData.activeFaceListSize>0, "cache opt");
  196. REP(vertexData.activeFaceListSize)
  197. {
  198. if(activeFaceList[vertexData.activeFaceListStart+i]==bestFace)
  199. {
  200. Swap(activeFaceList[vertexData.activeFaceListStart+i], activeFaceList[vertexData.activeFaceListStart+vertexData.activeFaceListSize-1]);
  201. vertexData.activeFaceListSize--;
  202. goto found;
  203. }
  204. }
  205. DEBUG_ASSERT(false, "cache opt");
  206. found:;
  207. vertexData.score=FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos1, lruCacheSize);
  208. }
  209. // move the rest of the old verts in the cache down and compute their new scores
  210. for(UInt c0=0; c0<entriesInCache0; ++c0)
  211. {
  212. Index index=cache0[c0];
  213. OptimizeVertexData &vertexData=vertexDataList[index];
  214. if(vertexData.cachePos1>=entriesInCache1)
  215. {
  216. vertexData.cachePos1=entriesInCache1;
  217. cache1[entriesInCache1++]=index;
  218. vertexData.score=FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos1, lruCacheSize);
  219. }
  220. }
  221. // find the best scoring triangle in the current cache (including up to 3 that were just evicted)
  222. bestScore=-1;
  223. for(UInt c1=0; c1<entriesInCache1; ++c1)
  224. {
  225. Index index=cache1[c1];
  226. OptimizeVertexData &vertexData=vertexDataList[index];
  227. vertexData.cachePos0=vertexData.cachePos1;
  228. vertexData.cachePos1=kEvictedCacheIndex;
  229. for(UInt j=0; j<vertexData.activeFaceListSize; ++j)
  230. {
  231. UInt face=activeFaceList[vertexData.activeFaceListStart+j];
  232. Flt faceScore=0;
  233. for(UInt v=0; v<3; v++)
  234. {
  235. Index faceIndex=indexList[face+v];
  236. OptimizeVertexData &faceVertexData=vertexDataList[faceIndex];
  237. faceScore+=faceVertexData.score;
  238. }
  239. if(faceScore>bestScore)
  240. {
  241. bestScore=faceScore;
  242. bestFace =face;
  243. }
  244. }
  245. }
  246. Swap(cache0, cache1);
  247. entriesInCache0=Min(entriesInCache1, lruCacheSize);
  248. }
  249. }
  250. } // Forsyth
  251. #endif
  252. /******************************************************************************/
  253. #if METHOD==METHOD_FORSYTH_SORTED || METHOD==METHOD_TEST
  254. namespace ForsythSorted
  255. {
  256. namespace
  257. {
  258. // code for computing vertex score was taken, as much as possible
  259. // directly from the original publication.
  260. float ComputeVertexCacheScore(int cachePosition, uint32_t vertexCacheSize)
  261. {
  262. const float FindVertexScore_CacheDecayPower = 1.5f;
  263. const float FindVertexScore_LastTriScore = 0.75f;
  264. float score = 0.0f;
  265. if ( cachePosition < 0 )
  266. {
  267. // Vertex is not in FIFO cache - no score.
  268. }
  269. else
  270. {
  271. if ( cachePosition < 3 )
  272. {
  273. // This vertex was used in the last triangle,
  274. // so it has a fixed score, whichever of the three
  275. // it's in. Otherwise, you can get very different
  276. // answers depending on whether you add
  277. // the triangle 1,2,3 or 3,1,2 - which is silly.
  278. score = FindVertexScore_LastTriScore;
  279. }
  280. else
  281. {
  282. assert ( cachePosition < int(vertexCacheSize) );
  283. // Points for being high in the cache.
  284. const float scaler = 1.0f / ( vertexCacheSize - 3 );
  285. score = 1.0f - ( cachePosition - 3 ) * scaler;
  286. score = powf ( score, FindVertexScore_CacheDecayPower );
  287. }
  288. }
  289. return score;
  290. }
  291. float ComputeVertexValenceScore(uint32_t numActiveFaces)
  292. {
  293. const float FindVertexScore_ValenceBoostScale = 2.0f;
  294. const float FindVertexScore_ValenceBoostPower = 0.5f;
  295. float score = 0.f;
  296. // Bonus points for having a low number of tris still to
  297. // use the vert, so we get rid of lone verts quickly.
  298. float valenceBoost = powf ( static_cast<float>(numActiveFaces),
  299. -FindVertexScore_ValenceBoostPower );
  300. score += FindVertexScore_ValenceBoostScale * valenceBoost;
  301. return score;
  302. }
  303. enum {kMaxVertexCacheSize = VTX_CACHE_SIZE};
  304. enum {kMaxPrecomputedVertexValenceScores = VTX_CACHE_SIZE};
  305. float s_vertexCacheScores[kMaxVertexCacheSize+1][kMaxVertexCacheSize];
  306. float s_vertexValenceScores[kMaxPrecomputedVertexValenceScores];
  307. bool ComputeVertexScores()
  308. {
  309. for (uint32_t cacheSize=0; cacheSize<=kMaxVertexCacheSize; ++cacheSize)
  310. {
  311. for (uint32_t cachePos=0; cachePos<cacheSize; ++cachePos)
  312. {
  313. s_vertexCacheScores[cacheSize][cachePos] = ComputeVertexCacheScore(cachePos, cacheSize);
  314. }
  315. }
  316. for (uint32_t valence=0; valence<kMaxPrecomputedVertexValenceScores; ++valence)
  317. {
  318. s_vertexValenceScores[valence] = ComputeVertexValenceScore(valence);
  319. }
  320. return true;
  321. }
  322. inline float FindVertexCacheScore(uint32_t cachePosition, uint32_t maxSizeVertexCache)
  323. {
  324. return s_vertexCacheScores[maxSizeVertexCache][cachePosition];
  325. }
  326. inline float FindVertexValenceScore(uint32_t numActiveTris)
  327. {
  328. return s_vertexValenceScores[numActiveTris];
  329. }
  330. float FindVertexScore(uint32_t numActiveFaces, uint32_t cachePosition, uint32_t vertexCacheSize)
  331. {
  332. if ( numActiveFaces == 0 )
  333. {
  334. // No tri needs this vertex!
  335. return -1.0f;
  336. }
  337. float score = 0.f;
  338. if (cachePosition < vertexCacheSize)
  339. {
  340. score += s_vertexCacheScores[vertexCacheSize][cachePosition];
  341. }
  342. if (numActiveFaces < kMaxPrecomputedVertexValenceScores)
  343. {
  344. score += s_vertexValenceScores[numActiveFaces];
  345. }
  346. else
  347. {
  348. score += ComputeVertexValenceScore(numActiveFaces);
  349. }
  350. return score;
  351. }
  352. template <typename IndexType>
  353. struct OptimizeVertexData
  354. {
  355. float score;
  356. uint32_t activeFaceListStart;
  357. uint32_t activeFaceListSize;
  358. IndexType cachePos0;
  359. IndexType cachePos1;
  360. OptimizeVertexData() : score(0.f), activeFaceListStart(0), activeFaceListSize(0), cachePos0(0), cachePos1(0) { }
  361. };
  362. }
  363. template <typename TYPE, typename IndexType>
  364. struct IndexSortCompareIndexed
  365. {
  366. const IndexType *_indexData;
  367. IndexSortCompareIndexed(const IndexType *indexData)
  368. : _indexData(indexData)
  369. {
  370. }
  371. bool operator()(TYPE a, TYPE b) const
  372. {
  373. IndexType indexA = _indexData[a];
  374. IndexType indexB = _indexData[b];
  375. if (indexA < indexB)
  376. return true;
  377. return false;
  378. }
  379. };
  380. template <typename TYPE, typename IndexType>
  381. struct FaceValenceSort
  382. {
  383. const OptimizeVertexData<IndexType> *_vertexData;
  384. FaceValenceSort(const OptimizeVertexData<IndexType> *vertexData)
  385. : _vertexData(vertexData)
  386. {
  387. }
  388. bool operator()(TYPE a, TYPE b) const
  389. {
  390. const OptimizeVertexData<IndexType> *vA0 = _vertexData + a * 3 + 0;
  391. const OptimizeVertexData<IndexType> *vA1 = _vertexData + a * 3 + 1;
  392. const OptimizeVertexData<IndexType> *vA2 = _vertexData + a * 3 + 2;
  393. const OptimizeVertexData<IndexType> *vB0 = _vertexData + b * 3 + 0;
  394. const OptimizeVertexData<IndexType> *vB1 = _vertexData + b * 3 + 1;
  395. const OptimizeVertexData<IndexType> *vB2 = _vertexData + b * 3 + 2;
  396. int aValence = vA0->activeFaceListSize + vA1->activeFaceListSize + vA2->activeFaceListSize;
  397. int bValence = vB0->activeFaceListSize + vB1->activeFaceListSize + vB2->activeFaceListSize;
  398. // higher scoring faces are those with lower valence totals
  399. // reverse sort (reverse of reverse)
  400. if (aValence < bValence)
  401. return true;
  402. return false;
  403. }
  404. };
  405. //-----------------------------------------------------------------------------
  406. // OptimizeFaces
  407. //-----------------------------------------------------------------------------
  408. // Parameters:
  409. // indexList
  410. // input index list
  411. // indexCount
  412. // the number of indices in the list
  413. // vertexCount
  414. // the largest index value in indexList
  415. // newIndexList
  416. // a pointer to a preallocated buffer the same size as indexList to
  417. // hold the optimized index list
  418. // lruCacheSize
  419. // the size of the simulated post-transform cache (max:VTX_CACHE_SIZE)
  420. //-----------------------------------------------------------------------------
  421. template <typename IndexType>
  422. void OptimizeFaces(const IndexType *indexList, uint32_t indexCount, IndexType *newIndexList, uint16_t lruCacheSize)
  423. {
  424. OptimizeVertexData<IndexType> *vertexDataList = new OptimizeVertexData<IndexType> [indexCount]; // upper bounds on size is indexCount
  425. IndexType *vertexRemap = new IndexType [indexCount];
  426. uint32_t *activeFaceList = new uint32_t [indexCount];
  427. uint32_t faceCount = indexCount / 3;
  428. uint8_t *processedFaceList = new uint8_t [faceCount];
  429. memset(processedFaceList, 0, sizeof(uint8_t) * faceCount);
  430. unsigned int *faceSorted = new unsigned int [faceCount];
  431. unsigned int *faceReverseLookup = new unsigned int [faceCount];
  432. // build the vertex remap table
  433. unsigned int uniqueVertexCount = 0;
  434. {
  435. typedef IndexSortCompareIndexed<unsigned int, IndexType> indexSorter;
  436. unsigned int *indexSorted = new unsigned int [indexCount];
  437. for (unsigned int i = 0; i < indexCount; i++)
  438. {
  439. indexSorted[i] = i;
  440. }
  441. indexSorter sortFunc(indexList);
  442. std::sort(indexSorted, indexSorted + indexCount, sortFunc);
  443. for (unsigned int i = 0; i < indexCount; i++)
  444. {
  445. if (i == 0
  446. || sortFunc(indexSorted[i - 1], indexSorted[i]))
  447. {
  448. // it's not a duplicate
  449. vertexRemap[indexSorted[i]] = uniqueVertexCount;
  450. uniqueVertexCount++;
  451. }
  452. else
  453. {
  454. vertexRemap[indexSorted[i]] = vertexRemap[indexSorted[i - 1]];
  455. }
  456. }
  457. delete [] indexSorted;
  458. }
  459. // compute face count per vertex
  460. for (uint32_t i=0; i<indexCount; ++i)
  461. {
  462. OptimizeVertexData<IndexType>& vertexData = vertexDataList[vertexRemap[i]];
  463. vertexData.activeFaceListSize++;
  464. }
  465. const IndexType kEvictedCacheIndex = std::numeric_limits<IndexType>::max();
  466. {
  467. // allocate face list per vertex
  468. uint32_t curActiveFaceListPos = 0;
  469. for (uint32_t i = 0; i < uniqueVertexCount; ++i)
  470. {
  471. OptimizeVertexData<IndexType>& vertexData = vertexDataList[i];
  472. vertexData.cachePos0 = kEvictedCacheIndex;
  473. vertexData.cachePos1 = kEvictedCacheIndex;
  474. vertexData.activeFaceListStart = curActiveFaceListPos;
  475. curActiveFaceListPos += vertexData.activeFaceListSize;
  476. vertexData.score = FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos0, lruCacheSize);
  477. vertexData.activeFaceListSize = 0;
  478. }
  479. assert(curActiveFaceListPos == indexCount);
  480. }
  481. // sort unprocessed faces by highest score
  482. for (uint32_t f = 0; f < faceCount; f++)
  483. {
  484. faceSorted[f] = f;
  485. }
  486. FaceValenceSort<unsigned int, IndexType> faceValenceSort(vertexDataList);
  487. std::sort(faceSorted, faceSorted + faceCount, faceValenceSort);
  488. for (uint32_t f = 0; f < faceCount; f++)
  489. {
  490. faceReverseLookup[faceSorted[f]] = f;
  491. }
  492. // fill out face list per vertex
  493. for (uint32_t i=0; i<indexCount; i+=3)
  494. {
  495. for (uint32_t j=0; j<3; ++j)
  496. {
  497. OptimizeVertexData<IndexType>& vertexData = vertexDataList[vertexRemap[i + j]];
  498. activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize] = i;
  499. vertexData.activeFaceListSize++;
  500. }
  501. }
  502. IndexType vertexCacheBuffer[(kMaxVertexCacheSize+3)*2];
  503. IndexType *cache0 = vertexCacheBuffer;
  504. IndexType *cache1 = vertexCacheBuffer+(kMaxVertexCacheSize+3);
  505. IndexType entriesInCache0 = 0;
  506. uint32_t bestFace = 0;
  507. float bestScore = -1.f;
  508. const float maxValenceScore = FindVertexScore(1, kEvictedCacheIndex, lruCacheSize) * 3.f;
  509. unsigned int nextBestFace = 0;
  510. for (uint32_t i = 0; i < indexCount; i += 3)
  511. {
  512. if (bestScore < 0.f)
  513. {
  514. // no verts in the cache are used by any unprocessed faces so
  515. // search all unprocessed faces for a new starting point
  516. for (; nextBestFace < faceCount; nextBestFace++)
  517. {
  518. unsigned int faceIndex = faceSorted[nextBestFace];
  519. if (processedFaceList[faceIndex] == 0)
  520. {
  521. uint32_t face = faceIndex * 3;
  522. float faceScore = 0.f;
  523. for (uint32_t k=0; k<3; ++k)
  524. {
  525. //assert(vertexData.activeFaceListSize > 0);
  526. //assert(vertexData.cachePos0 >= lruCacheSize);
  527. float vertexScore = vertexDataList[vertexRemap[face + k]].score;
  528. faceScore += vertexScore;
  529. }
  530. bestScore = faceScore;
  531. bestFace = face;
  532. nextBestFace++;
  533. break; // we're searching a pre-sorted list, first one we find will be the best
  534. }
  535. }
  536. assert(bestScore >= 0.f);
  537. }
  538. processedFaceList[bestFace / 3] = 1;
  539. uint16_t entriesInCache1 = 0;
  540. // add bestFace to LRU cache and to newIndexList
  541. for (uint32_t v = 0; v < 3; ++v)
  542. {
  543. IndexType index = indexList[bestFace+v];
  544. newIndexList[i+v] = index;
  545. OptimizeVertexData<IndexType>& vertexData = vertexDataList[vertexRemap[bestFace + v]];
  546. if (vertexData.cachePos1 >= entriesInCache1)
  547. {
  548. vertexData.cachePos1 = entriesInCache1;
  549. cache1[entriesInCache1++] = vertexRemap[bestFace + v];
  550. if (vertexData.activeFaceListSize == 1)
  551. {
  552. --vertexData.activeFaceListSize;
  553. continue;
  554. }
  555. }
  556. assert(vertexData.activeFaceListSize > 0);
  557. uint32_t *begin = activeFaceList + vertexData.activeFaceListStart;
  558. uint32_t *end = activeFaceList + (vertexData.activeFaceListStart + vertexData.activeFaceListSize);
  559. uint32_t *it = std::find(begin, end, bestFace);
  560. assert(it != end);
  561. std::swap(*it, *(end-1));
  562. --vertexData.activeFaceListSize;
  563. vertexData.score = FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos1, lruCacheSize);
  564. // need to re-sort the faces that use this vertex, as their score will change due to activeFaceListSize shrinking
  565. for (uint32_t *fi = begin; fi != end - 1; ++fi)
  566. {
  567. unsigned int faceIndex = *fi / 3;
  568. unsigned int n = faceReverseLookup[faceIndex];
  569. assert(faceSorted[n] == faceIndex);
  570. // found it, now move it up
  571. while (n > 0)
  572. {
  573. if (faceValenceSort(n, n - 1))
  574. {
  575. faceReverseLookup[faceSorted[n]] = n - 1;
  576. faceReverseLookup[faceSorted[n - 1]] = n;
  577. std::swap(faceSorted[n], faceSorted[n - 1]);
  578. n--;
  579. }
  580. else
  581. {
  582. break;
  583. }
  584. }
  585. }
  586. }
  587. // move the rest of the old verts in the cache down and compute their new scores
  588. for (uint32_t c0 = 0; c0 < entriesInCache0; ++c0)
  589. {
  590. OptimizeVertexData<IndexType>& vertexData = vertexDataList[cache0[c0]];
  591. if (vertexData.cachePos1 >= entriesInCache1)
  592. {
  593. vertexData.cachePos1 = entriesInCache1;
  594. cache1[entriesInCache1++] = cache0[c0];
  595. vertexData.score = FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos1, lruCacheSize);
  596. // don't need to re-sort this vertex... once it gets out of the cache, it'll have its original score
  597. }
  598. }
  599. // find the best scoring triangle in the current cache (including up to 3 that were just evicted)
  600. bestScore = -1.f;
  601. for (uint32_t c1 = 0; c1 < entriesInCache1; ++c1)
  602. {
  603. OptimizeVertexData<IndexType>& vertexData = vertexDataList[cache1[c1]];
  604. vertexData.cachePos0 = vertexData.cachePos1;
  605. vertexData.cachePos1 = kEvictedCacheIndex;
  606. for (uint32_t j=0; j<vertexData.activeFaceListSize; ++j)
  607. {
  608. uint32_t face = activeFaceList[vertexData.activeFaceListStart+j];
  609. float faceScore = 0.f;
  610. for (uint32_t v=0; v<3; v++)
  611. {
  612. OptimizeVertexData<IndexType>& faceVertexData = vertexDataList[vertexRemap[face + v]];
  613. faceScore += faceVertexData.score;
  614. }
  615. if (faceScore > bestScore)
  616. {
  617. bestScore = faceScore;
  618. bestFace = face;
  619. }
  620. }
  621. }
  622. std::swap(cache0, cache1);
  623. entriesInCache0 = std::min(entriesInCache1, lruCacheSize);
  624. }
  625. delete [] vertexDataList;
  626. delete [] vertexRemap;
  627. delete [] activeFaceList;
  628. delete [] processedFaceList;
  629. delete [] faceSorted;
  630. delete [] faceReverseLookup;
  631. }
  632. } // ForsythSorted
  633. #endif
  634. /******************************************************************************/
  635. #if METHOD==METHOD_FORSYTH2 || METHOD==METHOD_TEST
  636. /*
  637. Copyright (C) 2008 Martin Storsjo
  638. This software is provided 'as-is', without any express or implied
  639. warranty. In no event will the authors be held liable for any damages
  640. arising from the use of this software.
  641. Permission is granted to anyone to use this software for any purpose,
  642. including commercial applications, and to alter it and redistribute it
  643. freely, subject to the following restrictions:
  644. 1. The origin of this software must not be misrepresented; you must not
  645. claim that you wrote the original software. If you use this software
  646. in a product, an acknowledgment in the product documentation would be
  647. appreciated but is not required.
  648. 2. Altered source versions must be plainly marked as such, and must not be
  649. misrepresented as being the original software.
  650. 3. This notice may not be removed or altered from any source distribution.
  651. */
  652. namespace Forsyth2
  653. {
  654. #define VERTEX_CACHE_SIZE VTX_CACHE_SIZE
  655. // The size of these data types affect the memory usage
  656. #if 0 // this improves speed of optimization but reduces precision
  657. typedef UShort ScoreType;
  658. typedef Int ScoreTypeBig;
  659. #define SCORE_SCALING 7281
  660. #else
  661. typedef Flt ScoreType, ScoreTypeBig;
  662. #define SCORE_SCALING 1
  663. #endif
  664. typedef Byte AdjacencyType;
  665. #define MAX_ADJACENCY 0xFF
  666. typedef Int VertexIndexType;
  667. typedef SByte CachePosType;
  668. typedef Int TriangleIndexType;
  669. typedef Int ArrayIndexType;
  670. // The size of the precalculated tables
  671. #define CACHE_SCORE_TABLE_SIZE VTX_CACHE_SIZE
  672. #define VALENCE_SCORE_TABLE_SIZE VTX_CACHE_SIZE
  673. #if CACHE_SCORE_TABLE_SIZE<VERTEX_CACHE_SIZE
  674. #error Vertex score table too small
  675. #endif
  676. // Precalculated tables
  677. static ScoreType cachePositionScore[ CACHE_SCORE_TABLE_SIZE];
  678. static ScoreType valenceScore [VALENCE_SCORE_TABLE_SIZE];
  679. #define ISADDED(x) (triangleAdded[(x) >> 3] & (1 << (x & 7)))
  680. #define SETADDED(x) (triangleAdded[(x) >> 3] |= (1 << (x & 7)))
  681. // Score function constants
  682. #define CACHE_DECAY_POWER 1.5f
  683. #define LAST_TRI_SCORE 0.75f
  684. #define VALENCE_BOOST_SCALE 2.0f
  685. #define VALENCE_BOOST_POWER 0.5f
  686. // Precalculate the tables
  687. void initForsyth()
  688. {
  689. for(int i=0; i<CACHE_SCORE_TABLE_SIZE; i++)
  690. {
  691. Flt score=0;
  692. if(i<3)
  693. {
  694. // This vertex was used in the last triangle, so it has a fixed score, which ever of the three it's in.
  695. // Otherwise, you can get very different answers depending on whether you add the triangle 1,2,3 or 3,1,2-which is silly
  696. score=LAST_TRI_SCORE;
  697. }else
  698. {
  699. // Points for being high in the cache.
  700. const Flt scaler=1.0f/(VERTEX_CACHE_SIZE-3);
  701. score=1.0f-(i-3)*scaler;
  702. score=Pow(score, CACHE_DECAY_POWER);
  703. }
  704. cachePositionScore[i]=(ScoreType) (SCORE_SCALING*score);
  705. }
  706. for(int i=1; i<VALENCE_SCORE_TABLE_SIZE; i++)
  707. {
  708. // Bonus points for having a low number of tris still to use the vert, so we get rid of lone verts quickly
  709. Flt valenceBoost=Pow(i, -VALENCE_BOOST_POWER);
  710. Flt score=VALENCE_BOOST_SCALE*valenceBoost;
  711. valenceScore[i]=(ScoreType) (SCORE_SCALING*score);
  712. }
  713. }
  714. // Calculate the score for a vertex
  715. ScoreType findVertexScore(int numActiveTris, int cachePosition)
  716. {
  717. if(!numActiveTris)return 0; // No triangles need this vertex
  718. ScoreType score=0;
  719. if(InRange(cachePosition, cachePositionScore))score =cachePositionScore[cachePosition];
  720. if(InRange(numActiveTris, valenceScore))score+= valenceScore[numActiveTris];
  721. return score;
  722. }
  723. // The main reordering function
  724. void reorderForsyth(const VertexIndexType *indices, int nTriangles, int nVertices, VertexIndexType *outputIndices)
  725. {
  726. AdjacencyType *numActiveTris=new AdjacencyType[nVertices];
  727. memset(numActiveTris, 0, sizeof(AdjacencyType)*nVertices);
  728. // First scan over the vertex data, count the total number of occurrences of each vertex
  729. for(int i=0; i<3*nTriangles; i++)
  730. {
  731. if(numActiveTris[indices[i]]==MAX_ADJACENCY)
  732. {
  733. // Unsupported mesh, vertex shared by too many triangles
  734. delete [] numActiveTris;
  735. CopyN(outputIndices, indices, nTriangles*3);
  736. return;
  737. }
  738. numActiveTris[indices[i]]++;
  739. }
  740. // Allocate the rest of the arrays
  741. ArrayIndexType *offsets=new ArrayIndexType[nVertices];
  742. ScoreType *lastScore=new ScoreType[nVertices];
  743. CachePosType *cacheTag=new CachePosType[nVertices];
  744. Byte *triangleAdded=new Byte[(nTriangles + 7)/8];
  745. ScoreType *triangleScore=new ScoreType[nTriangles];
  746. TriangleIndexType *triangleIndices=new TriangleIndexType[3*nTriangles];
  747. memset(triangleAdded, 0, sizeof(Byte)*((nTriangles + 7)/8));
  748. memset(triangleScore, 0, sizeof(ScoreType)*nTriangles);
  749. memset(triangleIndices, 0, sizeof(TriangleIndexType)*3*nTriangles);
  750. TriangleIndexType *outTriangles=new TriangleIndexType[nTriangles];
  751. // Count the triangle array offset for each vertex, initialize the rest of the data.
  752. int sum=0;
  753. for(int i=0; i<nVertices; i++)
  754. {
  755. offsets[i]=sum;
  756. sum+=numActiveTris[i];
  757. numActiveTris[i]=0;
  758. cacheTag[i]=-1;
  759. }
  760. // Fill the vertex data structures with indices to the triangles using each vertex
  761. for(int i=0; i<nTriangles; i++)
  762. for(int j=0; j<3; j++)
  763. {
  764. int v=indices[3*i + j];
  765. triangleIndices[offsets[v] + numActiveTris[v]]=i;
  766. numActiveTris[v]++;
  767. }
  768. // Initialize the score for all vertices
  769. for(int i=0; i<nVertices; i++)
  770. {
  771. int tris=numActiveTris[i],
  772. offset=offsets [i];
  773. ScoreType score=lastScore [i]=findVertexScore(tris, cacheTag[i]);
  774. REPD(j, tris)triangleScore[triangleIndices[offset+j]]+=score;
  775. }
  776. // Find the best triangle
  777. int bestTriangle=-1;
  778. ScoreTypeBig bestScore=-1;
  779. for(int i=0; i<nTriangles; i++)if(triangleScore[i]>bestScore)
  780. {
  781. bestScore=triangleScore[i];
  782. bestTriangle=i;
  783. }
  784. // Initialize the cache
  785. int cache[VERTEX_CACHE_SIZE + 3];
  786. for(int i=0; i<VERTEX_CACHE_SIZE + 3; i++)cache[i]=-1;
  787. // Output the currently best triangle, as long as there are triangles left to output
  788. int outPos=0, scanPos=0;
  789. while(bestTriangle>=0)
  790. {
  791. // Mark the triangle as added
  792. SETADDED(bestTriangle);
  793. // Output this triangle
  794. outTriangles[outPos++]=bestTriangle;
  795. for(int i=0; i<3; i++)
  796. {
  797. // Update this vertex
  798. int v=indices[3*bestTriangle + i];
  799. // Check the current cache position, if it is in the cache
  800. int endpos=cacheTag[v];
  801. if(endpos<0)endpos=VERTEX_CACHE_SIZE + i;
  802. // Move all cache entries from the previous position in the cache to the new target position (i) one step backwards
  803. for(int j=endpos; j>i; j--)
  804. {
  805. cache[j]=cache[j-1];
  806. // If this cache slot contains a real vertex, update its cache tag
  807. if(cache[j]>=0)cacheTag[cache[j]]++;
  808. }
  809. // Insert the current vertex into its new target slot
  810. cache[i]=v;
  811. cacheTag[v]=i;
  812. // Find the current triangle in the list of active triangles and remove it (moving the last triangle in the list to the slot of this triangle).
  813. for(int j=0; j<numActiveTris[v]; j++)
  814. {
  815. if(triangleIndices[offsets[v] + j]==bestTriangle)
  816. {
  817. triangleIndices[offsets[v] + j]=triangleIndices[offsets[v] + numActiveTris[v]-1];
  818. break;
  819. }
  820. }
  821. // Shorten the list
  822. numActiveTris[v]--;
  823. }
  824. // Update the scores of all triangles in the cache
  825. for(int i=0; i<VERTEX_CACHE_SIZE + 3; i++)
  826. {
  827. int v=cache[i];
  828. if(v<0)break;
  829. if(i>=VERTEX_CACHE_SIZE) // This vertex was pushed outside of the actual cache
  830. {
  831. cacheTag[v]=-1;
  832. cache [i]=-1;
  833. }
  834. ScoreType newScore=findVertexScore(numActiveTris[v], cacheTag[v]);
  835. ScoreType diff=newScore-lastScore[v];
  836. for(int j=0; j<numActiveTris[v]; j++)triangleScore[triangleIndices[offsets[v] + j]]+=diff;
  837. lastScore[v]=newScore;
  838. }
  839. // Find the best triangle referenced by vertices in the cache
  840. bestTriangle=-1;
  841. bestScore=-1;
  842. for(int i=0; i<VERTEX_CACHE_SIZE; i++)
  843. {
  844. if(cache[i]<0)break;
  845. int v=cache[i];
  846. for(int j=0; j<numActiveTris[v]; j++)
  847. {
  848. int t=triangleIndices[offsets[v] + j];
  849. if(triangleScore[t]>bestScore)
  850. {
  851. bestTriangle=t;
  852. bestScore=triangleScore[t];
  853. }
  854. }
  855. }
  856. // If no active triangle was found at all, continue scanning the whole list of triangles
  857. if(bestTriangle<0)
  858. for(; scanPos<nTriangles; scanPos++)if(!ISADDED(scanPos)){bestTriangle=scanPos; break;}
  859. }
  860. // Convert the triangle index array into a full triangle list
  861. outPos=0;
  862. for(int i=0; i<nTriangles; i++)
  863. {
  864. int t=outTriangles[i]*3;
  865. for(int j=0; j<3; j++)outputIndices[outPos++]=indices[t+j];
  866. }
  867. // Clean up
  868. delete [] triangleIndices;
  869. delete [] offsets;
  870. delete [] lastScore;
  871. delete [] numActiveTris;
  872. delete [] cacheTag;
  873. delete [] triangleAdded;
  874. delete [] triangleScore;
  875. delete [] outTriangles;
  876. }
  877. } // Forsyth2
  878. #endif
  879. /******************************************************************************/
  880. #if METHOD==METHOD_TIPSIFY || METHOD==METHOD_TEST
  881. namespace Tipsify
  882. {
  883. #define DEAD_END_STACK_SIZE 128
  884. #define DEAD_END_STACK_MASK (DEAD_END_STACK_SIZE-1)
  885. // The size of these data types control the memory usage
  886. typedef Byte AdjacencyType;
  887. #define MAX_ADJACENCY 0xFF
  888. typedef Int VertexIndexType;
  889. typedef Int TriangleIndexType;
  890. typedef Int ArrayIndexType;
  891. #define ISEMITTED(x) (emitted[(x) >> 3] & (1 << (x & 7)))
  892. #define SETEMITTED(x) (emitted[(x) >> 3] |= (1 << (x & 7)))
  893. // Find the next non-local vertex to continue from
  894. int skipDeadEnd(const AdjacencyType *liveTriangles,
  895. const VertexIndexType *deadEndStack,
  896. int& deadEndStackPos,
  897. int& deadEndStackStart,
  898. int nVertices,
  899. int& i) {
  900. // Next in dead-end stack
  901. while ((deadEndStackPos & DEAD_END_STACK_MASK) != deadEndStackStart) {
  902. int d=deadEndStack[(--deadEndStackPos) & DEAD_END_STACK_MASK];
  903. // Check for live triangles
  904. if(liveTriangles[d]>0)
  905. return d;
  906. }
  907. // Next in input order
  908. while (i + 1<nVertices) {
  909. // Cursor sweeps list only once
  910. i++;
  911. // Check for live triangles
  912. if(liveTriangles[i]>0)
  913. return i;
  914. }
  915. // We are done!
  916. return -1;
  917. }
  918. // Find the next vertex to continue from
  919. int getNextVertex(int nVertices,
  920. int& i,
  921. int k,
  922. const VertexIndexType *nextCandidates,
  923. int numNextCandidates,
  924. const ArrayIndexType *cacheTime,
  925. int s,
  926. const AdjacencyType *liveTriangles,
  927. const VertexIndexType *deadEndStack,
  928. int& deadEndStackPos,
  929. int& deadEndStackStart) {
  930. // Best candidate
  931. int n=-1;
  932. // and priority
  933. int m=-1;
  934. for(int j=0; j<numNextCandidates; j++) {
  935. int v=nextCandidates[j];
  936. // Must have live triangles
  937. if(liveTriangles[v]>0) {
  938. // Initial priority
  939. int p=0;
  940. // In cache even after fanning?
  941. if(s-cacheTime[v] + 2*liveTriangles[v] <= k)
  942. // Priority is position in cache
  943. p=s-cacheTime[v];
  944. // Keep best candidate
  945. if(p>m) {
  946. m=p;
  947. n=v;
  948. }
  949. }
  950. }
  951. // Reached a dead-end?
  952. if(n==-1) {
  953. // Get non-local vertex
  954. n=skipDeadEnd(liveTriangles, deadEndStack,
  955. deadEndStackPos, deadEndStackStart,
  956. nVertices, i);
  957. }
  958. return n;
  959. }
  960. // The main reordering function
  961. void tipsify(const VertexIndexType *indices,
  962. int nTriangles,
  963. int nVertices,
  964. VertexIndexType *outputIndices,
  965. int k=32)
  966. {
  967. // Vertex-triangle adjacency
  968. // Count the occurrances of each vertex
  969. AdjacencyType *numOccurrances=new AdjacencyType[nVertices];
  970. memset(numOccurrances, 0, sizeof(AdjacencyType)*nVertices);
  971. for(int i=0; i<3*nTriangles; i++) {
  972. int v=indices[i];
  973. if(numOccurrances[v]==MAX_ADJACENCY) {
  974. // Unsupported mesh,
  975. // vertex shared by too many triangles
  976. delete [] numOccurrances;
  977. CopyN(outputIndices, indices, nTriangles*3);
  978. return;
  979. }
  980. numOccurrances[v]++;
  981. }
  982. // Find the offsets into the adjacency array for each vertex
  983. int sum=0;
  984. ArrayIndexType *offsets=new ArrayIndexType[nVertices+1];
  985. int maxAdjacency=0;
  986. for(int i=0; i<nVertices; i++) {
  987. offsets[i]=sum;
  988. sum+=numOccurrances[i];
  989. if(numOccurrances[i]>maxAdjacency)
  990. maxAdjacency=numOccurrances[i];
  991. numOccurrances[i]=0;
  992. }
  993. offsets[nVertices]=sum;
  994. // Add the triangle indices to the vertices it refers to
  995. TriangleIndexType *adjacency=new TriangleIndexType[3*nTriangles];
  996. for(int i=0; i<nTriangles; i++) {
  997. const VertexIndexType *vptr=&indices[3*i];
  998. adjacency[offsets[vptr[0]] + numOccurrances[vptr[0]]]=i;
  999. numOccurrances[vptr[0]]++;
  1000. adjacency[offsets[vptr[1]] + numOccurrances[vptr[1]]]=i;
  1001. numOccurrances[vptr[1]]++;
  1002. adjacency[offsets[vptr[2]] + numOccurrances[vptr[2]]]=i;
  1003. numOccurrances[vptr[2]]++;
  1004. }
  1005. // Per-vertex live triangle counts
  1006. AdjacencyType *liveTriangles=numOccurrances;
  1007. // Per-vertex caching time stamps
  1008. ArrayIndexType *cacheTime=new ArrayIndexType[nVertices];
  1009. memset(cacheTime, 0, sizeof(ArrayIndexType)*nVertices);
  1010. // Dead-end vertex stack
  1011. VertexIndexType *deadEndStack=new VertexIndexType[DEAD_END_STACK_SIZE];
  1012. memset(deadEndStack, 0, sizeof(VertexIndexType)*DEAD_END_STACK_SIZE);
  1013. int deadEndStackPos=0;
  1014. int deadEndStackStart=0;
  1015. // Per triangle emitted flag
  1016. Byte *emitted=new Byte[(nTriangles + 7)/8];
  1017. memset(emitted, 0, sizeof(Byte)*((nTriangles + 7)/8));
  1018. // Empty output buffer
  1019. TriangleIndexType *outputTriangles=new TriangleIndexType[nTriangles];
  1020. int outputPos=0;
  1021. // Arbitrary starting vertex
  1022. int f=0;
  1023. // Time stamp and cursor
  1024. int s=k + 1;
  1025. int i=0;
  1026. VertexIndexType *nextCandidates=new VertexIndexType[3*maxAdjacency];
  1027. // For all valid fanning vertices
  1028. while (f>=0) {
  1029. // 1-ring of next candidates
  1030. int numNextCandidates=0;
  1031. int startOffset=offsets[f];
  1032. int endOffset=offsets[f+1];
  1033. for(int offset=startOffset; offset<endOffset; offset++) {
  1034. int t=adjacency[offset];
  1035. if(!ISEMITTED(t)) {
  1036. const VertexIndexType *vptr=&indices[3*t];
  1037. // Output triangle
  1038. outputTriangles[outputPos++]=t;
  1039. for(int j=0; j<3; j++) {
  1040. int v=vptr[j];
  1041. // Add to dead-end stack
  1042. deadEndStack[(deadEndStackPos++) & DEAD_END_STACK_MASK]=v;
  1043. if((deadEndStackPos & DEAD_END_STACK_MASK) ==
  1044. (deadEndStackStart & DEAD_END_STACK_MASK))
  1045. deadEndStackStart=(deadEndStackStart + 1) & DEAD_END_STACK_MASK;
  1046. // Register as candidate
  1047. nextCandidates[numNextCandidates++]=v;
  1048. // Decrease live triangle count
  1049. liveTriangles[v]--;
  1050. // If not in cache
  1051. if(s-cacheTime[v]>k) {
  1052. // Set time stamp
  1053. cacheTime[v]=s;
  1054. // Increment time stamp
  1055. s++;
  1056. }
  1057. }
  1058. // Flag triangle as emitted
  1059. SETEMITTED(t);
  1060. }
  1061. }
  1062. // Select next fanning vertex
  1063. f=getNextVertex(nVertices, i, k, nextCandidates,
  1064. numNextCandidates, cacheTime, s,
  1065. liveTriangles, deadEndStack,
  1066. deadEndStackPos, deadEndStackStart);
  1067. }
  1068. // Clean up
  1069. delete [] nextCandidates;
  1070. delete [] emitted;
  1071. delete [] deadEndStack;
  1072. delete [] cacheTime;
  1073. delete [] adjacency;
  1074. delete [] offsets;
  1075. delete [] numOccurrances;
  1076. // Convert the triangle index array into a full triangle list
  1077. outputPos=0;
  1078. for(int i=0; i<nTriangles; i++) {
  1079. int t=outputTriangles[i];
  1080. for(int j=0; j<3; j++) {
  1081. int v=indices[3*t + j];
  1082. outputIndices[outputPos++]=v;
  1083. }
  1084. }
  1085. delete [] outputTriangles;
  1086. }
  1087. } // Tipsify
  1088. #endif
  1089. /******************************************************************************/
  1090. namespace EE{
  1091. /******************************************************************************/
  1092. MeshBase& MeshBase::optimizeCache(Bool faces, Bool vertexes)
  1093. {
  1094. Memt<Int> remap;
  1095. // face reorder
  1096. if(faces)
  1097. {
  1098. quadToTri().exclude(VTX_DUP|ADJ_ALL);
  1099. #if METHOD==METHOD_DIRECTX
  1100. setAdjacencies(true, false); ASSERT(Int(UNUSED32)==-1); // 'setAdjacencies' sets -1 for invalid while these codes operate on UNUSED32
  1101. ASSERT(SIZE(tri.ind (0))==SIZE(uint32_t)*3);
  1102. ASSERT(SIZE(tri.adjFace(0))==SIZE(uint32_t)*3);
  1103. remap.reserve(Max(tris(), vertexes ? vtxs() : 0)); ASSERT(SIZE(Int)==SIZE(uint32_t));
  1104. remap.setNum(tris());
  1105. if(DirectX::OptimizeFaces((uint32_t*)tri.ind(), tris(), (uint32_t*)tri.adjFace(), (uint32_t*)remap.data(), VTX_CACHE_SIZE, VTX_CACHE_SIZE))
  1106. {
  1107. MeshBase temp(0, 0, remap.elms(), 0, TRI_IND);
  1108. REPA(remap)temp.tri.ind(i)=tri.ind(remap[i]);
  1109. Swap(temp.tri, tri);
  1110. }
  1111. #else
  1112. MeshBase temp(0, 0, tris(), 0, TRI_IND);
  1113. #if METHOD==METHOD_FORSYTH
  1114. Forsyth::OptimizeFaces(tri.ind()->c, tris()*3, vtxs(), temp.tri.ind()->c, VTX_CACHE_SIZE);
  1115. #elif METHOD==METHOD_FORSYTH_SORTED
  1116. ForsythSorted::OptimizeFaces<Int>(tri.ind()->c, tris()*3, temp.tri.ind()->c, VTX_CACHE_SIZE);
  1117. #elif METHOD==METHOD_FORSYTH2
  1118. Forsyth2::reorderForsyth(tri.ind()->c, tris(), vtxs(), temp.tri.ind()->c);
  1119. #elif METHOD==METHOD_TIPSIFY
  1120. Tipsify::tipsify(tri.ind()->c, tris(), vtxs(), temp.tri.ind()->c);
  1121. #endif
  1122. Swap(temp.tri, tri);
  1123. #endif
  1124. }
  1125. // vtx reorder
  1126. if(vertexes)
  1127. {
  1128. remap.setNum(vtxs());
  1129. SetMemN(remap.data(), 0xFF, remap.elms());
  1130. Int used=0,
  1131. *p= tri.ind()->c; REP( tris()*3){Int v=*p++; if(!InRange(v, remap))return T; if(remap[v]<0)remap[v]=used++;}
  1132. p=quad.ind()->c; REP(quads()*4){Int v=*p++; if(!InRange(v, remap))return T; if(remap[v]<0)remap[v]=used++;}
  1133. vertexes=false; FREP(vtxs()){Int &v=remap[i]; if(v<0)v=used++; if(v!=i)vertexes=true;}
  1134. if(vertexes)
  1135. {
  1136. MeshBase temp(remap.elms(), 0, 0, 0, flag()&VTX_ALL);
  1137. REPA(temp.vtx)copyVtx(i, temp, remap[i]);
  1138. Swap(temp.vtx, vtx);
  1139. REPA(tri )tri .ind(i).remap(remap.data());
  1140. REPA(quad)quad.ind(i).remap(remap.data());
  1141. }
  1142. }
  1143. return T;
  1144. }
  1145. /******************************************************************************/
  1146. MeshRender& MeshRender::optimize(Bool faces, Bool vertexes)
  1147. {
  1148. Memt<Int> remap;
  1149. Ptr ind=null;
  1150. // face reorder
  1151. #if METHOD!=METHOD_TEST
  1152. if(faces)
  1153. #endif
  1154. {
  1155. #if METHOD==METHOD_DIRECTX
  1156. MeshBase temp; if(temp.createInd(_ib))
  1157. {
  1158. temp.vtx._elms=vtxs(); // this is needed for 'setAdjacencies'
  1159. temp.setAdjacencies(true, false);
  1160. remap.reserve(Max(tris(), vertexes ? vtxs() : 0));
  1161. remap.setNum(tris());
  1162. Bool ok=false;
  1163. if(_bone_split)
  1164. {
  1165. Int processed_tris=0; Memt<uint32_t> attributes; attributes.setNum(tris());
  1166. FREP(_bone_splits)
  1167. {
  1168. BoneSplit &bs=_bone_split[i];
  1169. REPD(t, bs.tris)attributes[processed_tris++]=i;
  1170. }
  1171. ok=DirectX::OptimizeFacesEx((uint32_t*)temp.tri.ind(), tris(), (uint32_t*)temp.tri.adjFace(), attributes.data(), (uint32_t*)remap.data(), VTX_CACHE_SIZE, VTX_CACHE_SIZE);
  1172. }else
  1173. {
  1174. ok=DirectX::OptimizeFaces((uint32_t*)temp.tri.ind(), tris(), (uint32_t*)temp.tri.adjFace(), (uint32_t*)remap.data(), VTX_CACHE_SIZE, VTX_CACHE_SIZE);
  1175. }
  1176. if(ok)if(ind=indLock(LOCK_WRITE))
  1177. {
  1178. if(indBit16()){VecUS *tri=(VecUS*)ind; REP(tris())tri[i]=temp.tri.ind(remap[i]);}
  1179. else {VecI *tri=(VecI *)ind; REP(tris())tri[i]=temp.tri.ind(remap[i]);}
  1180. }
  1181. }
  1182. #else
  1183. if(ind=indLock())
  1184. {
  1185. Memt<VecI> temp; temp.setNum(tris()*2);
  1186. VecI *org=temp.data(), *reordered=temp.data()+tris();
  1187. if(indBit16())Copy16To32(org, ind, tris()*3);
  1188. else Copy32To32(org, ind, tris()*3);
  1189. if(_bone_split)
  1190. {
  1191. Int processed_tris=0;
  1192. FREP(_bone_splits)
  1193. {
  1194. BoneSplit &bs=_bone_split[i];
  1195. #if METHOD==METHOD_FORSYTH
  1196. Forsyth::OptimizeFaces((org+processed_tris)->c, bs.tris*3, vtxs(), (reordered+processed_tris)->c, VTX_CACHE_SIZE);
  1197. #elif METHOD==METHOD_FORSYTH_SORTED
  1198. ForsythSorted::OptimizeFaces<Int>((org+processed_tris)->c, bs.tris*3, (reordered+processed_tris)->c, VTX_CACHE_SIZE);
  1199. #elif METHOD==METHOD_FORSYTH2
  1200. Forsyth2::reorderForsyth((org+processed_tris)->c, bs.tris, vtxs(), (reordered+processed_tris)->c);
  1201. #elif METHOD==METHOD_TIPSIFY
  1202. Tipsify::tipsify((org+processed_tris)->c, bs.tris, vtxs(), (reordered+processed_tris)->c);
  1203. #endif
  1204. processed_tris+=bs.tris;
  1205. }
  1206. }else
  1207. {
  1208. #if METHOD==METHOD_FORSYTH
  1209. Forsyth::OptimizeFaces(org->c, tris()*3, vtxs(), reordered->c, VTX_CACHE_SIZE);
  1210. #elif METHOD==METHOD_FORSYTH_SORTED
  1211. ForsythSorted::OptimizeFaces<Int>(org->c, tris()*3, reordered->c, VTX_CACHE_SIZE);
  1212. #elif METHOD==METHOD_FORSYTH2
  1213. Forsyth2::reorderForsyth(org->c, tris(), vtxs(), reordered->c);
  1214. #elif METHOD==METHOD_TIPSIFY
  1215. Tipsify::tipsify(org->c, tris(), vtxs(), reordered->c);
  1216. #elif METHOD==METHOD_TEST
  1217. //if(faces)
  1218. Forsyth::OptimizeFaces(org->c, tris()*3, vtxs(), reordered->c, VTX_CACHE_SIZE);
  1219. //else
  1220. //ForsythSorted::OptimizeFaces<Int>(org->c, tris()*3, reordered->c, VTX_CACHE_SIZE);
  1221. //Forsyth2::reorderForsyth(org->c, tris(), vtxs(), reordered->c);
  1222. //Tipsify::tipsify(org->c, tris(), vtxs(), reordered->c);
  1223. #endif
  1224. }
  1225. if(indBit16())Copy32To16(ind, reordered, tris()*3);
  1226. else Copy32To32(ind, reordered, tris()*3);
  1227. }
  1228. #endif
  1229. }
  1230. // vtx reorder
  1231. if(vertexes)
  1232. {
  1233. if(!ind)ind=indLock();
  1234. if( ind)
  1235. {
  1236. remap.setNum(vtxs());
  1237. SetMemN(remap.data(), 0xFF, remap.elms());
  1238. Int used=0; // iterate all triangle indexes in order, and set new indexes for the vertexes, so they are listed in order as they are used by the triangles
  1239. if(indBit16()){U16 *p=(U16*)ind; REP(tris()*3){Int v=*p++; if(!InRange(v, remap)){vertexes=false; break;} if(remap[v]<0)remap[v]=used++;}}
  1240. else {U32 *p=(U32*)ind; REP(tris()*3){Int v=*p++; if(!InRange(v, remap)){vertexes=false; break;} if(remap[v]<0)remap[v]=used++;}}
  1241. if(vertexes)
  1242. {
  1243. vertexes=false; FREP(vtxs()){Int &v=remap[i]; if(v<0)v=used++; if(v!=i)vertexes=true;}
  1244. if(vertexes)if(Byte *vtx=vtxLock())
  1245. {
  1246. Memt<Byte> vtx_temp; vtx_temp.setNum(vtxs()*vtxSize());
  1247. REP(vtxs())CopyFast(vtx_temp.data() + remap[i]*vtxSize(), vtx + i*vtxSize(), vtxSize());
  1248. CopyFast(vtx, vtx_temp.data(), vtx_temp.elms());
  1249. vtxUnlock();
  1250. if(indBit16()){VecUS *tri=(VecUS*)ind; REP(tris())tri[i].remap(remap.data());}
  1251. else {VecI *tri=(VecI *)ind; REP(tris())tri[i].remap(remap.data());}
  1252. }
  1253. }
  1254. }
  1255. }
  1256. if(ind)indUnlock();
  1257. return T;
  1258. }
  1259. /******************************************************************************/
  1260. void InitMesh()
  1261. {
  1262. #if METHOD==METHOD_FORSYTH || METHOD==METHOD_TEST
  1263. Forsyth::ComputeVertexScores();
  1264. #endif
  1265. #if METHOD==METHOD_FORSYTH_SORTED || METHOD==METHOD_TEST
  1266. ForsythSorted::ComputeVertexScores();
  1267. #endif
  1268. #if METHOD==METHOD_FORSYTH2 || METHOD==METHOD_TEST
  1269. Forsyth2::initForsyth();
  1270. #endif
  1271. }
  1272. /******************************************************************************/
  1273. }
  1274. /******************************************************************************/