|
@@ -7,6 +7,26 @@ Copyright (c) 2006-2022, assimp team
|
|
|
|
|
|
|
|
|
|
|
|
+All rights reserved.
|
|
|
+
|
|
|
+Redistribution and use of this software in source and binary forms,
|
|
|
+with or without modification, are permitted provided that the following
|
|
|
+conditions are met:
|
|
|
+
|
|
|
+* Redistributions of source code must retain the above
|
|
|
+ copyright notice, this list of conditions and the
|
|
|
+ following disclaimer.
|
|
|
+
|
|
|
+* Redistributions in binary form must reproduce the above
|
|
|
+ copyright notice, this list of conditions and the
|
|
|
+ following disclaimer in the documentation and/or other
|
|
|
+ materials provided with the distribution./*
|
|
|
+---------------------------------------------------------------------------
|
|
|
+Open Asset Import Library (assimp)
|
|
|
+---------------------------------------------------------------------------
|
|
|
+
|
|
|
+Copyright (c) 2006-2023, assimp team
|
|
|
+
|
|
|
All rights reserved.
|
|
|
|
|
|
Redistribution and use of this software in source and binary forms,
|
|
@@ -59,31 +79,31 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
#include <stdio.h>
|
|
|
#include <stack>
|
|
|
|
|
|
-using namespace Assimp;
|
|
|
+namespace Assimp {
|
|
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
|
// Constructor to be privately used by Importer
|
|
|
-ImproveCacheLocalityProcess::ImproveCacheLocalityProcess()
|
|
|
-: mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
|
|
|
+ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() :
|
|
|
+ mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
|
|
|
// empty
|
|
|
}
|
|
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
|
// Returns whether the processing step is present in the given flag field.
|
|
|
-bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const {
|
|
|
+bool ImproveCacheLocalityProcess::IsActive(unsigned int pFlags) const {
|
|
|
return (pFlags & aiProcess_ImproveCacheLocality) != 0;
|
|
|
}
|
|
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
|
// Setup configuration
|
|
|
-void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) {
|
|
|
+void ImproveCacheLocalityProcess::SetupProperties(const Importer *pImp) {
|
|
|
// AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
|
|
|
- mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
|
|
|
+ mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE, PP_ICL_PTCACHE_SIZE);
|
|
|
}
|
|
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
|
// Executes the post processing step on the given imported data.
|
|
|
-void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
|
|
|
+void ImproveCacheLocalityProcess::Execute(aiScene *pScene) {
|
|
|
if (!pScene->mNumMeshes) {
|
|
|
ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes");
|
|
|
return;
|
|
@@ -93,11 +113,11 @@ void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
|
|
|
|
|
|
float out = 0.f;
|
|
|
unsigned int numf = 0, numm = 0;
|
|
|
- for( unsigned int a = 0; a < pScene->mNumMeshes; ++a ){
|
|
|
- const float res = ProcessMesh( pScene->mMeshes[a],a);
|
|
|
+ for (unsigned int a = 0; a < pScene->mNumMeshes; ++a) {
|
|
|
+ const float res = ProcessMesh(pScene->mMeshes[a], a);
|
|
|
if (res) {
|
|
|
numf += pScene->mMeshes[a]->mNumFaces;
|
|
|
- out += res;
|
|
|
+ out += res;
|
|
|
++numm;
|
|
|
}
|
|
|
}
|
|
@@ -109,9 +129,54 @@ void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+// ------------------------------------------------------------------------------------------------
|
|
|
+static ai_real calculateInputACMR(aiMesh *pMesh, const aiFace *const pcEnd,
|
|
|
+ unsigned int configCacheDepth, unsigned int meshNum) {
|
|
|
+ ai_real fACMR = 0.0f;
|
|
|
+ unsigned int *piFIFOStack = new unsigned int[configCacheDepth];
|
|
|
+ memset(piFIFOStack, 0xff, configCacheDepth * sizeof(unsigned int));
|
|
|
+ unsigned int *piCur = piFIFOStack;
|
|
|
+ const unsigned int *const piCurEnd = piFIFOStack + configCacheDepth;
|
|
|
+
|
|
|
+ // count the number of cache misses
|
|
|
+ unsigned int iCacheMisses = 0;
|
|
|
+ for (const aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
|
|
|
+ for (unsigned int qq = 0; qq < 3; ++qq) {
|
|
|
+ bool bInCache = false;
|
|
|
+ for (unsigned int *pp = piFIFOStack; pp < piCurEnd; ++pp) {
|
|
|
+ if (*pp == pcFace->mIndices[qq]) {
|
|
|
+ // the vertex is in cache
|
|
|
+ bInCache = true;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (!bInCache) {
|
|
|
+ ++iCacheMisses;
|
|
|
+ if (piCurEnd == piCur) {
|
|
|
+ piCur = piFIFOStack;
|
|
|
+ }
|
|
|
+ *piCur++ = pcFace->mIndices[qq];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ delete[] piFIFOStack;
|
|
|
+ fACMR = (ai_real)iCacheMisses / pMesh->mNumFaces;
|
|
|
+ if (3.0 == fACMR) {
|
|
|
+ char szBuff[128]; // should be sufficiently large in every case
|
|
|
+
|
|
|
+ // the JoinIdenticalVertices process has not been executed on this
|
|
|
+ // mesh, otherwise this value would normally be at least minimally
|
|
|
+ // smaller than 3.0 ...
|
|
|
+ ai_snprintf(szBuff, 128, "Mesh %u: Not suitable for vcache optimization", meshNum);
|
|
|
+ ASSIMP_LOG_WARN(szBuff);
|
|
|
+ return static_cast<ai_real>(0.f);
|
|
|
+ }
|
|
|
+ return fACMR;
|
|
|
+}
|
|
|
+
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
|
// Improves the cache coherency of a specific mesh
|
|
|
-ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) {
|
|
|
+ai_real ImproveCacheLocalityProcess::ProcessMesh(aiMesh *pMesh, unsigned int meshNum) {
|
|
|
// TODO: rewrite this to use std::vector or boost::shared_array
|
|
|
ai_assert(nullptr != pMesh);
|
|
|
|
|
@@ -126,91 +191,57 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
|
|
|
return static_cast<ai_real>(0.f);
|
|
|
}
|
|
|
|
|
|
- if(pMesh->mNumVertices <= mConfigCacheDepth) {
|
|
|
+ if (pMesh->mNumVertices <= mConfigCacheDepth) {
|
|
|
return static_cast<ai_real>(0.f);
|
|
|
}
|
|
|
|
|
|
ai_real fACMR = 3.f;
|
|
|
- const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
|
|
|
+ const aiFace *const pcEnd = pMesh->mFaces + pMesh->mNumFaces;
|
|
|
|
|
|
// Input ACMR is for logging purposes only
|
|
|
- if (!DefaultLogger::isNullLogger()) {
|
|
|
-
|
|
|
- unsigned int* piFIFOStack = new unsigned int[mConfigCacheDepth];
|
|
|
- memset(piFIFOStack,0xff,mConfigCacheDepth*sizeof(unsigned int));
|
|
|
- unsigned int* piCur = piFIFOStack;
|
|
|
- const unsigned int* const piCurEnd = piFIFOStack + mConfigCacheDepth;
|
|
|
-
|
|
|
- // count the number of cache misses
|
|
|
- unsigned int iCacheMisses = 0;
|
|
|
- for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
|
|
|
- for (unsigned int qq = 0; qq < 3;++qq) {
|
|
|
- bool bInCache = false;
|
|
|
- for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
|
|
|
- if (*pp == pcFace->mIndices[qq]) {
|
|
|
- // the vertex is in cache
|
|
|
- bInCache = true;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- if (!bInCache) {
|
|
|
- ++iCacheMisses;
|
|
|
- if (piCurEnd == piCur) {
|
|
|
- piCur = piFIFOStack;
|
|
|
- }
|
|
|
- *piCur++ = pcFace->mIndices[qq];
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- delete[] piFIFOStack;
|
|
|
- fACMR = (ai_real) iCacheMisses / pMesh->mNumFaces;
|
|
|
- if (3.0 == fACMR) {
|
|
|
- char szBuff[128]; // should be sufficiently large in every case
|
|
|
-
|
|
|
- // the JoinIdenticalVertices process has not been executed on this
|
|
|
- // mesh, otherwise this value would normally be at least minimally
|
|
|
- // smaller than 3.0 ...
|
|
|
- ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
|
|
|
- ASSIMP_LOG_WARN(szBuff);
|
|
|
- return static_cast<ai_real>(0.f);
|
|
|
- }
|
|
|
+ if (!DefaultLogger::isNullLogger()) {
|
|
|
+ fACMR = calculateInputACMR(pMesh, pcEnd, mConfigCacheDepth, meshNum);
|
|
|
}
|
|
|
|
|
|
// first we need to build a vertex-triangle adjacency list
|
|
|
- VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
|
|
|
+ VertexTriangleAdjacency adj(pMesh->mFaces, pMesh->mNumFaces, pMesh->mNumVertices, true);
|
|
|
|
|
|
// build a list to store per-vertex caching time stamps
|
|
|
- unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
|
|
|
- memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
|
|
|
+ std::vector<unsigned int> piCachingStamps;
|
|
|
+ piCachingStamps.resize(pMesh->mNumVertices);
|
|
|
+ memset(&piCachingStamps[0], 0x0, pMesh->mNumVertices * sizeof(unsigned int));
|
|
|
|
|
|
// allocate an empty output index buffer. We store the output indices in one large array.
|
|
|
// Since the number of triangles won't change the input faces can be reused. This is how
|
|
|
// we save thousands of redundant mini allocations for aiFace::mIndices
|
|
|
- const unsigned int iIdxCnt = pMesh->mNumFaces*3;
|
|
|
- unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
|
|
|
- unsigned int* piCSIter = piIBOutput;
|
|
|
+ const unsigned int iIdxCnt = pMesh->mNumFaces * 3;
|
|
|
+ std::vector<unsigned int> piIBOutput;
|
|
|
+ piIBOutput.resize(iIdxCnt);
|
|
|
+ std::vector<unsigned int>::iterator piCSIter = piIBOutput.begin();
|
|
|
|
|
|
// allocate the flag array to hold the information
|
|
|
// whether a face has already been emitted or not
|
|
|
- std::vector<bool> abEmitted(pMesh->mNumFaces,false);
|
|
|
+ std::vector<bool> abEmitted(pMesh->mNumFaces, false);
|
|
|
|
|
|
// dead-end vertex index stack
|
|
|
- std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
|
|
|
+ std::stack<unsigned int, std::vector<unsigned int>> sDeadEndVStack;
|
|
|
|
|
|
// create a copy of the piNumTriPtr buffer
|
|
|
- unsigned int* const piNumTriPtr = adj.mLiveTriangles;
|
|
|
+ unsigned int *const piNumTriPtr = adj.mLiveTriangles;
|
|
|
const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
|
|
|
|
|
|
// get the largest number of referenced triangles and allocate the "candidate buffer"
|
|
|
- unsigned int iMaxRefTris = 0; {
|
|
|
- const unsigned int* piCur = adj.mLiveTriangles;
|
|
|
- const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
|
|
|
- for (;piCur != piCurEnd;++piCur) {
|
|
|
- iMaxRefTris = std::max(iMaxRefTris,*piCur);
|
|
|
+ unsigned int iMaxRefTris = 0;
|
|
|
+ {
|
|
|
+ const unsigned int *piCur = adj.mLiveTriangles;
|
|
|
+ const unsigned int *const piCurEnd = adj.mLiveTriangles + pMesh->mNumVertices;
|
|
|
+ for (; piCur != piCurEnd; ++piCur) {
|
|
|
+ iMaxRefTris = std::max(iMaxRefTris, *piCur);
|
|
|
}
|
|
|
}
|
|
|
ai_assert(iMaxRefTris > 0);
|
|
|
- unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
|
|
|
+ std::vector<unsigned int> piCandidates;
|
|
|
+ piCandidates.resize(iMaxRefTris * 3);
|
|
|
unsigned int iCacheMisses = 0;
|
|
|
|
|
|
// ...................................................................................
|
|
@@ -245,23 +276,23 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
|
|
|
|
|
|
int ivdx = 0;
|
|
|
int ics = 1;
|
|
|
- int iStampCnt = mConfigCacheDepth+1;
|
|
|
- while (ivdx >= 0) {
|
|
|
+ int iStampCnt = mConfigCacheDepth + 1;
|
|
|
+ while (ivdx >= 0) {
|
|
|
|
|
|
unsigned int icnt = piNumTriPtrNoModify[ivdx];
|
|
|
- unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
|
|
|
- unsigned int* piCurCandidate = piCandidates;
|
|
|
+ unsigned int *piList = adj.GetAdjacentTriangles(ivdx);
|
|
|
+ std::vector<unsigned int>::iterator piCurCandidate = piCandidates.begin();
|
|
|
|
|
|
// get all triangles in the neighborhood
|
|
|
- for (unsigned int tri = 0; tri < icnt;++tri) {
|
|
|
+ for (unsigned int tri = 0; tri < icnt; ++tri) {
|
|
|
|
|
|
// if they have not yet been emitted, add them to the output IB
|
|
|
const unsigned int fidx = *piList++;
|
|
|
- if (!abEmitted[fidx]) {
|
|
|
+ if (!abEmitted[fidx]) {
|
|
|
|
|
|
// so iterate through all vertices of the current triangle
|
|
|
- const aiFace* pcFace = &pMesh->mFaces[ fidx ];
|
|
|
- unsigned nind = pcFace->mNumIndices;
|
|
|
+ const aiFace *pcFace = &pMesh->mFaces[fidx];
|
|
|
+ const unsigned nind = pcFace->mNumIndices;
|
|
|
for (unsigned ind = 0; ind < nind; ind++) {
|
|
|
unsigned dp = pcFace->mIndices[ind];
|
|
|
|
|
@@ -281,7 +312,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
|
|
|
*piCSIter++ = dp;
|
|
|
|
|
|
// if the vertex is not yet in cache, set its cache count
|
|
|
- if (iStampCnt-piCachingStamps[dp] > mConfigCacheDepth) {
|
|
|
+ if (iStampCnt - piCachingStamps[dp] > mConfigCacheDepth) {
|
|
|
piCachingStamps[dp] = iStampCnt++;
|
|
|
++iCacheMisses;
|
|
|
}
|
|
@@ -297,16 +328,16 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
|
|
|
// get next fanning vertex
|
|
|
ivdx = -1;
|
|
|
int max_priority = -1;
|
|
|
- for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
|
|
|
+ for (std::vector<unsigned int>::iterator piCur = piCandidates.begin(); piCur != piCurCandidate; ++piCur) {
|
|
|
const unsigned int dp = *piCur;
|
|
|
|
|
|
// must have live triangles
|
|
|
- if (piNumTriPtr[dp] > 0) {
|
|
|
+ if (piNumTriPtr[dp] > 0) {
|
|
|
int priority = 0;
|
|
|
|
|
|
// will the vertex be in cache, even after fanning occurs?
|
|
|
unsigned int tmp;
|
|
|
- if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= mConfigCacheDepth) {
|
|
|
+ if ((tmp = iStampCnt - piCachingStamps[dp]) + 2 * piNumTriPtr[dp] <= mConfigCacheDepth) {
|
|
|
priority = tmp;
|
|
|
}
|
|
|
|
|
@@ -324,7 +355,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
|
|
|
while (!sDeadEndVStack.empty()) {
|
|
|
unsigned int iCachedIdx = sDeadEndVStack.top();
|
|
|
sDeadEndVStack.pop();
|
|
|
- if (piNumTriPtr[ iCachedIdx ] > 0) {
|
|
|
+ if (piNumTriPtr[iCachedIdx] > 0) {
|
|
|
ivdx = iCachedIdx;
|
|
|
break;
|
|
|
}
|
|
@@ -333,9 +364,9 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
|
|
|
if (-1 == ivdx) {
|
|
|
// well, there isn't such a vertex. Simply get the next vertex in input order and
|
|
|
// hope it is not too bad ...
|
|
|
- while (ics < (int)pMesh->mNumVertices) {
|
|
|
+ while (ics < (int)pMesh->mNumVertices) {
|
|
|
++ics;
|
|
|
- if (piNumTriPtr[ics] > 0) {
|
|
|
+ if (piNumTriPtr[ics] > 0) {
|
|
|
ivdx = ics;
|
|
|
break;
|
|
|
}
|
|
@@ -345,29 +376,29 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
|
|
|
}
|
|
|
ai_real fACMR2 = 0.0f;
|
|
|
if (!DefaultLogger::isNullLogger()) {
|
|
|
- fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
|
|
|
-
|
|
|
+ fACMR2 = static_cast<ai_real>(iCacheMisses / pMesh->mNumFaces);
|
|
|
+ const ai_real averageACMR = ((fACMR - fACMR2) / fACMR) * 100.f;
|
|
|
// very intense verbose logging ... prepare for much text if there are many meshes
|
|
|
- if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
|
|
|
- ASSIMP_LOG_VERBOSE_DEBUG("Mesh %u | ACMR in: ", meshNum, " out: ", fACMR, " | ~", fACMR2, ((fACMR - fACMR2) / fACMR) * 100.f);
|
|
|
+ if (DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
|
|
|
+ ASSIMP_LOG_VERBOSE_DEBUG("Mesh ", meshNum, "| ACMR in: ", fACMR, " out: ", fACMR2, " | average ACMR ", averageACMR);
|
|
|
}
|
|
|
-
|
|
|
fACMR2 *= pMesh->mNumFaces;
|
|
|
}
|
|
|
+
|
|
|
// sort the output index buffer back to the input array
|
|
|
- piCSIter = piIBOutput;
|
|
|
- for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) {
|
|
|
+ piCSIter = piIBOutput.begin();
|
|
|
+ for (aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
|
|
|
unsigned nind = pcFace->mNumIndices;
|
|
|
- unsigned * ind = pcFace->mIndices;
|
|
|
- if (nind > 0) ind[0] = *piCSIter++;
|
|
|
- if (nind > 1) ind[1] = *piCSIter++;
|
|
|
- if (nind > 2) ind[2] = *piCSIter++;
|
|
|
+ unsigned *ind = pcFace->mIndices;
|
|
|
+ if (nind > 0)
|
|
|
+ ind[0] = *piCSIter++;
|
|
|
+ if (nind > 1)
|
|
|
+ ind[1] = *piCSIter++;
|
|
|
+ if (nind > 2)
|
|
|
+ ind[2] = *piCSIter++;
|
|
|
}
|
|
|
|
|
|
- // delete temporary storage
|
|
|
- delete[] piCachingStamps;
|
|
|
- delete[] piIBOutput;
|
|
|
- delete[] piCandidates;
|
|
|
-
|
|
|
return fACMR2;
|
|
|
}
|
|
|
+
|
|
|
+} // namespace Assimp
|