преди 1 година · a531c72f7f
--- a/code/PostProcessing/ImproveCacheLocality.cpp
+++ b/code/PostProcessing/ImproveCacheLocality.cpp
@@ -7,6 +7,26 @@ Copyright (c) 2006-2022, assimp team
 
				 
			
 
				 
			
 
				 
			
 
				+All rights reserved.
			
 
				+
			
 
				+Redistribution and use of this software in source and binary forms,
			
 
				+with or without modification, are permitted provided that the following
			
 
				+conditions are met:
			
 
				+
			
 
				+* Redistributions of source code must retain the above
			
 
				+  copyright notice, this list of conditions and the
			
 
				+  following disclaimer.
			
 
				+
			
 
				+* Redistributions in binary form must reproduce the above
			
 
				+  copyright notice, this list of conditions and the
			
 
				+  following disclaimer in the documentation and/or other
			
 
				+  materials provided with the distribution.
			
 
				+---------------------------------------------------------------------------
			
 
				+Open Asset Import Library (assimp)
			
 
				+---------------------------------------------------------------------------
			
 
				+
			
 
				+Copyright (c) 2006-2023, assimp team
			
 
				+
			
 
				 All rights reserved.
			
 
				 
			
 
				 Redistribution and use of this software in source and binary forms,
			
@@ -59,31 +79,31 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
				 #include <stdio.h>
			
 
				 #include <stack>
			
 
				 
			
 
				-using namespace Assimp;
			
 
				+namespace Assimp {
			
 
				 
			
 
				 // ------------------------------------------------------------------------------------------------
			
 
				 // Constructor to be privately used by Importer
			
 
				-ImproveCacheLocalityProcess::ImproveCacheLocalityProcess()
			
 
				-: mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
			
 
				+ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() :
			
 
				+        mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
			
 
				     // empty
			
 
				 }
			
 
				 
			
 
				 // ------------------------------------------------------------------------------------------------
			
 
				 // Returns whether the processing step is present in the given flag field.
			
 
				-bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const {
			
 
				+bool ImproveCacheLocalityProcess::IsActive(unsigned int pFlags) const {
			
 
				     return (pFlags & aiProcess_ImproveCacheLocality) != 0;
			
 
				 }
			
 
				 
			
 
				 // ------------------------------------------------------------------------------------------------
			
 
				 // Setup configuration
			
 
				-void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) {
			
 
				+void ImproveCacheLocalityProcess::SetupProperties(const Importer *pImp) {
			
 
				     // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
			
 
				-    mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
			
 
				+    mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE, PP_ICL_PTCACHE_SIZE);
			
 
				 }
			
 
				 
			
 
				 // ------------------------------------------------------------------------------------------------
			
 
				 // Executes the post processing step on the given imported data.
			
 
				-void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
			
 
				+void ImproveCacheLocalityProcess::Execute(aiScene *pScene) {
			
 
				     if (!pScene->mNumMeshes) {
			
 
				         ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes");
			
 
				         return;
			
@@ -93,11 +113,11 @@ void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
 
				 
			
 
				     float out = 0.f;
			
 
				     unsigned int numf = 0, numm = 0;
			
 
				-    for( unsigned int a = 0; a < pScene->mNumMeshes; ++a ){
			
 
				-        const float res = ProcessMesh( pScene->mMeshes[a],a);
			
 
				+    for (unsigned int a = 0; a < pScene->mNumMeshes; ++a) {
			
 
				+        const float res = ProcessMesh(pScene->mMeshes[a], a);
			
 
				         if (res) {
			
 
				             numf += pScene->mMeshes[a]->mNumFaces;
			
 
				-            out  += res;
			
 
				+            out += res;
			
 
				             ++numm;
			
 
				         }
			
 
				     }
			
@@ -109,9 +129,54 @@ void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
 
				     }
			
 
				 }
			
 
				 
			
 
				+// ------------------------------------------------------------------------------------------------
			
 
				+static ai_real calculateInputACMR(aiMesh *pMesh, const aiFace *const pcEnd,
			
 
				+        unsigned int configCacheDepth, unsigned int meshNum) {
			
 
				+    ai_real fACMR = 0.0f;
			
 
				+    unsigned int *piFIFOStack = new unsigned int[configCacheDepth];
			
 
				+    memset(piFIFOStack, 0xff, configCacheDepth * sizeof(unsigned int));
			
 
				+    unsigned int *piCur = piFIFOStack;
			
 
				+    const unsigned int *const piCurEnd = piFIFOStack + configCacheDepth;
			
 
				+
			
 
				+    // count the number of cache misses
			
 
				+    unsigned int iCacheMisses = 0;
			
 
				+    for (const aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
			
 
				+        for (unsigned int qq = 0; qq < 3; ++qq) {
			
 
				+            bool bInCache = false;
			
 
				+            for (unsigned int *pp = piFIFOStack; pp < piCurEnd; ++pp) {
			
 
				+                if (*pp == pcFace->mIndices[qq]) {
			
 
				+                    // the vertex is in cache
			
 
				+                    bInCache = true;
			
 
				+                    break;
			
 
				+                }
			
 
				+            }
			
 
				+            if (!bInCache) {
			
 
				+                ++iCacheMisses;
			
 
				+                if (piCurEnd == piCur) {
			
 
				+                    piCur = piFIFOStack;
			
 
				+                }
			
 
				+                *piCur++ = pcFace->mIndices[qq];
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    delete[] piFIFOStack;
			
 
				+    fACMR = (ai_real)iCacheMisses / pMesh->mNumFaces;
			
 
				+    if (3.0 == fACMR) {
			
 
				+        char szBuff[128]; // should be sufficiently large in every case
			
 
				+
			
 
				+        // the JoinIdenticalVertices process has not been executed on this
			
 
				+        // mesh, otherwise this value would normally be at least minimally
			
 
				+        // smaller than 3.0 ...
			
 
				+        ai_snprintf(szBuff, 128, "Mesh %u: Not suitable for vcache optimization", meshNum);
			
 
				+        ASSIMP_LOG_WARN(szBuff);
			
 
				+        return static_cast<ai_real>(0.f);
			
 
				+    }
			
 
				+    return fACMR;
			
 
				+}
			
 
				+
			
 
				 // ------------------------------------------------------------------------------------------------
			
 
				 // Improves the cache coherency of a specific mesh
			
 
				-ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) {
			
 
				+ai_real ImproveCacheLocalityProcess::ProcessMesh(aiMesh *pMesh, unsigned int meshNum) {
			
 
				     // TODO: rewrite this to use std::vector or boost::shared_array
			
 
				     ai_assert(nullptr != pMesh);
			
 
				 
			
@@ -126,91 +191,57 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
 
				         return static_cast<ai_real>(0.f);
			
 
				     }
			
 
				 
			
 
				-    if(pMesh->mNumVertices <= mConfigCacheDepth) {
			
 
				+    if (pMesh->mNumVertices <= mConfigCacheDepth) {
			
 
				         return static_cast<ai_real>(0.f);
			
 
				     }
			
 
				 
			
 
				     ai_real fACMR = 3.f;
			
 
				-    const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
			
 
				+    const aiFace *const pcEnd = pMesh->mFaces + pMesh->mNumFaces;
			
 
				 
			
 
				     // Input ACMR is for logging purposes only
			
 
				-    if (!DefaultLogger::isNullLogger())     {
			
 
				-
			
 
				-        unsigned int* piFIFOStack = new unsigned int[mConfigCacheDepth];
			
 
				-        memset(piFIFOStack,0xff,mConfigCacheDepth*sizeof(unsigned int));
			
 
				-        unsigned int* piCur = piFIFOStack;
			
 
				-        const unsigned int* const piCurEnd = piFIFOStack + mConfigCacheDepth;
			
 
				-
			
 
				-        // count the number of cache misses
			
 
				-        unsigned int iCacheMisses = 0;
			
 
				-        for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
			
 
				-            for (unsigned int qq = 0; qq < 3;++qq) {
			
 
				-                bool bInCache = false;
			
 
				-                for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
			
 
				-                    if (*pp == pcFace->mIndices[qq])    {
			
 
				-                        // the vertex is in cache
			
 
				-                        bInCache = true;
			
 
				-                        break;
			
 
				-                    }
			
 
				-                }
			
 
				-                if (!bInCache)  {
			
 
				-                    ++iCacheMisses;
			
 
				-                    if (piCurEnd == piCur) {
			
 
				-                        piCur = piFIFOStack;
			
 
				-                    }
			
 
				-                    *piCur++ = pcFace->mIndices[qq];
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-        delete[] piFIFOStack;
			
 
				-        fACMR = (ai_real) iCacheMisses / pMesh->mNumFaces;
			
 
				-        if (3.0 == fACMR)   {
			
 
				-            char szBuff[128]; // should be sufficiently large in every case
			
 
				-
			
 
				-            // the JoinIdenticalVertices process has not been executed on this
			
 
				-            // mesh, otherwise this value would normally be at least minimally
			
 
				-            // smaller than 3.0 ...
			
 
				-            ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
			
 
				-            ASSIMP_LOG_WARN(szBuff);
			
 
				-            return static_cast<ai_real>(0.f);
			
 
				-        }
			
 
				+    if (!DefaultLogger::isNullLogger()) {
			
 
				+        fACMR = calculateInputACMR(pMesh, pcEnd, mConfigCacheDepth, meshNum);
			
 
				     }
			
 
				 
			
 
				     // first we need to build a vertex-triangle adjacency list
			
 
				-    VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
			
 
				+    VertexTriangleAdjacency adj(pMesh->mFaces, pMesh->mNumFaces, pMesh->mNumVertices, true);
			
 
				 
			
 
				     // build a list to store per-vertex caching time stamps
			
 
				-    unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
			
 
				-    memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
			
 
				+    std::vector<unsigned int> piCachingStamps;
			
 
				+    piCachingStamps.resize(pMesh->mNumVertices);
			
 
				+    memset(&piCachingStamps[0], 0x0, pMesh->mNumVertices * sizeof(unsigned int));
			
 
				 
			
 
				     // allocate an empty output index buffer. We store the output indices in one large array.
			
 
				     // Since the number of triangles won't change the input faces can be reused. This is how
			
 
				     // we save thousands of redundant mini allocations for aiFace::mIndices
			
 
				-    const unsigned int iIdxCnt = pMesh->mNumFaces*3;
			
 
				-    unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
			
 
				-    unsigned int* piCSIter = piIBOutput;
			
 
				+    const unsigned int iIdxCnt = pMesh->mNumFaces * 3;
			
 
				+    std::vector<unsigned int> piIBOutput;
			
 
				+    piIBOutput.resize(iIdxCnt);
			
 
				+    std::vector<unsigned int>::iterator piCSIter = piIBOutput.begin();
			
 
				 
			
 
				     // allocate the flag array to hold the information
			
 
				     // whether a face has already been emitted or not
			
 
				-    std::vector<bool> abEmitted(pMesh->mNumFaces,false);
			
 
				+    std::vector<bool> abEmitted(pMesh->mNumFaces, false);
			
 
				 
			
 
				     // dead-end vertex index stack
			
 
				-    std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
			
 
				+    std::stack<unsigned int, std::vector<unsigned int>> sDeadEndVStack;
			
 
				 
			
 
				     // create a copy of the piNumTriPtr buffer
			
 
				-    unsigned int* const piNumTriPtr = adj.mLiveTriangles;
			
 
				+    unsigned int *const piNumTriPtr = adj.mLiveTriangles;
			
 
				     const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
			
 
				 
			
 
				     // get the largest number of referenced triangles and allocate the "candidate buffer"
			
 
				-    unsigned int iMaxRefTris = 0; {
			
 
				-        const unsigned int* piCur = adj.mLiveTriangles;
			
 
				-        const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
			
 
				-        for (;piCur != piCurEnd;++piCur) {
			
 
				-            iMaxRefTris = std::max(iMaxRefTris,*piCur);
			
 
				+    unsigned int iMaxRefTris = 0;
			
 
				+    {
			
 
				+        const unsigned int *piCur = adj.mLiveTriangles;
			
 
				+        const unsigned int *const piCurEnd = adj.mLiveTriangles + pMesh->mNumVertices;
			
 
				+        for (; piCur != piCurEnd; ++piCur) {
			
 
				+            iMaxRefTris = std::max(iMaxRefTris, *piCur);
			
 
				         }
			
 
				     }
			
 
				     ai_assert(iMaxRefTris > 0);
			
 
				-    unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
			
 
				+    std::vector<unsigned int> piCandidates;
			
 
				+    piCandidates.resize(iMaxRefTris * 3);
			
 
				     unsigned int iCacheMisses = 0;
			
 
				 
			
 
				     // ...................................................................................
			
@@ -245,23 +276,23 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
 
				 
			
 
				     int ivdx = 0;
			
 
				     int ics = 1;
			
 
				-    int iStampCnt = mConfigCacheDepth+1;
			
 
				-    while (ivdx >= 0)   {
			
 
				+    int iStampCnt = mConfigCacheDepth + 1;
			
 
				+    while (ivdx >= 0) {
			
 
				 
			
 
				         unsigned int icnt = piNumTriPtrNoModify[ivdx];
			
 
				-        unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
			
 
				-        unsigned int* piCurCandidate = piCandidates;
			
 
				+        unsigned int *piList = adj.GetAdjacentTriangles(ivdx);
			
 
				+        std::vector<unsigned int>::iterator piCurCandidate = piCandidates.begin();
			
 
				 
			
 
				         // get all triangles in the neighborhood
			
 
				-        for (unsigned int tri = 0; tri < icnt;++tri)    {
			
 
				+        for (unsigned int tri = 0; tri < icnt; ++tri) {
			
 
				 
			
 
				             // if they have not yet been emitted, add them to the output IB
			
 
				             const unsigned int fidx = *piList++;
			
 
				-            if (!abEmitted[fidx])   {
			
 
				+            if (!abEmitted[fidx]) {
			
 
				 
			
 
				                 // so iterate through all vertices of the current triangle
			
 
				-                const aiFace* pcFace = &pMesh->mFaces[ fidx ];
			
 
				-                unsigned nind = pcFace->mNumIndices;
			
 
				+                const aiFace *pcFace = &pMesh->mFaces[fidx];
			
 
				+                const unsigned nind = pcFace->mNumIndices;
			
 
				                 for (unsigned ind = 0; ind < nind; ind++) {
			
 
				                     unsigned dp = pcFace->mIndices[ind];
			
 
				 
			
@@ -281,7 +312,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
 
				                     *piCSIter++ = dp;
			
 
				 
			
 
				                     // if the vertex is not yet in cache, set its cache count
			
 
				-                    if (iStampCnt-piCachingStamps[dp] > mConfigCacheDepth) {
			
 
				+                    if (iStampCnt - piCachingStamps[dp] > mConfigCacheDepth) {
			
 
				                         piCachingStamps[dp] = iStampCnt++;
			
 
				                         ++iCacheMisses;
			
 
				                     }
			
@@ -297,16 +328,16 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
 
				         // get next fanning vertex
			
 
				         ivdx = -1;
			
 
				         int max_priority = -1;
			
 
				-        for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur)    {
			
 
				+        for (std::vector<unsigned int>::iterator piCur = piCandidates.begin(); piCur != piCurCandidate; ++piCur) {
			
 
				             const unsigned int dp = *piCur;
			
 
				 
			
 
				             // must have live triangles
			
 
				-            if (piNumTriPtr[dp] > 0)    {
			
 
				+            if (piNumTriPtr[dp] > 0) {
			
 
				                 int priority = 0;
			
 
				 
			
 
				                 // will the vertex be in cache, even after fanning occurs?
			
 
				                 unsigned int tmp;
			
 
				-                if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= mConfigCacheDepth) {
			
 
				+                if ((tmp = iStampCnt - piCachingStamps[dp]) + 2 * piNumTriPtr[dp] <= mConfigCacheDepth) {
			
 
				                     priority = tmp;
			
 
				                 }
			
 
				 
			
@@ -324,7 +355,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
 
				             while (!sDeadEndVStack.empty()) {
			
 
				                 unsigned int iCachedIdx = sDeadEndVStack.top();
			
 
				                 sDeadEndVStack.pop();
			
 
				-                if (piNumTriPtr[ iCachedIdx ] > 0)  {
			
 
				+                if (piNumTriPtr[iCachedIdx] > 0) {
			
 
				                     ivdx = iCachedIdx;
			
 
				                     break;
			
 
				                 }
			
@@ -333,9 +364,9 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
 
				             if (-1 == ivdx) {
			
 
				                 // well, there isn't such a vertex. Simply get the next vertex in input order and
			
 
				                 // hope it is not too bad ...
			
 
				-                while (ics < (int)pMesh->mNumVertices)  {
			
 
				+                while (ics < (int)pMesh->mNumVertices) {
			
 
				                     ++ics;
			
 
				-                    if (piNumTriPtr[ics] > 0)   {
			
 
				+                    if (piNumTriPtr[ics] > 0) {
			
 
				                         ivdx = ics;
			
 
				                         break;
			
 
				                     }
			
@@ -345,29 +376,29 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
 
				     }
			
 
				     ai_real fACMR2 = 0.0f;
			
 
				     if (!DefaultLogger::isNullLogger()) {
			
 
				-        fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
			
 
				-
			
 
				+        fACMR2 = static_cast<ai_real>(iCacheMisses) / static_cast<ai_real>(pMesh->mNumFaces); // convert before dividing: unsigned / unsigned would truncate the ratio to a whole number
			
 
				+        const ai_real averageACMR = ((fACMR - fACMR2) / fACMR) * 100.f;
			
 
				         // very intense verbose logging ... prepare for much text if there are many meshes
			
 
				-        if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
			
 
				-            ASSIMP_LOG_VERBOSE_DEBUG("Mesh %u | ACMR in: ", meshNum, " out: ", fACMR, " | ~", fACMR2, ((fACMR - fACMR2) / fACMR) * 100.f);
			
 
				+        if (DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
			
 
				+            ASSIMP_LOG_VERBOSE_DEBUG("Mesh ", meshNum, "| ACMR in: ", fACMR, " out: ", fACMR2, " | average ACMR ", averageACMR);
			
 
				         }
			
 
				-
			
 
				         fACMR2 *= pMesh->mNumFaces;
			
 
				     }
			
 
				+
			
 
				     // sort the output index buffer back to the input array
			
 
				-    piCSIter = piIBOutput;
			
 
				-    for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace)  {
			
 
				+    piCSIter = piIBOutput.begin();
			
 
				+    for (aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
			
 
				         unsigned nind = pcFace->mNumIndices;
			
 
				-        unsigned * ind = pcFace->mIndices;
			
 
				-        if (nind > 0) ind[0] = *piCSIter++;
			
 
				-        if (nind > 1) ind[1] = *piCSIter++;
			
 
				-        if (nind > 2) ind[2] = *piCSIter++;
			
 
				+        unsigned *ind = pcFace->mIndices;
			
 
				+        if (nind > 0)
			
 
				+            ind[0] = *piCSIter++;
			
 
				+        if (nind > 1)
			
 
				+            ind[1] = *piCSIter++;
			
 
				+        if (nind > 2)
			
 
				+            ind[2] = *piCSIter++;
			
 
				     }
			
 
				 
			
 
				-    // delete temporary storage
			
 
				-    delete[] piCachingStamps;
			
 
				-    delete[] piIBOutput;
			
 
				-    delete[] piCandidates;
			
 
				-
			
 
				     return fACMR2;
			
 
				 }
			
 
				+
			
 
				+} // namespace Assimp