123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383 |
- /*
- ---------------------------------------------------------------------------
- Open Asset Import Library (assimp)
- ---------------------------------------------------------------------------
- Copyright (c) 2006-2025, assimp team
- All rights reserved.
- Redistribution and use of this software in source and binary forms,
- with or without modification, are permitted provided that the following
- conditions are met:
- * Redistributions of source code must retain the above
- copyright notice, this list of conditions and the
- following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the
- following disclaimer in the documentation and/or other
- materials provided with the distribution.
- * Neither the name of the assimp team, nor the names of its
- contributors may be used to endorse or promote products
- derived from this software without specific prior
- written permission of the assimp team.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- ---------------------------------------------------------------------------
- */
- /** @file Implementation of the post processing step to improve the cache locality of a mesh.
- * <br>
- * The algorithm is roughly basing on this paper:
- * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
- * .. although overdraw reduction isn't implemented yet ...
- */
- // internal headers
- #include "PostProcessing/ImproveCacheLocality.h"
- #include "Common/VertexTriangleAdjacency.h"
- #include <assimp/StringUtils.h>
- #include <assimp/postprocess.h>
- #include <assimp/scene.h>
- #include <assimp/DefaultLogger.hpp>
- #include <stdio.h>
- #include <stack>
- namespace Assimp {
- // ------------------------------------------------------------------------------------------------
- // Constructor to be privately used by Importer
- ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() :
- mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
- // empty
- }
- // ------------------------------------------------------------------------------------------------
- // Returns whether the processing step is present in the given flag field.
- bool ImproveCacheLocalityProcess::IsActive(unsigned int pFlags) const {
- return (pFlags & aiProcess_ImproveCacheLocality) != 0;
- }
- // ------------------------------------------------------------------------------------------------
- // Setup configuration
- void ImproveCacheLocalityProcess::SetupProperties(const Importer *pImp) {
- // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
- mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE, PP_ICL_PTCACHE_SIZE);
- }
- // ------------------------------------------------------------------------------------------------
- // Executes the post processing step on the given imported data.
- void ImproveCacheLocalityProcess::Execute(aiScene *pScene) {
- if (!pScene->mNumMeshes) {
- ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes");
- return;
- }
- ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess begin");
- float out = 0.f;
- unsigned int numf = 0, numm = 0;
- for (unsigned int a = 0; a < pScene->mNumMeshes; ++a) {
- const float res = ProcessMesh(pScene->mMeshes[a], a);
- if (res) {
- numf += pScene->mMeshes[a]->mNumFaces;
- out += res;
- ++numm;
- }
- }
- if (!DefaultLogger::isNullLogger()) {
- if (numf > 0) {
- ASSIMP_LOG_INFO("Cache relevant are ", numm, " meshes (", numf, " faces). Average output ACMR is ", out / numf);
- }
- ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess finished. ");
- }
- }
- // ------------------------------------------------------------------------------------------------
- static ai_real calculateInputACMR(aiMesh *pMesh, const aiFace *const pcEnd,
- unsigned int configCacheDepth, unsigned int meshNum) {
- ai_real fACMR = 0.0f;
- unsigned int *piFIFOStack = new unsigned int[configCacheDepth];
- memset(piFIFOStack, 0xff, configCacheDepth * sizeof(unsigned int));
- unsigned int *piCur = piFIFOStack;
- const unsigned int *const piCurEnd = piFIFOStack + configCacheDepth;
- // count the number of cache misses
- unsigned int iCacheMisses = 0;
- for (const aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
- for (unsigned int qq = 0; qq < 3; ++qq) {
- bool bInCache = false;
- for (unsigned int *pp = piFIFOStack; pp < piCurEnd; ++pp) {
- if (*pp == pcFace->mIndices[qq]) {
- // the vertex is in cache
- bInCache = true;
- break;
- }
- }
- if (!bInCache) {
- ++iCacheMisses;
- if (piCurEnd == piCur) {
- piCur = piFIFOStack;
- }
- *piCur++ = pcFace->mIndices[qq];
- }
- }
- }
- delete[] piFIFOStack;
- fACMR = (ai_real)iCacheMisses / pMesh->mNumFaces;
- if (3.0 == fACMR) {
- char szBuff[128]; // should be sufficiently large in every case
- // the JoinIdenticalVertices process has not been executed on this
- // mesh, otherwise this value would normally be at least minimally
- // smaller than 3.0 ...
- ai_snprintf(szBuff, 128, "Mesh %u: Not suitable for vcache optimization", meshNum);
- ASSIMP_LOG_WARN(szBuff);
- return static_cast<ai_real>(0.f);
- }
- return fACMR;
- }
- // ------------------------------------------------------------------------------------------------
- // Improves the cache coherency of a specific mesh
- ai_real ImproveCacheLocalityProcess::ProcessMesh(aiMesh *pMesh, unsigned int meshNum) {
- // TODO: rewrite this to use std::vector or boost::shared_array
- ai_assert(nullptr != pMesh);
- // Check whether the input data is valid
- // - there must be vertices and faces
- // - all faces must be triangulated or we can't operate on them
- if (!pMesh->HasFaces() || !pMesh->HasPositions())
- return static_cast<ai_real>(0.f);
- if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
- ASSIMP_LOG_ERROR("This algorithm works on triangle meshes only");
- return static_cast<ai_real>(0.f);
- }
- if (pMesh->mNumVertices <= mConfigCacheDepth) {
- return static_cast<ai_real>(0.f);
- }
- ai_real fACMR = 3.f;
- const aiFace *const pcEnd = pMesh->mFaces + pMesh->mNumFaces;
- // Input ACMR is for logging purposes only
- if (!DefaultLogger::isNullLogger()) {
- fACMR = calculateInputACMR(pMesh, pcEnd, mConfigCacheDepth, meshNum);
- }
- // first we need to build a vertex-triangle adjacency list
- VertexTriangleAdjacency adj(pMesh->mFaces, pMesh->mNumFaces, pMesh->mNumVertices, true);
- // build a list to store per-vertex caching time stamps
- std::vector<unsigned int> piCachingStamps;
- piCachingStamps.resize(pMesh->mNumVertices);
- memset(&piCachingStamps[0], 0x0, pMesh->mNumVertices * sizeof(unsigned int));
- // allocate an empty output index buffer. We store the output indices in one large array.
- // Since the number of triangles won't change the input faces can be reused. This is how
- // we save thousands of redundant mini allocations for aiFace::mIndices
- const unsigned int iIdxCnt = pMesh->mNumFaces * 3;
- std::vector<unsigned int> piIBOutput;
- piIBOutput.resize(iIdxCnt);
- std::vector<unsigned int>::iterator piCSIter = piIBOutput.begin();
- // allocate the flag array to hold the information
- // whether a face has already been emitted or not
- std::vector<bool> abEmitted(pMesh->mNumFaces, false);
- // dead-end vertex index stack
- std::stack<unsigned int, std::vector<unsigned int>> sDeadEndVStack;
- // create a copy of the piNumTriPtr buffer
- unsigned int *const piNumTriPtr = adj.mLiveTriangles;
- const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
- // get the largest number of referenced triangles and allocate the "candidate buffer"
- unsigned int iMaxRefTris = 0;
- {
- const unsigned int *piCur = adj.mLiveTriangles;
- const unsigned int *const piCurEnd = adj.mLiveTriangles + pMesh->mNumVertices;
- for (; piCur != piCurEnd; ++piCur) {
- iMaxRefTris = std::max(iMaxRefTris, *piCur);
- }
- }
- ai_assert(iMaxRefTris > 0);
- std::vector<unsigned int> piCandidates;
- piCandidates.resize(iMaxRefTris * 3);
- unsigned int iCacheMisses = 0;
- // ...................................................................................
- /** PSEUDOCODE for the algorithm
- A = Build-Adjacency(I) Vertex-triangle adjacency
- L = Get-Triangle-Counts(A) Per-vertex live triangle counts
- C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
- D = Empty-Stack() Dead-end vertex stack
- E = False(Triangle-Count(I)) Per triangle emitted flag
- O = Empty-Index-Buffer() Empty output buffer
- f = 0 Arbitrary starting vertex
- s = k+1, i = 1 Time stamp and cursor
- while f >= 0 For all valid fanning vertices
- N = Empty-Set() 1-ring of next candidates
- for each Triangle t in Neighbors(A, f)
- if !Emitted(E,t)
- for each Vertex v in t
- Append(O,v) Output vertex
- Push(D,v) Add to dead-end stack
- Insert(N,v) Register as candidate
- L[v] = L[v]-1 Decrease live triangle count
- if s-C[v] > k If not in cache
- C[v] = s Set time stamp
- s = s+1 Increment time stamp
- E[t] = true Flag triangle as emitted
- Select next fanning vertex
- f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
- return O
- */
- // ...................................................................................
- int ivdx = 0;
- int ics = 1;
- int iStampCnt = mConfigCacheDepth + 1;
- while (ivdx >= 0) {
- unsigned int icnt = piNumTriPtrNoModify[ivdx];
- unsigned int *piList = adj.GetAdjacentTriangles(ivdx);
- std::vector<unsigned int>::iterator piCurCandidate = piCandidates.begin();
- // get all triangles in the neighborhood
- for (unsigned int tri = 0; tri < icnt; ++tri) {
- // if they have not yet been emitted, add them to the output IB
- const unsigned int fidx = *piList++;
- if (!abEmitted[fidx]) {
- // so iterate through all vertices of the current triangle
- const aiFace *pcFace = &pMesh->mFaces[fidx];
- const unsigned nind = pcFace->mNumIndices;
- for (unsigned ind = 0; ind < nind; ind++) {
- unsigned dp = pcFace->mIndices[ind];
- // the current vertex won't have any free triangles after this step
- if (ivdx != (int)dp) {
- // append the vertex to the dead-end stack
- sDeadEndVStack.push(dp);
- // register as candidate for the next step
- *piCurCandidate++ = dp;
- // decrease the per-vertex triangle counts
- piNumTriPtr[dp]--;
- }
- // append the vertex to the output index buffer
- *piCSIter++ = dp;
- // if the vertex is not yet in cache, set its cache count
- if (iStampCnt - piCachingStamps[dp] > mConfigCacheDepth) {
- piCachingStamps[dp] = iStampCnt++;
- ++iCacheMisses;
- }
- }
- // flag triangle as emitted
- abEmitted[fidx] = true;
- }
- }
- // the vertex has now no living adjacent triangles anymore
- piNumTriPtr[ivdx] = 0;
- // get next fanning vertex
- ivdx = -1;
- int max_priority = -1;
- for (std::vector<unsigned int>::iterator piCur = piCandidates.begin(); piCur != piCurCandidate; ++piCur) {
- const unsigned int dp = *piCur;
- // must have live triangles
- if (piNumTriPtr[dp] > 0) {
- int priority = 0;
- // will the vertex be in cache, even after fanning occurs?
- unsigned int tmp;
- if ((tmp = iStampCnt - piCachingStamps[dp]) + 2 * piNumTriPtr[dp] <= mConfigCacheDepth) {
- priority = tmp;
- }
- // keep best candidate
- if (priority > max_priority) {
- max_priority = priority;
- ivdx = dp;
- }
- }
- }
- // did we reach a dead end?
- if (-1 == ivdx) {
- // need to get a non-local vertex for which we have a good chance that it is still
- // in the cache ...
- while (!sDeadEndVStack.empty()) {
- unsigned int iCachedIdx = sDeadEndVStack.top();
- sDeadEndVStack.pop();
- if (piNumTriPtr[iCachedIdx] > 0) {
- ivdx = iCachedIdx;
- break;
- }
- }
- if (-1 == ivdx) {
- // well, there isn't such a vertex. Simply get the next vertex in input order and
- // hope it is not too bad ...
- while (ics < (int)pMesh->mNumVertices) {
- ++ics;
- if (piNumTriPtr[ics] > 0) {
- ivdx = ics;
- break;
- }
- }
- }
- }
- }
- ai_real fACMR2 = 0.0f;
- if (!DefaultLogger::isNullLogger()) {
- fACMR2 = static_cast<ai_real>(iCacheMisses / pMesh->mNumFaces);
- const ai_real averageACMR = ((fACMR - fACMR2) / fACMR) * 100.f;
- // very intense verbose logging ... prepare for much text if there are many meshes
- if (DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
- ASSIMP_LOG_VERBOSE_DEBUG("Mesh ", meshNum, "| ACMR in: ", fACMR, " out: ", fACMR2, " | average ACMR ", averageACMR);
- }
- fACMR2 *= pMesh->mNumFaces;
- }
- // sort the output index buffer back to the input array
- piCSIter = piIBOutput.begin();
- for (aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
- unsigned nind = pcFace->mNumIndices;
- unsigned *ind = pcFace->mIndices;
- if (nind > 0)
- ind[0] = *piCSIter++;
- if (nind > 1)
- ind[1] = *piCSIter++;
- if (nind > 2)
- ind[2] = *piCSIter++;
- }
- return fACMR2;
- }
- } // namespace Assimp
|