// GavinNL.gul
// Mirror of https://gitlab.com/GavinNL/gul.git
							#ifndef GUL_MESH_PRIMITIVE_OPTIMIZER_H
#define GUL_MESH_PRIMITIVE_OPTIMIZER_H

#include "MeshPrimitive2.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

#include <meshoptimizer.h>

namespace gul
{

/**
 * @brief Generate level-of-detail (LOD) index sets for a mesh primitive.
 *
 * Uses Arseny Kapoulkine's meshoptimizer library
 * ( https://github.com/zeux/meshoptimizer ) to build progressively simplified
 * copies of the primitive's index buffer. Only the index buffer and the
 * submesh list of the primitive are written; every level indexes into the
 * primitive's existing POSITION data.
 *
 * Level 0 holds the original triangles (reordered by the vertex cache and
 * overdraw optimizers). Level i (i >= 1) is produced by simplifying level
 * i-1, aiming for thresholdPowerBase^i times the original index count.
 *
 * On return, V.INDEX holds all levels concatenated, coarsest level first,
 * and V.subMeshes holds one DrawCall per level (element i describes level i).
 *
 * @param V                  Mesh primitive to process; V.INDEX and V.subMeshes
 *                           are overwritten.
 * @param lod_count          Number of levels to generate, including level 0.
 *                           If 0, V is left unmodified and an empty vector is
 *                           returned.
 * @param thresholdPowerBase Base of the per-level index count target
 *                           (level i targets thresholdPowerBase^i of the
 *                           original index count, rounded down to a multiple
 *                           of 3).
 * @param target_error       Error bound forwarded unchanged to
 *                           meshopt_simplify.
 * @return The same DrawCalls that were appended to V.subMeshes.
 *
 * Requirements (checked with assert):
 *          The submesh array in the MeshPrimitive must be empty
 *          The mesh MUST have index buffers
 *          The index buffer must be uint32
 *
 * GenerateMeshLOD(M, 10, 0.7f, 0.01f);
 * assert(M.subMeshes.size() > 0);
 *
 */
inline std::vector<DrawCall> GenerateMeshLOD(MeshPrimitive & V, size_t lod_count = 10, float thresholdPowerBase=0.7f, float target_error = 1e-2f)
{
    assert( V.subMeshes.size() == 0);
    assert( V.indexCount() > 0);
    assert( V.INDEX.getAttributeSize() == 4);

    std::vector<DrawCall> drawCalls;

    // Nothing to generate. This also keeps the code below from indexing
    // into an empty LOD table.
    if (lod_count == 0)
        return drawCalls;

    uint32_t const vertexStride = V.POSITION.getAttributeSize();
    uint32_t const vertexCount  = V.POSITION.attributeCount();
    auto const     vertexData   = static_cast<float const*>(V.POSITION.data());

    // One index list per level. Every level shares the same vertex data.
    std::vector<std::vector<uint32_t>> lods(lod_count);

    auto indices = V.INDEX.toVector<uint32_t>();
    size_t const baseIndexCount = indices.size();
    lods[0] = indices;

    for (size_t i = 1; i < lod_count; ++i)
    {
        // We can simplify all the way from the base level or from the last
        // result. Simplifying from the base level sometimes produces better
        // results, but simplifying from the last level is faster.
        std::vector<uint32_t> const& source = lods[i - 1];
        std::vector<uint32_t>&       lod    = lods[i];

        // The previous level has no triangles left, so this one stays empty.
        if (source.empty())
            continue;

        float const threshold = powf(thresholdPowerBase, float(i));

        // Target is relative to the ORIGINAL index count, rounded down to a
        // whole number of triangles, and never larger than the source level.
        size_t target_index_count = size_t(baseIndexCount * threshold) / 3 * 3;
        if (source.size() < target_index_count)
            target_index_count = source.size();

        // Reserve room for the worst case (no reduction), then shrink to the
        // number of indices meshopt_simplify reports it wrote.
        lod.resize(source.size());
        lod.resize(meshopt_simplify(lod.data(), source.data(), source.size(), vertexData, vertexCount, vertexStride, target_index_count, target_error));
    }

    // Optimize each individual LOD for vertex cache & overdraw (in place).
    for (auto& lod : lods)
    {
        if (lod.empty())
            continue;

        meshopt_optimizeVertexCache(lod.data(), lod.data(), lod.size(), vertexCount);
        meshopt_optimizeOverdraw(lod.data(), lod.data(), lod.size(), vertexData, vertexCount, vertexStride, 1.0f);
    }

    // Concatenate all LODs into one index buffer, coarsest level first, so
    // that the coarse levels sit at the start of the buffer. For using the
    // LOD data at runtime the per-level offset and count are needed; they are
    // recorded in the DrawCalls built below.
    //
    // Note: meshopt_optimizeVertexFetch is NOT run here; this function only
    // rewrites indices and never touches the vertex data.
    size_t total_index_count = 0;
    for (auto const& lod : lods)
        total_index_count += lod.size();

    std::vector<size_t> lod_index_offsets(lod_count, 0);

    indices.clear();
    indices.reserve(total_index_count);
    for (size_t i = lod_count; i-- > 0; )
    {
        lod_index_offsets[i] = indices.size();
        indices.insert(indices.end(), lods[i].begin(), lods[i].end());
    }

    // Publish one DrawCall per level, in level order (0 = finest).
    V.subMeshes.clear();
    drawCalls.reserve(lod_count);
    for (size_t i = 0; i < lod_count; ++i)
    {
        DrawCall dc;
        dc.indexCount   = uint32_t(lods[i].size());
        dc.indexOffset  = lod_index_offsets[i];
        // NOTE(review): vertex range is left at zero for every level;
        // presumably consumers only use the index range - confirm.
        dc.vertexCount  = 0;
        dc.vertexOffset = 0;

        V.subMeshes.push_back(dc);
        drawCalls.push_back(dc);
    }
    V.INDEX = indices;

    return drawCalls;
}


}

#endif