
Add more structures to the GPU scene

Panagiotis Christopoulos Charitos, 2 years ago
commit 75d4560c89
46 changed files with 821 additions and 498 deletions
  1. AnKi/Core/App.cpp (+9 -9)
  2. AnKi/Core/GpuMemoryPools.cpp (+9 -9)
  3. AnKi/Core/GpuMemoryPools.h (+148 -18)
  4. AnKi/Renderer/Drawer.cpp (+3 -3)
  5. AnKi/Renderer/PackVisibleClusteredObjects.cpp (+1 -1)
  6. AnKi/Renderer/Renderer.cpp (+2 -2)
  7. AnKi/Renderer/RtShadows.cpp (+2 -2)
  8. AnKi/Resource/Common.h (+0 -1)
  9. AnKi/Resource/MaterialResource.cpp (+6 -0)
  10. AnKi/Resource/MeshResource.cpp (+25 -25)
  11. AnKi/Resource/MeshResource.h (+4 -4)
  12. AnKi/Resource/ResourceManager.h (+0 -1)
  13. AnKi/Scene/Common.h (+0 -2)
  14. AnKi/Scene/Components/DecalComponent.cpp (+3 -9)
  15. AnKi/Scene/Components/DecalComponent.h (+2 -2)
  16. AnKi/Scene/Components/FogDensityComponent.cpp (+4 -9)
  17. AnKi/Scene/Components/FogDensityComponent.h (+2 -3)
  18. AnKi/Scene/Components/GlobalIlluminationProbeComponent.cpp (+4 -9)
  19. AnKi/Scene/Components/GlobalIlluminationProbeComponent.h (+2 -2)
  20. AnKi/Scene/Components/LightComponent.cpp (+12 -38)
  21. AnKi/Scene/Components/LightComponent.h (+3 -5)
  22. AnKi/Scene/Components/ModelComponent.cpp (+148 -149)
  23. AnKi/Scene/Components/ModelComponent.h (+12 -7)
  24. AnKi/Scene/Components/ParticleEmitterComponent.cpp (+105 -54)
  25. AnKi/Scene/Components/ParticleEmitterComponent.h (+12 -9)
  26. AnKi/Scene/Components/ReflectionProbeComponent.cpp (+4 -10)
  27. AnKi/Scene/Components/ReflectionProbeComponent.h (+2 -3)
  28. AnKi/Scene/Components/SceneComponent.h (+1 -0)
  29. AnKi/Scene/Components/SkinComponent.cpp (+4 -5)
  30. AnKi/Scene/Components/SkinComponent.h (+2 -2)
  31. AnKi/Scene/ContiguousArrayAllocator.cpp (+23 -15)
  32. AnKi/Scene/ContiguousArrayAllocator.h (+92 -19)
  33. AnKi/Scene/RenderStateBucket.cpp (+5 -8)
  34. AnKi/Scene/RenderStateBucket.h (+47 -6)
  35. AnKi/Scene/SceneGraph.cpp (+7 -4)
  36. AnKi/Scene/SceneGraph.h (+0 -16)
  37. AnKi/Scene/Visibility.cpp (+1 -1)
  38. AnKi/Shaders/Include/GpuSceneTypes.h (+1 -1)
  39. AnKi/Util/Enum.h (+67 -0)
  40. AnKi/Util/MemoryPool.cpp (+2 -3)
  41. AnKi/Util/StackAllocatorBuilder.h (+2 -2)
  42. AnKi/Util/StackAllocatorBuilder.inl.h (+24 -14)
  43. AnKi/Util/Tracer.h (+2 -0)
  44. Samples/Common/SampleApp.cpp (+5 -5)
  45. Samples/SkeletalAnimation/Main.cpp (+2 -1)
  46. Tests/Util/Memory.cpp (+10 -10)

+ 9 - 9
AnKi/Core/App.cpp

@@ -129,8 +129,8 @@ void App::cleanup()
 	ResourceManager::freeSingleton();
 	PhysicsWorld::freeSingleton();
 	RebarStagingGpuMemoryPool::freeSingleton();
-	UnifiedGeometryMemoryPool::freeSingleton();
-	GpuSceneMemoryPool::freeSingleton();
+	UnifiedGeometryBuffer::freeSingleton();
+	GpuSceneBuffer::freeSingleton();
 	CoreThreadHive::freeSingleton();
 	MaliHwCounters::freeSingleton();
 	GrManager::freeSingleton();
@@ -282,8 +282,8 @@ Error App::initInternal()
 	//
 	// GPU mem
 	//
-	UnifiedGeometryMemoryPool::allocateSingleton().init();
-	GpuSceneMemoryPool::allocateSingleton().init();
+	UnifiedGeometryBuffer::allocateSingleton().init();
+	GpuSceneBuffer::allocateSingleton().init();
 	RebarStagingGpuMemoryPool::allocateSingleton().init();
 
 	//
@@ -467,8 +467,8 @@ Error App::mainLoop()
 			}
 
 			const PtrSize rebarMemUsed = RebarStagingGpuMemoryPool::getSingleton().endFrame();
-			UnifiedGeometryMemoryPool::getSingleton().endFrame();
-			GpuSceneMemoryPool::getSingleton().endFrame();
+			UnifiedGeometryBuffer::getSingleton().endFrame();
+			GpuSceneBuffer::getSingleton().endFrame();
 
 			// Update the trace info with some async loader stats
 			U64 asyncTaskCount = ResourceManager::getSingleton().getAsyncLoader().getCompletedTaskCount();
@@ -534,10 +534,10 @@ Error App::mainLoop()
 				in.m_cpuFreeCount = m_memStats.m_freeCount.load();
 
 				const GrManagerStats grStats = GrManager::getSingleton().getStats();
-				UnifiedGeometryMemoryPool::getSingleton().getStats(
+				UnifiedGeometryBuffer::getSingleton().getStats(
 					in.m_unifiedGometryExternalFragmentation, in.m_unifiedGeometryAllocated, in.m_unifiedGeometryTotal);
-				GpuSceneMemoryPool::getSingleton().getStats(in.m_gpuSceneExternalFragmentation, in.m_gpuSceneAllocated,
-															in.m_gpuSceneTotal);
+				GpuSceneBuffer::getSingleton().getStats(in.m_gpuSceneExternalFragmentation, in.m_gpuSceneAllocated,
+														in.m_gpuSceneTotal);
 				in.m_gpuDeviceMemoryAllocated = grStats.m_deviceMemoryAllocated;
 				in.m_gpuDeviceMemoryInUse = grStats.m_deviceMemoryInUse;
 				in.m_reBar = rebarMemUsed;

+ 9 - 9
AnKi/Core/GpuMemoryPools.cpp

@@ -12,7 +12,7 @@
 
 namespace anki {
 
-void UnifiedGeometryMemoryPool::init()
+void UnifiedGeometryBuffer::init()
 {
 	const PtrSize poolSize = ConfigSet::getSingleton().getCoreGlobalVertexMemorySize();
 
@@ -29,12 +29,12 @@ void UnifiedGeometryMemoryPool::init()
 	m_pool.init(buffUsage, classes, poolSize, "UnifiedGeometry", false);
 
 	// Allocate something dummy to force creating the GPU buffer
-	SegregatedListsGpuMemoryPoolToken token;
-	allocate(16, 4, token);
-	deferredFree(token);
+	UnifiedGeometryBufferAllocation alloc;
+	allocate(16, 4, alloc);
+	deferredFree(alloc);
 }
 
-void GpuSceneMemoryPool::init()
+void GpuSceneBuffer::init()
 {
 	const PtrSize poolSize = ConfigSet::getSingleton().getCoreGpuSceneInitialSize();
 
@@ -45,9 +45,9 @@ void GpuSceneMemoryPool::init()
 	m_pool.init(buffUsage, classes, poolSize, "GpuScene", true);
 
 	// Allocate something dummy to force creating the GPU buffer
-	SegregatedListsGpuMemoryPoolToken token;
-	allocate(16, 4, token);
-	deferredFree(token);
+	GpuSceneBufferAllocation alloc;
+	allocate(16, 4, alloc);
+	deferredFree(alloc);
 }
 
 RebarStagingGpuMemoryPool::~RebarStagingGpuMemoryPool()
@@ -222,7 +222,7 @@ void GpuSceneMicroPatcher::patchGpuScene(CommandBuffer& cmdb)
 						   headersToken.m_range);
 	cmdb.bindStorageBuffer(0, 1, RebarStagingGpuMemoryPool::getSingleton().getBuffer(), dataToken.m_offset,
 						   dataToken.m_range);
-	cmdb.bindStorageBuffer(0, 2, GpuSceneMemoryPool::getSingleton().getBuffer(), 0, kMaxPtrSize);
+	cmdb.bindStorageBuffer(0, 2, GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
 
 	cmdb.bindShaderProgram(m_grProgram);
 

+ 148 - 18
AnKi/Core/GpuMemoryPools.h

@@ -16,27 +16,76 @@ namespace anki {
 /// @addtogroup core
 /// @{
 
+/// @memberof UnifiedGeometryBuffer
+class UnifiedGeometryBufferAllocation
+{
+	friend class UnifiedGeometryBuffer;
+
+public:
+	UnifiedGeometryBufferAllocation() = default;
+
+	UnifiedGeometryBufferAllocation(const UnifiedGeometryBufferAllocation&) = delete;
+
+	UnifiedGeometryBufferAllocation(UnifiedGeometryBufferAllocation&& b)
+	{
+		*this = std::move(b);
+	}
+
+	~UnifiedGeometryBufferAllocation();
+
+	UnifiedGeometryBufferAllocation& operator=(const UnifiedGeometryBufferAllocation&) = delete;
+
+	UnifiedGeometryBufferAllocation& operator=(UnifiedGeometryBufferAllocation&& b)
+	{
+		ANKI_ASSERT(!isValid() && "Forgot to delete");
+		m_token = b.m_token;
+		b.m_token = {};
+		return *this;
+	}
+
+	Bool isValid() const
+	{
+		return m_token.m_offset != kMaxPtrSize;
+	}
+
+	/// Get offset in the Unified Geometry Buffer buffer.
+	U32 getOffset() const
+	{
+		ANKI_ASSERT(isValid());
+		return U32(m_token.m_offset);
+	}
+
+	U32 getAllocatedSize() const
+	{
+		ANKI_ASSERT(isValid());
+		return U32(m_token.m_size);
+	}
+
+private:
+	SegregatedListsGpuMemoryPoolToken m_token;
+};
+
 /// Manages vertex and index memory for the whole application.
-class UnifiedGeometryMemoryPool : public MakeSingleton<UnifiedGeometryMemoryPool>
+class UnifiedGeometryBuffer : public MakeSingleton<UnifiedGeometryBuffer>
 {
 	template<typename>
 	friend class MakeSingleton;
 
 public:
-	UnifiedGeometryMemoryPool(const UnifiedGeometryMemoryPool&) = delete; // Non-copyable
+	UnifiedGeometryBuffer(const UnifiedGeometryBuffer&) = delete; // Non-copyable
 
-	UnifiedGeometryMemoryPool& operator=(const UnifiedGeometryMemoryPool&) = delete; // Non-copyable
+	UnifiedGeometryBuffer& operator=(const UnifiedGeometryBuffer&) = delete; // Non-copyable
 
 	void init();
 
-	void allocate(PtrSize size, U32 alignment, SegregatedListsGpuMemoryPoolToken& token)
+	void allocate(PtrSize size, U32 alignment, UnifiedGeometryBufferAllocation& alloc)
 	{
-		m_pool.allocate(size, alignment, token);
+		m_pool.allocate(size, alignment, alloc.m_token);
 	}
 
-	void deferredFree(SegregatedListsGpuMemoryPoolToken& token)
+	void deferredFree(UnifiedGeometryBufferAllocation& alloc)
 	{
-		m_pool.deferredFree(token);
+		m_pool.deferredFree(alloc.m_token);
 	}
 
 	void endFrame()
@@ -57,32 +106,86 @@ public:
 private:
 	SegregatedListsGpuMemoryPool m_pool;
 
-	UnifiedGeometryMemoryPool() = default;
+	UnifiedGeometryBuffer() = default;
 
-	~UnifiedGeometryMemoryPool() = default;
+	~UnifiedGeometryBuffer() = default;
+};
+
+inline UnifiedGeometryBufferAllocation::~UnifiedGeometryBufferAllocation()
+{
+	UnifiedGeometryBuffer::getSingleton().deferredFree(*this);
+}
+
+/// @memberof GpuSceneBuffer
+class GpuSceneBufferAllocation
+{
+	friend class GpuSceneBuffer;
+
+public:
+	GpuSceneBufferAllocation() = default;
+
+	GpuSceneBufferAllocation(const GpuSceneBufferAllocation&) = delete;
+
+	GpuSceneBufferAllocation(GpuSceneBufferAllocation&& b)
+	{
+		*this = std::move(b);
+	}
+
+	~GpuSceneBufferAllocation();
+
+	GpuSceneBufferAllocation& operator=(const GpuSceneBufferAllocation&) = delete;
+
+	GpuSceneBufferAllocation& operator=(GpuSceneBufferAllocation&& b)
+	{
+		ANKI_ASSERT(!isValid() && "Forgot to delete");
+		m_token = b.m_token;
+		b.m_token = {};
+		return *this;
+	}
+
+	Bool isValid() const
+	{
+		return m_token.m_offset != kMaxPtrSize;
+	}
+
+	/// Get offset in the Unified Geometry Buffer buffer.
+	U32 getOffset() const
+	{
+		ANKI_ASSERT(isValid());
+		return U32(m_token.m_offset);
+	}
+
+	U32 getAllocatedSize() const
+	{
+		ANKI_ASSERT(isValid());
+		return U32(m_token.m_size);
+	}
+
+private:
+	SegregatedListsGpuMemoryPoolToken m_token;
 };
 
 /// Memory pool for the GPU scene.
-class GpuSceneMemoryPool : public MakeSingleton<GpuSceneMemoryPool>
+class GpuSceneBuffer : public MakeSingleton<GpuSceneBuffer>
 {
 	template<typename>
 	friend class MakeSingleton;
 
 public:
-	GpuSceneMemoryPool(const GpuSceneMemoryPool&) = delete; // Non-copyable
+	GpuSceneBuffer(const GpuSceneBuffer&) = delete; // Non-copyable
 
-	GpuSceneMemoryPool& operator=(const GpuSceneMemoryPool&) = delete; // Non-copyable
+	GpuSceneBuffer& operator=(const GpuSceneBuffer&) = delete; // Non-copyable
 
 	void init();
 
-	void allocate(PtrSize size, U32 alignment, SegregatedListsGpuMemoryPoolToken& token)
+	void allocate(PtrSize size, U32 alignment, GpuSceneBufferAllocation& alloc)
 	{
-		m_pool.allocate(size, alignment, token);
+		m_pool.allocate(size, alignment, alloc.m_token);
 	}
 
-	void deferredFree(SegregatedListsGpuMemoryPoolToken& token)
+	void deferredFree(GpuSceneBufferAllocation& alloc)
 	{
-		m_pool.deferredFree(token);
+		m_pool.deferredFree(alloc.m_token);
 	}
 
 	void endFrame()
@@ -103,11 +206,16 @@ public:
 private:
 	SegregatedListsGpuMemoryPool m_pool;
 
-	GpuSceneMemoryPool() = default;
+	GpuSceneBuffer() = default;
 
-	~GpuSceneMemoryPool() = default;
+	~GpuSceneBuffer() = default;
 };
 
+inline GpuSceneBufferAllocation::~GpuSceneBufferAllocation()
+{
+	GpuSceneBuffer::getSingleton().deferredFree(*this);
+}
+
 /// Token that gets returned when requesting for memory to write to a resource.
 class RebarGpuMemoryToken
 {
@@ -198,6 +306,28 @@ public:
 	/// @note It's thread-safe.
 	void newCopy(StackMemoryPool& frameCpuPool, PtrSize gpuSceneDestOffset, PtrSize dataSize, const void* data);
 
+	template<typename T>
+	void newCopy(StackMemoryPool& frameCpuPool, PtrSize gpuSceneDestOffset, const T& value)
+	{
+		newCopy(frameCpuPool, gpuSceneDestOffset, sizeof(value), &value);
+	}
+
+	/// @see newCopy
+	void newCopy(StackMemoryPool& frameCpuPool, const GpuSceneBufferAllocation& dest, PtrSize dataSize,
+				 const void* data)
+	{
+		ANKI_ASSERT(dataSize <= dest.getAllocatedSize());
+		newCopy(frameCpuPool, dest.getOffset(), dataSize, data);
+	}
+
+	/// @see newCopy
+	template<typename T>
+	void newCopy(StackMemoryPool& frameCpuPool, const GpuSceneBufferAllocation& dest, const T& value)
+	{
+		ANKI_ASSERT(sizeof(value) <= dest.getAllocatedSize());
+		newCopy(frameCpuPool, dest.getOffset(), sizeof(value), &value);
+	}
+
 	/// Check if there is a need to call patchGpuScene or if no copies are needed.
 	/// @note Not thread-safe. Nothing else should be happening before calling it.
 	Bool patchingIsNeeded() const
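
The hunks above replace raw SegregatedListsGpuMemoryPoolToken bookkeeping with move-only handle types whose destructors queue a deferred free, and they add typed newCopy() overloads so callers no longer pass a size/pointer pair by hand. A minimal usage sketch of that API as declared in this header; GpuSceneRenderable and framePool are stand-ins for whatever the caller already has, not part of this file:

// Sketch only: assumes the declarations shown in the diff above.
GpuSceneBufferAllocation alloc;
GpuSceneBuffer::getSingleton().allocate(sizeof(GpuSceneRenderable), 4, alloc);

GpuSceneRenderable renderable = {};
// Typed overload: asserts the value fits the allocation and copies it at alloc.getOffset().
GpuSceneMicroPatcher::getSingleton().newCopy(framePool, alloc, renderable);

// The handle is movable but not copyable; its destructor calls deferredFree() on the pool,
// so an explicit deferredFree() is only needed when the memory must be released early.
GpuSceneBufferAllocation owner = std::move(alloc); // 'alloc' becomes invalid (isValid() == false)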

+ 3 - 3
AnKi/Renderer/Drawer.cpp

@@ -57,17 +57,17 @@ void RenderableDrawer::drawRange(const RenderableDrawerArguments& args, const Re
 	cmdb->bindAllBindless(U32(MaterialSet::kBindless));
 	cmdb->bindSampler(U32(MaterialSet::kGlobal), U32(MaterialBinding::kTrilinearRepeatSampler), args.m_sampler);
 	cmdb->bindStorageBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kGpuScene),
-							GpuSceneMemoryPool::getSingleton().getBuffer(), 0, kMaxPtrSize);
+							GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
 
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType) \
 	cmdb->bindReadOnlyTextureBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kUnifiedGeometry_##fmt), \
-									UnifiedGeometryMemoryPool::getSingleton().getBuffer(), 0, kMaxPtrSize, \
+									UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize, \
 									Format::k##fmt);
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
 
 	// Misc
 	cmdb->setVertexAttribute(0, 0, Format::kR32G32B32A32_Uint, 0);
-	cmdb->bindIndexBuffer(UnifiedGeometryMemoryPool::getSingleton().getBuffer(), 0, IndexType::kU16);
+	cmdb->bindIndexBuffer(UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, IndexType::kU16);
 
 	// Set a few things
 	Context ctx;
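
The ANKI_UNIFIED_GEOM_FORMAT define plus #include pair above is an X-macro: UnifiedGeometryTypes.defs.h is a list of ANKI_UNIFIED_GEOM_FORMAT(format, shaderType) invocations, so the single statement in the macro body expands into one read-only texture-buffer binding per vertex format. A self-contained sketch of the technique; the two format entries here are made up for illustration, the real list lives in the defs header:

#include <cstdio>

// The engine defines the macro and then includes the defs header; that header is nothing but
// macro invocations. They are written inline below so the sketch compiles on its own.
#define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType) \
	std::printf("bind unified geometry buffer as Format::k%s (%s)\n", #fmt, #shaderType);

int main()
{
	// Stand-in for: #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
	ANKI_UNIFIED_GEOM_FORMAT(R32G32B32_Sfloat, Vec3)
	ANKI_UNIFIED_GEOM_FORMAT(R16G16B16A16_Unorm, Vec4)
	return 0;
}

#undef ANKI_UNIFIED_GEOM_FORMAT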

+ 1 - 1
AnKi/Renderer/PackVisibleClusteredObjects.cpp

@@ -111,7 +111,7 @@ void PackVisibleClusteredObjects::dispatchType(WeakArray<TRenderQueueElement> ar
 		}
 	}
 
-	cmdb->bindStorageBuffer(0, 0, GpuSceneMemoryPool::getSingleton().getBuffer(),
+	cmdb->bindStorageBuffer(0, 0, GpuSceneBuffer::getSingleton().getBuffer(),
 							rqueue.m_clustererObjectsArrayOffsets[kType], rqueue.m_clustererObjectsArrayRanges[kType]);
 
 	cmdb->bindStorageBuffer(0, 1, m_allClustererObjects, m_structureBufferOffsets[kType],

+ 2 - 2
AnKi/Renderer/Renderer.cpp

@@ -693,8 +693,8 @@ void Renderer::gpuSceneCopy(RenderingContext& ctx)
 {
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 
-	m_runCtx.m_gpuSceneHandle = rgraph.importBuffer(GpuSceneMemoryPool::getSingleton().getBuffer(),
-													GpuSceneMemoryPool::getSingleton().getBuffer()->getBufferUsage());
+	m_runCtx.m_gpuSceneHandle = rgraph.importBuffer(GpuSceneBuffer::getSingleton().getBuffer(),
+													GpuSceneBuffer::getSingleton().getBuffer()->getBufferUsage());
 
 	if(GpuSceneMicroPatcher::getSingleton().patchingIsNeeded())
 	{
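
Put together with the GpuSceneMicroPatcher changes, the per-frame flow is: components record small copies with newCopy() while the scene updates, the renderer imports the GPU scene buffer into the render graph, and a compute pass replays the recorded copies only when patchingIsNeeded() reports outstanding work. A condensed sketch of that ordering; framePool, decalIndex and gpuDecal are placeholders for the caller's own state, and the render-graph pass wiring is assumed to follow the renderer's usual pattern:

// During scene update (possibly from many threads): queue a copy into the GPU scene.
GpuSceneMicroPatcher::getSingleton().newCopy(framePool, decalIndex.getOffsetInGpuScene(), gpuDecal);

// During render graph construction (single threaded), as in Renderer::gpuSceneCopy() above:
if(GpuSceneMicroPatcher::getSingleton().patchingIsNeeded())
{
	// Register a pass whose work callback ends up calling patchGpuScene(cmdb); that binds the
	// staging headers/data plus the whole GpuSceneBuffer and dispatches the patching shader.
}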

+ 2 - 2
AnKi/Renderer/RtShadows.cpp

@@ -467,11 +467,11 @@ void RtShadows::run(RenderPassWorkContext& rgraphCtx)
 	cmdb->bindSampler(U32(MaterialSet::kGlobal), U32(MaterialBinding::kTrilinearRepeatSampler),
 					  getRenderer().getSamplers().m_trilinearRepeat);
 	cmdb->bindStorageBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kGpuScene),
-							GpuSceneMemoryPool::getSingleton().getBuffer(), 0, kMaxPtrSize);
+							GpuSceneBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize);
 
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType) \
 	cmdb->bindReadOnlyTextureBuffer(U32(MaterialSet::kGlobal), U32(MaterialBinding::kUnifiedGeometry_##fmt), \
-									UnifiedGeometryMemoryPool::getSingleton().getBuffer(), 0, kMaxPtrSize, \
+									UnifiedGeometryBuffer::getSingleton().getBuffer(), 0, kMaxPtrSize, \
 									Format::k##fmt);
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.defs.h>
 

+ 0 - 1
AnKi/Resource/Common.h

@@ -19,7 +19,6 @@ template<typename Type>
 class ResourcePointer;
 class TransferGpuAllocatorHandle;
 class PhysicsWorld;
-class UnifiedGeometryMemoryPool;
 
 /// @addtogroup resource
 /// @{

+ 6 - 0
AnKi/Resource/MaterialResource.cpp

@@ -728,6 +728,12 @@ const MaterialVariant& MaterialResource::getOrCreateVariant(const RenderingKey&
 		key.setVelocity(false);
 	}
 
+	if(key.getRenderingTechnique() != RenderingTechnique::kGBuffer && key.getVelocity())
+	{
+		// Only GBuffer technique can write to velocity buffers
+		key.setVelocity(false);
+	}
+
 	ANKI_ASSERT(!key.getSkinned() || !!(prog.m_presentBuildinMutators & U32(1 << BuiltinMutatorId::kBones)));
 	ANKI_ASSERT(!key.getVelocity() || !!(prog.m_presentBuildinMutators & U32(1 << BuiltinMutatorId::kVelocity)));
 

+ 25 - 25
AnKi/Resource/MeshResource.cpp

@@ -56,11 +56,11 @@ MeshResource::~MeshResource()
 {
 	for(Lod& lod : m_lods)
 	{
-		UnifiedGeometryMemoryPool::getSingleton().deferredFree(lod.m_indexBufferAllocationToken);
+		UnifiedGeometryBuffer::getSingleton().deferredFree(lod.m_indexBufferAllocationToken);
 
 		for(VertexStreamId stream : EnumIterable(VertexStreamId::kMeshRelatedFirst, VertexStreamId::kMeshRelatedCount))
 		{
-			UnifiedGeometryMemoryPool::getSingleton().deferredFree(lod.m_vertexBuffersAllocationToken[stream]);
+			UnifiedGeometryBuffer::getSingleton().deferredFree(lod.m_vertexBuffersAllocationToken[stream]);
 		}
 	}
 }
@@ -119,8 +119,8 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 		lod.m_indexCount = header.m_totalIndexCounts[l];
 		ANKI_ASSERT((lod.m_indexCount % 3) == 0 && "Expecting triangles");
 		const PtrSize indexBufferSize = PtrSize(lod.m_indexCount) * getIndexSize(m_indexType);
-		UnifiedGeometryMemoryPool::getSingleton().allocate(indexBufferSize, getIndexSize(m_indexType),
-														   lod.m_indexBufferAllocationToken);
+		UnifiedGeometryBuffer::getSingleton().allocate(indexBufferSize, getIndexSize(m_indexType),
+													   lod.m_indexBufferAllocationToken);
 
 		// Vertex stuff
 		lod.m_vertexCount = header.m_totalVertexCounts[l];
@@ -137,19 +137,19 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 			const U32 alignment = max(4u, nextPowerOfTwo(texelSize));
 			const PtrSize vertexBufferSize = PtrSize(lod.m_vertexCount) * texelSize + alignment;
 
-			UnifiedGeometryMemoryPool::getSingleton().allocate(vertexBufferSize, alignment,
-															   lod.m_vertexBuffersAllocationToken[stream]);
+			UnifiedGeometryBuffer::getSingleton().allocate(vertexBufferSize, alignment,
+														   lod.m_vertexBuffersAllocationToken[stream]);
 
 			// We need to align the actual offset to the texel size
-			const PtrSize remainder = lod.m_vertexBuffersAllocationToken[stream].m_offset % texelSize;
+			const PtrSize remainder = lod.m_vertexBuffersAllocationToken[stream].getOffset() % texelSize;
 			lod.m_fixedUnifiedGeometryBufferOffset[stream] = U8(texelSize - remainder);
 
-			ANKI_ASSERT(
-				(lod.m_vertexBuffersAllocationToken[stream].m_offset + lod.m_fixedUnifiedGeometryBufferOffset[stream])
-					% texelSize
-				== 0);
+			ANKI_ASSERT((lod.m_vertexBuffersAllocationToken[stream].getOffset()
+						 + lod.m_fixedUnifiedGeometryBufferOffset[stream])
+							% texelSize
+						== 0);
 			ANKI_ASSERT(lod.m_fixedUnifiedGeometryBufferOffset[stream] + PtrSize(lod.m_vertexCount) * texelSize
-						<= lod.m_vertexBuffersAllocationToken[stream].m_size);
+						<= lod.m_vertexBuffersAllocationToken[stream].getAllocatedSize());
 		}
 
 		// BLAS
@@ -158,13 +158,13 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 			AccelerationStructureInitInfo inf(ResourceString().sprintf("%s_%s", "Blas", basename.cstr()));
 			inf.m_type = AccelerationStructureType::kBottomLevel;
 
-			inf.m_bottomLevel.m_indexBuffer = UnifiedGeometryMemoryPool::getSingleton().getBuffer();
-			inf.m_bottomLevel.m_indexBufferOffset = lod.m_indexBufferAllocationToken.m_offset;
+			inf.m_bottomLevel.m_indexBuffer = UnifiedGeometryBuffer::getSingleton().getBuffer();
+			inf.m_bottomLevel.m_indexBufferOffset = lod.m_indexBufferAllocationToken.getOffset();
 			inf.m_bottomLevel.m_indexCount = lod.m_indexCount;
 			inf.m_bottomLevel.m_indexType = m_indexType;
-			inf.m_bottomLevel.m_positionBuffer = UnifiedGeometryMemoryPool::getSingleton().getBuffer();
+			inf.m_bottomLevel.m_positionBuffer = UnifiedGeometryBuffer::getSingleton().getBuffer();
 			inf.m_bottomLevel.m_positionBufferOffset =
-				lod.m_vertexBuffersAllocationToken[VertexStreamId::kPosition].m_offset
+				lod.m_vertexBuffersAllocationToken[VertexStreamId::kPosition].getOffset()
 				+ lod.m_fixedUnifiedGeometryBufferOffset[VertexStreamId::kPosition];
 			inf.m_bottomLevel.m_positionStride =
 				getFormatInfo(kMeshRelatedVertexStreamFormats[VertexStreamId::kPosition]).m_texelSize;
@@ -184,8 +184,8 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 
 		for(const Lod& lod : m_lods)
 		{
-			cmdb->fillBuffer(UnifiedGeometryMemoryPool::getSingleton().getBuffer(),
-							 lod.m_indexBufferAllocationToken.m_offset,
+			cmdb->fillBuffer(UnifiedGeometryBuffer::getSingleton().getBuffer(),
+							 lod.m_indexBufferAllocationToken.getOffset(),
 							 PtrSize(lod.m_indexCount) * getIndexSize(m_indexType), 0);
 
 			for(VertexStreamId stream :
@@ -193,14 +193,14 @@ Error MeshResource::load(const ResourceFilename& filename, Bool async)
 			{
 				if(header.m_vertexAttributes[stream].m_format != Format::kNone)
 				{
-					cmdb->fillBuffer(UnifiedGeometryMemoryPool::getSingleton().getBuffer(),
-									 lod.m_vertexBuffersAllocationToken[stream].m_offset,
-									 lod.m_vertexBuffersAllocationToken[stream].m_size, 0);
+					cmdb->fillBuffer(UnifiedGeometryBuffer::getSingleton().getBuffer(),
+									 lod.m_vertexBuffersAllocationToken[stream].getOffset(),
+									 lod.m_vertexBuffersAllocationToken[stream].getAllocatedSize(), 0);
 				}
 			}
 		}
 
-		const BufferBarrierInfo barrier = {UnifiedGeometryMemoryPool::getSingleton().getBuffer().get(),
+		const BufferBarrierInfo barrier = {UnifiedGeometryBuffer::getSingleton().getBuffer().get(),
 										   BufferUsageBit::kTransferDestination, BufferUsageBit::kVertex, 0,
 										   kMaxPtrSize};
 
@@ -232,7 +232,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 	Array<TransferGpuAllocatorHandle, kMaxLodCount*(U32(VertexStreamId::kMeshRelatedCount) + 1)> handles;
 	U32 handleCount = 0;
 
-	BufferPtr unifiedGeometryBuffer = UnifiedGeometryMemoryPool::getSingleton().getBuffer();
+	BufferPtr unifiedGeometryBuffer = UnifiedGeometryBuffer::getSingleton().getBuffer();
 	const BufferUsageBit unifiedGeometryBufferNonTransferUsage =
 		unifiedGeometryBuffer->getBufferUsage() ^ BufferUsageBit::kTransferDestination;
 
@@ -262,7 +262,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 			ANKI_CHECK(loader.storeIndexBuffer(lodIdx, data, indexBufferSize));
 
 			cmdb->copyBufferToBuffer(handle.getBuffer(), handle.getOffset(), unifiedGeometryBuffer,
-									 lod.m_indexBufferAllocationToken.m_offset, handle.getRange());
+									 lod.m_indexBufferAllocationToken.getOffset(), handle.getRange());
 		}
 
 		// Upload vert buffers
@@ -286,7 +286,7 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 
 			// Copy
 			cmdb->copyBufferToBuffer(handle.getBuffer(), handle.getOffset(), unifiedGeometryBuffer,
-									 lod.m_vertexBuffersAllocationToken[stream].m_offset
+									 lod.m_vertexBuffersAllocationToken[stream].getOffset()
 										 + lod.m_fixedUnifiedGeometryBufferOffset[stream],
 									 handle.getRange());
 		}
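
The switch to getOffset()/getAllocatedSize() also touches the texel-alignment fix-up earlier in this file: each vertex stream allocation is over-sized by one alignment's worth of bytes, and a small per-stream byte offset is stored so that allocation offset plus fix-up lands on a texel boundary. A self-contained worked example of that arithmetic, with made-up numbers:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main()
{
	// Say the stream's texel is 12 bytes (3 x float32) and the pool handed back this offset.
	const uint64_t texelSize = 12;
	const uint64_t allocationOffset = 1000; // 1000 % 12 == 4, so not texel-aligned

	// Same computation as MeshResource::load(): push the start forward to the next texel boundary.
	const uint64_t remainder = allocationOffset % texelSize;
	const uint64_t fixedOffset = texelSize - remainder; // 8 bytes of padding in this example

	assert((allocationOffset + fixedOffset) % texelSize == 0);
	std::printf("read the stream starting at byte %llu\n",
				static_cast<unsigned long long>(allocationOffset + fixedOffset));
	return 0;
}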

+ 4 - 4
AnKi/Resource/MeshResource.h

@@ -55,7 +55,7 @@ public:
 	/// Get all info around vertex indices.
 	void getIndexBufferInfo(U32 lod, PtrSize& buffOffset, U32& indexCount, IndexType& indexType) const
 	{
-		buffOffset = m_lods[lod].m_indexBufferAllocationToken.m_offset;
+		buffOffset = m_lods[lod].m_indexBufferAllocationToken.getOffset();
 		ANKI_ASSERT(isAligned(getIndexSize(m_indexType), buffOffset));
 		indexCount = m_lods[lod].m_indexCount;
 		indexType = m_indexType;
@@ -64,7 +64,7 @@ public:
 	/// Get vertex buffer info.
 	void getVertexStreamInfo(U32 lod, VertexStreamId stream, PtrSize& bufferOffset, U32& vertexCount) const
 	{
-		bufferOffset = m_lods[lod].m_vertexBuffersAllocationToken[stream].m_offset
+		bufferOffset = m_lods[lod].m_vertexBuffersAllocationToken[stream].getOffset()
 					   + m_lods[lod].m_fixedUnifiedGeometryBufferOffset[stream];
 		vertexCount = m_lods[lod].m_vertexCount;
 	}
@@ -103,8 +103,8 @@ private:
 	class Lod
 	{
 	public:
-		SegregatedListsGpuMemoryPoolToken m_indexBufferAllocationToken;
-		Array<SegregatedListsGpuMemoryPoolToken, U32(VertexStreamId::kMeshRelatedCount)> m_vertexBuffersAllocationToken;
+		UnifiedGeometryBufferAllocation m_indexBufferAllocationToken;
+		Array<UnifiedGeometryBufferAllocation, U32(VertexStreamId::kMeshRelatedCount)> m_vertexBuffersAllocationToken;
 		Array<U8, U32(VertexStreamId::kMeshRelatedCount)> m_fixedUnifiedGeometryBufferOffset = {};
 
 		U32 m_indexCount = 0;

+ 0 - 1
AnKi/Resource/ResourceManager.h

@@ -21,7 +21,6 @@ class AsyncLoader;
 class ResourceManagerModel;
 class ShaderCompilerCache;
 class ShaderProgramResourceSystem;
-class UnifiedGeometryMemoryPool;
 
 /// @addtogroup resource
 /// @{

+ 0 - 2
AnKi/Scene/Common.h

@@ -17,8 +17,6 @@ namespace anki {
 class ResourceManager;
 class Input;
 class UiManager;
-class UnifiedGeometryMemoryPool;
-class GpuSceneMemoryPool;
 class GpuSceneMicroPatcher;
 class ScriptManager;
 class GrManager;

+ 3 - 9
AnKi/Scene/Components/DecalComponent.cpp

@@ -14,16 +14,12 @@ DecalComponent::DecalComponent(SceneNode* node)
 	: SceneComponent(node, getStaticClassId())
 	, m_spatial(this)
 {
-	m_gpuSceneIndex =
-		SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(GpuSceneContiguousArrayType::kDecals);
+	m_gpuSceneIndex = AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kDecals);
 }
 
 DecalComponent::~DecalComponent()
 {
 	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
-
-	SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(GpuSceneContiguousArrayType::kDecals,
-																			 m_gpuSceneIndex);
 }
 
 void DecalComponent::setLayer(CString fname, F32 blendFactor, LayerType type)
@@ -90,10 +86,8 @@ Error DecalComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 		gpuDecal.m_obbExtend = m_obb.getExtend().xyz();
 
-		const PtrSize offset = m_gpuSceneIndex * sizeof(GpuSceneDecal)
-							   + SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().getArrayBase(
-								   GpuSceneContiguousArrayType::kDecals);
-		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, sizeof(gpuDecal), &gpuDecal);
+		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneIndex.getOffsetInGpuScene(),
+													 gpuDecal);
 	}
 
 	const Bool spatialUpdated = m_spatial.update(SceneGraph::getSingleton().getOctree());
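
The component-side pattern that replaces the manual offset math is: keep a GpuSceneContiguousArrayIndex handle, allocate it from the AllGpuSceneContiguousArrays singleton, and let the handle report both the element index and its absolute byte offset. A minimal sketch of that pattern as it recurs through the component diffs (framePool stands in for the frame's StackMemoryPool); note that the explicit deferredFree() calls are gone from destructors, so the handle is presumably released either by its own destructor or by an explicit deferredFree(), as LightComponent and ParticleEmitterComponent still do when a slot has to be replaced:

// Reserve a slot in the decals array; the handle remembers which array it belongs to.
GpuSceneContiguousArrayIndex idx =
	AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kDecals);

// idx.get() is the element index (what the render queue elements store);
// idx.getOffsetInGpuScene() is the byte offset (array base + index * element size).
GpuSceneDecal gpuDecal = {};
GpuSceneMicroPatcher::getSingleton().newCopy(framePool, idx.getOffsetInGpuScene(), gpuDecal);

// Explicit release, needed when the slot must be swapped for one in another array:
AllGpuSceneContiguousArrays::getSingleton().deferredFree(idx);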

+ 2 - 2
AnKi/Scene/Components/DecalComponent.h

@@ -67,7 +67,7 @@ public:
 		el.m_obbCenter = m_obb.getCenter().xyz();
 		el.m_obbExtend = m_obb.getExtend().xyz();
 		el.m_obbRotation = m_obb.getRotation().getRotationPart();
-		el.m_index = m_gpuSceneIndex;
+		el.m_index = m_gpuSceneIndex.get();
 	}
 
 private:
@@ -93,7 +93,7 @@ private:
 	Vec3 m_boxSize = Vec3(1.0f);
 	Obb m_obb = Obb(Vec4(0.0f), Mat3x4::getIdentity(), Vec4(0.5f, 0.5f, 0.5f, 0.0f));
 
-	U32 m_gpuSceneIndex = kMaxU32;
+	GpuSceneContiguousArrayIndex m_gpuSceneIndex;
 
 	Bool m_dirty = true;
 

+ 4 - 9
AnKi/Scene/Components/FogDensityComponent.cpp

@@ -13,16 +13,13 @@ FogDensityComponent::FogDensityComponent(SceneNode* node)
 	: SceneComponent(node, getStaticClassId())
 	, m_spatial(this)
 {
-	m_gpuSceneIndex = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-		GpuSceneContiguousArrayType::kFogDensityVolumes);
+	m_gpuSceneIndex =
+		AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kFogDensityVolumes);
 }
 
 FogDensityComponent ::~FogDensityComponent()
 {
 	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
-
-	SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-		GpuSceneContiguousArrayType::kFogDensityVolumes, m_gpuSceneIndex);
 }
 
 Error FogDensityComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
@@ -61,10 +58,8 @@ Error FogDensityComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		gpuVolume.m_isBox = m_isBox;
 		gpuVolume.m_density = m_density;
 
-		const PtrSize offset = m_gpuSceneIndex * sizeof(GpuSceneFogDensityVolume)
-							   + SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().getArrayBase(
-								   GpuSceneContiguousArrayType::kFogDensityVolumes);
-		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, sizeof(gpuVolume), &gpuVolume);
+		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneIndex.getOffsetInGpuScene(),
+													 gpuVolume);
 	}
 
 	const Bool spatialUpdated = m_spatial.update(SceneGraph::getSingleton().getOctree());

+ 2 - 3
AnKi/Scene/Components/FogDensityComponent.h

@@ -92,8 +92,7 @@ public:
 			el.m_sphereCenter = m_worldPos.xyz();
 			el.m_sphereRadius = m_sphereRadius;
 		}
-		ANKI_ASSERT(m_gpuSceneIndex != kMaxU32);
-		el.m_index = m_gpuSceneIndex;
+		el.m_index = m_gpuSceneIndex.get();
 	}
 
 private:
@@ -110,7 +109,7 @@ private:
 	Vec3 m_worldPos = Vec3(0.0f);
 	F32 m_density = 1.0f;
 
-	U32 m_gpuSceneIndex = kMaxU32;
+	GpuSceneContiguousArrayIndex m_gpuSceneIndex;
 
 	Bool m_isBox = true;
 	Bool m_dirty = true;

+ 4 - 9
AnKi/Scene/Components/GlobalIlluminationProbeComponent.cpp

@@ -26,8 +26,8 @@ GlobalIlluminationProbeComponent::GlobalIlluminationProbeComponent(SceneNode* no
 		m_frustums[i].update();
 	}
 
-	m_gpuSceneIndex = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-		GpuSceneContiguousArrayType::kGlobalIlluminationProbes);
+	m_gpuSceneIndex =
+		AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kGlobalIlluminationProbes);
 
 	const Error err = ResourceManager::getSingleton().loadResource("ShaderBinaries/ClearTextureCompute.ankiprogbin",
 																   m_clearTextureProg);
@@ -40,9 +40,6 @@ GlobalIlluminationProbeComponent::GlobalIlluminationProbeComponent(SceneNode* no
 GlobalIlluminationProbeComponent::~GlobalIlluminationProbeComponent()
 {
 	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
-
-	SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-		GpuSceneContiguousArrayType::kGlobalIlluminationProbes, m_gpuSceneIndex);
 }
 
 Error GlobalIlluminationProbeComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
@@ -123,10 +120,8 @@ Error GlobalIlluminationProbeComponent::update(SceneComponentUpdateInfo& info, B
 		gpuProbe.m_halfTexelSizeU = 1.0f / (F32(m_cellCounts.y()) * 6.0f) / 2.0f;
 		gpuProbe.m_fadeDistance = m_fadeDistance;
 
-		const PtrSize offset = m_gpuSceneIndex * sizeof(GpuSceneGlobalIlluminationProbe)
-							   + SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().getArrayBase(
-								   GpuSceneContiguousArrayType::kGlobalIlluminationProbes);
-		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, sizeof(gpuProbe), &gpuProbe);
+		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneIndex.getOffsetInGpuScene(),
+													 gpuProbe);
 	}
 
 	if(needsRefresh()) [[unlikely]]

+ 2 - 2
AnKi/Scene/Components/GlobalIlluminationProbeComponent.h

@@ -77,7 +77,7 @@ public:
 		el.m_cellSizes = (m_halfSize * 2.0f) / Vec3(m_cellCounts);
 		el.m_fadeDistance = m_fadeDistance;
 		el.m_volumeTextureBindlessIndex = m_volTexBindlessIdx;
-		el.m_index = m_gpuSceneIndex;
+		el.m_index = m_gpuSceneIndex.get();
 	}
 
 	void setupGlobalIlluminationProbeQueueElementForRefresh(GlobalIlluminationProbeQueueElementForRefresh& el)
@@ -111,7 +111,7 @@ private:
 	TextureViewPtr m_volView;
 	U32 m_volTexBindlessIdx = 0;
 
-	U32 m_gpuSceneIndex = kMaxU32;
+	GpuSceneContiguousArrayIndex m_gpuSceneIndex;
 
 	Array<Frustum, 6> m_frustums;
 

+ 12 - 38
AnKi/Scene/Components/LightComponent.cpp

@@ -33,24 +33,6 @@ LightComponent::~LightComponent()
 {
 	deleteArray(SceneMemoryPool::getSingleton(), m_frustums, m_frustumCount);
 	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
-
-	if(m_gpuSceneLightIndex != kMaxU32)
-	{
-		if(m_type == LightComponentType::kPoint)
-		{
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-				GpuSceneContiguousArrayType::kPointLights, m_gpuSceneLightIndex);
-		}
-		else if(m_type == LightComponentType::kSpot)
-		{
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-				GpuSceneContiguousArrayType::kSpotLights, m_gpuSceneLightIndex);
-		}
-		else
-		{
-			ANKI_ASSERT(0);
-		}
-	}
 }
 
 void LightComponent::setLightComponentType(LightComponentType type)
@@ -70,24 +52,20 @@ void LightComponent::setLightComponentType(LightComponentType type)
 		m_spatial.setUpdatesOctreeBounds(true);
 	}
 
-	if(m_typeChanged && m_gpuSceneLightIndex != kMaxU32)
+	if(m_typeChanged)
 	{
-		SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-			(m_type == LightComponentType::kPoint) ? GpuSceneContiguousArrayType::kPointLights
-												   : GpuSceneContiguousArrayType::kSpotLights,
-			m_gpuSceneLightIndex);
-		m_gpuSceneLightIndex = kMaxU32;
+		AllGpuSceneContiguousArrays::getSingleton().deferredFree(m_gpuSceneLightIndex);
 	}
 
-	if(m_gpuSceneLightIndex == kMaxU32 && type == LightComponentType::kPoint)
+	if(!m_gpuSceneLightIndex.isValid() && type == LightComponentType::kPoint)
 	{
-		m_gpuSceneLightIndex = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-			GpuSceneContiguousArrayType::kPointLights);
+		m_gpuSceneLightIndex =
+			AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kPointLights);
 	}
-	else if(m_gpuSceneLightIndex == kMaxU32 && type == LightComponentType::kSpot)
+	else if(!m_gpuSceneLightIndex.isValid() && type == LightComponentType::kSpot)
 	{
-		m_gpuSceneLightIndex = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-			GpuSceneContiguousArrayType::kSpotLights);
+		m_gpuSceneLightIndex =
+			AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kSpotLights);
 	}
 
 	m_type = type;
@@ -154,10 +132,8 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		gpuLight.m_diffuseColor = m_diffColor.xyz();
 		gpuLight.m_squareRadiusOverOne = 1.0f / (m_point.m_radius * m_point.m_radius);
 		gpuLight.m_shadow = m_shadow;
-		const PtrSize offset = m_gpuSceneLightIndex * sizeof(GpuScenePointLight)
-							   + SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().getArrayBase(
-								   GpuSceneContiguousArrayType::kPointLights);
-		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, sizeof(gpuLight), &gpuLight);
+		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneLightIndex.getOffsetInGpuScene(),
+													 gpuLight);
 	}
 	else if(updated && m_type == LightComponentType::kSpot)
 	{
@@ -223,10 +199,8 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		gpuLight.m_shadow = m_shadow;
 		gpuLight.m_outerCos = cos(m_spot.m_outerAngle / 2.0f);
 		gpuLight.m_innerCos = cos(m_spot.m_innerAngle / 2.0f);
-		const PtrSize offset = m_gpuSceneLightIndex * sizeof(GpuSceneSpotLight)
-							   + SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().getArrayBase(
-								   GpuSceneContiguousArrayType::kSpotLights);
-		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, sizeof(gpuLight), &gpuLight);
+		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneLightIndex.getOffsetInGpuScene(),
+													 gpuLight);
 	}
 	else if(m_type == LightComponentType::kDirectional)
 	{

+ 3 - 5
AnKi/Scene/Components/LightComponent.h

@@ -124,8 +124,7 @@ public:
 		el.m_radius = m_point.m_radius;
 		el.m_diffuseColor = m_diffColor.xyz();
 		el.m_shadowLayer = kMaxU8;
-		ANKI_ASSERT(m_gpuSceneLightIndex != kMaxU32);
-		el.m_index = m_gpuSceneLightIndex;
+		el.m_index = m_gpuSceneLightIndex.get();
 	}
 
 	void setupSpotLightQueueElement(SpotLightQueueElement& el) const
@@ -140,8 +139,7 @@ public:
 		el.m_diffuseColor = m_diffColor.xyz();
 		el.m_edgePoints = m_spot.m_edgePointsWspace;
 		el.m_shadowLayer = kMaxU8;
-		ANKI_ASSERT(m_gpuSceneLightIndex != kMaxU32);
-		el.m_index = m_gpuSceneLightIndex;
+		el.m_index = m_gpuSceneLightIndex.get();
 	}
 
 	/// Setup a directional queue element.
@@ -187,7 +185,7 @@ private:
 
 	Frustum* m_frustums = nullptr;
 
-	U32 m_gpuSceneLightIndex = kMaxU32;
+	GpuSceneContiguousArrayIndex m_gpuSceneLightIndex;
 
 	LightComponentType m_type;
 

+ 148 - 149
AnKi/Scene/Components/ModelComponent.cpp

@@ -18,40 +18,37 @@ ModelComponent::ModelComponent(SceneNode* node)
 	, m_node(node)
 	, m_spatial(this)
 {
-	m_gpuSceneTransformsIndex = U32(SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-		GpuSceneContiguousArrayType::kTransformPairs));
+	m_gpuSceneIndexTransforms =
+		AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kTransformPairs);
 }
 
 ModelComponent::~ModelComponent()
 {
-	GpuSceneMemoryPool::getSingleton().deferredFree(m_gpuSceneUniforms);
+	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
+}
+
+void ModelComponent::freeGpuScene()
+{
+	GpuSceneBuffer::getSingleton().deferredFree(m_gpuSceneUniforms);
+
+	AllGpuSceneContiguousArrays& arr = AllGpuSceneContiguousArrays::getSingleton();
 
-	for(const PatchInfo& patch : m_patchInfos)
+	for(PatchInfo& patch : m_patchInfos)
 	{
-		if(patch.m_gpuSceneMeshLodsIndex != kMaxU32)
-		{
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-				GpuSceneContiguousArrayType::kMeshLods, patch.m_gpuSceneMeshLodsIndex);
-		}
+		arr.deferredFree(patch.m_gpuSceneIndexMeshLods);
+
+		arr.deferredFree(patch.m_gpuSceneIndexRenderable);
 
-		if(patch.m_gpuSceneRenderableIndex != kMaxU32)
+		for(GpuSceneContiguousArrayIndex& idx : patch.m_gpuSceneIndexRenderableAabbs)
 		{
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-				GpuSceneContiguousArrayType::kRenderablesGBuffer, patch.m_gpuSceneRenderableIndex);
+			arr.deferredFree(idx);
 		}
 
-		if(patch.m_gpuSceneRenderableBoundingVolumeIndex != kMaxU32)
+		for(RenderingTechnique t : EnumIterable<RenderingTechnique>())
 		{
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-				GpuSceneContiguousArrayType::kRenderableBoundingVolumesGBuffer,
-				patch.m_gpuSceneRenderableBoundingVolumeIndex);
+			RenderStateBucketContainer::getSingleton().removeUser(patch.m_renderStateBucketIndices[t]);
 		}
 	}
-
-	SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-		GpuSceneContiguousArrayType::kTransformPairs, m_gpuSceneTransformsIndex);
-
-	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
 }
 
 void ModelComponent::loadModelResource(CString filename)
@@ -64,77 +61,73 @@ void ModelComponent::loadModelResource(CString filename)
 		return;
 	}
 
-	m_dirty = true;
+	m_resourceChanged = true;
 
 	m_model = std::move(rsrc);
 	const U32 modelPatchCount = m_model->getModelPatches().getSize();
 
-	// GPU scene allocations
-	for(const PatchInfo& patch : m_patchInfos)
-	{
-		if(patch.m_gpuSceneMeshLodsIndex != kMaxU32)
-		{
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-				GpuSceneContiguousArrayType::kMeshLods, patch.m_gpuSceneMeshLodsIndex);
-		}
-
-		if(patch.m_gpuSceneRenderableIndex != kMaxU32)
-		{
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-				GpuSceneContiguousArrayType::kRenderablesGBuffer, patch.m_gpuSceneRenderableIndex);
-		}
-
-		if(patch.m_gpuSceneRenderableBoundingVolumeIndex != kMaxU32)
-		{
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-				GpuSceneContiguousArrayType::kRenderableBoundingVolumesGBuffer,
-				patch.m_gpuSceneRenderableBoundingVolumeIndex);
-		}
-	}
-
+	// Init
+	freeGpuScene();
 	m_patchInfos.resize(modelPatchCount);
-	for(U32 i = 0; i < modelPatchCount; ++i)
-	{
-		m_patchInfos[i].m_gpuSceneMeshLodsIndex = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-			GpuSceneContiguousArrayType::kMeshLods);
-
-		m_patchInfos[i].m_gpuSceneRenderableIndex =
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-				GpuSceneContiguousArrayType::kRenderablesGBuffer);
-
-		m_patchInfos[i].m_gpuSceneRenderableBoundingVolumeIndex =
-			SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-				GpuSceneContiguousArrayType::kRenderableBoundingVolumesGBuffer);
-	}
+	m_presentRenderingTechniques = RenderingTechniqueBit::kNone;
 
+	// Allocate all uniforms so you can make one allocation
 	U32 uniformsSize = 0;
 	for(U32 i = 0; i < modelPatchCount; ++i)
 	{
-		m_patchInfos[i].m_gpuSceneUniformsOffset = uniformsSize;
-
 		const U32 size = U32(m_model->getModelPatches()[i].getMaterial()->getPrefilledLocalUniforms().getSizeInBytes());
 		ANKI_ASSERT((size % 4) == 0);
 		uniformsSize += size;
 	}
 
-	GpuSceneMemoryPool::getSingleton().deferredFree(m_gpuSceneUniforms);
-	GpuSceneMemoryPool::getSingleton().allocate(uniformsSize, 4, m_gpuSceneUniforms);
+	GpuSceneBuffer::getSingleton().allocate(uniformsSize, 4, m_gpuSceneUniforms);
+	uniformsSize = 0;
 
+	// Init the patches
 	for(U32 i = 0; i < modelPatchCount; ++i)
 	{
-		m_patchInfos[i].m_gpuSceneUniformsOffset += U32(m_gpuSceneUniforms.m_offset);
-	}
+		PatchInfo& out = m_patchInfos[i];
+		const ModelPatch& in = m_model->getModelPatches()[i];
 
-	// Some other per-patch init
-	m_presentRenderingTechniques = RenderingTechniqueBit::kNone;
-	m_castsShadow = false;
-	for(U32 i = 0; i < modelPatchCount; ++i)
-	{
-		m_patchInfos[i].m_techniques = m_model->getModelPatches()[i].getMaterial()->getRenderingTechniques();
+		out.m_techniques = in.getMaterial()->getRenderingTechniques();
+		m_castsShadow = m_castsShadow || in.getMaterial()->castsShadow();
+		m_presentRenderingTechniques |= in.getMaterial()->getRenderingTechniques();
+
+		out.m_gpuSceneUniformsOffset = m_gpuSceneUniforms.getOffset() + uniformsSize;
+		uniformsSize += U32(in.getMaterial()->getPrefilledLocalUniforms().getSizeInBytes());
+
+		out.m_gpuSceneIndexMeshLods =
+			AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kMeshLods);
 
-		m_castsShadow = m_castsShadow || m_model->getModelPatches()[i].getMaterial()->castsShadow();
+		out.m_gpuSceneIndexRenderable =
+			AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kRenderables);
 
-		m_presentRenderingTechniques |= m_model->getModelPatches()[i].getMaterial()->getRenderingTechniques();
+		for(RenderingTechnique t : EnumIterable<RenderingTechnique>())
+		{
+			if(!(RenderingTechniqueBit(1 << t) & out.m_techniques)
+			   || !!(RenderingTechniqueBit(1 << t) & RenderingTechniqueBit::kAllRt))
+			{
+				continue;
+			}
+
+			GpuSceneContiguousArrayType allocType = GpuSceneContiguousArrayType::kCount;
+			switch(t)
+			{
+			case RenderingTechnique::kGBuffer:
+				allocType = GpuSceneContiguousArrayType::kRenderableBoundingVolumesGBuffer;
+				break;
+			case RenderingTechnique::kForward:
+				allocType = GpuSceneContiguousArrayType::kRenderableBoundingVolumesForward;
+				break;
+			case RenderingTechnique::kDepth:
+				allocType = GpuSceneContiguousArrayType::kRenderableBoundingVolumesDepth;
+				break;
+			default:
+				ANKI_ASSERT(0);
+			}
+
+			out.m_gpuSceneIndexRenderableAabbs[t] = AllGpuSceneContiguousArrays::getSingleton().allocate(allocType);
+		}
 	}
 }
 
@@ -146,20 +139,17 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		return Error::kNone;
 	}
 
-	const AllGpuSceneContiguousArrays& gpuArrays = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays();
-
-	const Bool resourceUpdated = m_dirty;
-	m_dirty = false;
+	const Bool resourceUpdated = m_resourceChanged;
+	m_resourceChanged = false;
 	const Bool moved = info.m_node->movedThisFrame() || m_firstTimeUpdate;
 	const Bool movedLastFrame = m_movedLastFrame || m_firstTimeUpdate;
 	m_firstTimeUpdate = false;
 	m_movedLastFrame = moved;
+	const Bool hasSkin = m_skinComponent != nullptr && m_skinComponent->isEnabled();
 
 	updated = resourceUpdated || moved || movedLastFrame;
 
-	const Bool hasSkin = m_skinComponent != nullptr && m_skinComponent->isEnabled();
-
-	// Upload mesh LODs and uniforms
+	// Upload GpuSceneMeshLod, uniforms and GpuSceneRenderable
 	if(resourceUpdated) [[unlikely]]
 	{
 		// Upload the mesh views
@@ -212,34 +202,22 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 				meshLods[l] = meshLods[l - 1];
 			}
 
-			PtrSize offset = m_patchInfos[i].m_gpuSceneMeshLodsIndex * sizeof(meshLods)
-							 + gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kMeshLods);
-			GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, meshLods.getSizeInBytes(),
-														 &meshLods[0]);
+			GpuSceneMicroPatcher::getSingleton().newCopy(
+				*info.m_framePool, m_patchInfos[i].m_gpuSceneIndexMeshLods.getOffsetInGpuScene(), meshLods);
 
 			// Upload the GpuSceneRenderable
 			GpuSceneRenderable gpuRenderable;
-
-			gpuRenderable.m_worldTransformsOffset =
-				U32(m_gpuSceneTransformsIndex * sizeof(Mat3x4) * 2
-					+ gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kTransformPairs));
+			gpuRenderable.m_worldTransformsOffset = m_gpuSceneIndexTransforms.getOffsetInGpuScene();
 			gpuRenderable.m_uniformsOffset = m_patchInfos[i].m_gpuSceneUniformsOffset;
-
-			gpuRenderable.m_geometryOffset =
-				U32(m_patchInfos[i].m_gpuSceneMeshLodsIndex * sizeof(GpuSceneMeshLod) * kMaxLodCount
-					+ gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kMeshLods));
-
+			gpuRenderable.m_geometryOffset = m_patchInfos[i].m_gpuSceneIndexMeshLods.getOffsetInGpuScene();
 			gpuRenderable.m_boneTransformsOffset = (hasSkin) ? m_skinComponent->getBoneTransformsGpuSceneOffset() : 0;
-
-			offset = m_patchInfos[i].m_gpuSceneRenderableIndex * sizeof(GpuSceneRenderable)
-					 + gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kRenderablesGBuffer);
-			GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, sizeof(gpuRenderable),
-														 &gpuRenderable);
+			GpuSceneMicroPatcher::getSingleton().newCopy(
+				*info.m_framePool, m_patchInfos[i].m_gpuSceneIndexRenderable.getOffsetInGpuScene(), gpuRenderable);
 		}
 
 		// Upload the uniforms
 		DynamicArray<U32, MemoryPoolPtrWrapper<StackMemoryPool>> allUniforms(info.m_framePool);
-		allUniforms.resize(U32(m_gpuSceneUniforms.m_size / 4));
+		allUniforms.resize(m_gpuSceneUniforms.getAllocatedSize() / 4);
 		U32 count = 0;
 		for(U32 i = 0; i < modelPatchCount; ++i)
 		{
@@ -251,23 +229,9 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 			count += U32(mtl.getPrefilledLocalUniforms().getSizeInBytes() / 4);
 		}
 
-		ANKI_ASSERT(count * 4 == m_gpuSceneUniforms.m_size);
-		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneUniforms.m_offset,
-													 m_gpuSceneUniforms.m_size, &allUniforms[0]);
-
-		// Refresh the render state buckets
-		for(U32 i = 0; i < modelPatchCount; ++i)
-		{
-			for(RenderingTechnique t : EnumIterable<RenderingTechnique>())
-			{
-				for(U32 velocity = 0; velocity < 1; ++velocity)
-				{
-					RenderStateBucketContainer& buckets = SceneGraph::getSingleton().getRenderStateBuckets();
-
-					buckets.removeUser(t, m_patchInfos[i].m_renderStateBucketIndices[t][velocity]);
-				}
-			}
-		}
+		ANKI_ASSERT(count * 4 == m_gpuSceneUniforms.getAllocatedSize());
+		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneUniforms.getOffset(),
+													 m_gpuSceneUniforms.getAllocatedSize(), &allUniforms[0]);
 	}
 
 	// Upload transforms
@@ -276,15 +240,13 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		Array<Mat3x4, 2> trfs;
 		trfs[0] = Mat3x4(info.m_node->getWorldTransform());
 		trfs[1] = Mat3x4(info.m_node->getPreviousWorldTransform());
-
-		const PtrSize offset = m_gpuSceneTransformsIndex * sizeof(trfs)
-							   + SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().getArrayBase(
-								   GpuSceneContiguousArrayType::kTransformPairs);
-		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, sizeof(trfs), &trfs[0]);
+		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneIndexTransforms.getOffsetInGpuScene(),
+													 trfs);
 	}
 
 	// Spatial update
-	if(moved || resourceUpdated || m_skinComponent) [[unlikely]]
+	const Bool spatialNeedsUpdate = moved || resourceUpdated || m_skinComponent;
+	if(spatialNeedsUpdate) [[unlikely]]
 	{
 		Aabb aabbLocal;
 		if(m_skinComponent == nullptr) [[likely]]
@@ -300,28 +262,71 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		const Aabb aabbWorld = aabbLocal.getTransformed(info.m_node->getWorldTransform());
 
 		m_spatial.setBoundingShape(aabbWorld);
+	}
 
-		// Upload the GpuSceneRenderableBoundingVolume to the GPU scene
+	const Bool spatialUpdated = m_spatial.update(SceneGraph::getSingleton().getOctree());
+	updated = updated || spatialUpdated;
+
+	// Update the buckets
+	const Bool bucketsNeedUpdate = resourceUpdated || moved != movedLastFrame;
+	if(bucketsNeedUpdate)
+	{
 		const U32 modelPatchCount = m_model->getModelPatches().getSize();
 		for(U32 i = 0; i < modelPatchCount; ++i)
 		{
-			GpuSceneRenderableBoundingVolume gpuVolume;
-			gpuVolume.m_aabbMin = aabbWorld.getMin().xyz();
-			gpuVolume.m_aabbMax = aabbWorld.getMax().xyz();
-			gpuVolume.m_renderableOffset = m_patchInfos[i].m_gpuSceneRenderableIndex * sizeof(GpuSceneRenderable)
-										   + gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kRenderablesGBuffer);
-			gpuVolume.m_renderStateBucket = 0; // TODO
-
-			const PtrSize offset =
-				U32(m_patchInfos[i].m_gpuSceneRenderableBoundingVolumeIndex * sizeof(GpuSceneRenderableBoundingVolume)
-					+ gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kRenderableBoundingVolumesGBuffer));
-
-			GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, sizeof(gpuVolume), &gpuVolume);
+			// Refresh the render state buckets
+			for(RenderingTechnique t : EnumIterable<RenderingTechnique>())
+			{
+				RenderStateBucketContainer::getSingleton().removeUser(m_patchInfos[i].m_renderStateBucketIndices[t]);
+
+				if(!(RenderingTechniqueBit(1 << t) & m_patchInfos[i].m_techniques))
+				{
+					continue;
+				}
+
+				// Fill the state
+				RenderingKey key;
+				key.setLod(0); // Materials don't care
+				key.setRenderingTechnique(t);
+				key.setSkinned(hasSkin);
+				key.setVelocity(moved);
+
+				const MaterialVariant& mvariant = m_model->getModelPatches()[i].getMaterial()->getOrCreateVariant(key);
+
+				RenderStateInfo state;
+				state.m_primitiveTopology = PrimitiveTopology::kTriangles;
+				state.m_indexedDrawcall = true;
+				state.m_program = mvariant.getShaderProgram();
+
+				m_patchInfos[i].m_renderStateBucketIndices[t] =
+					RenderStateBucketContainer::getSingleton().addUser(state, t);
+			}
 		}
 	}
 
-	const Bool spatialUpdated = m_spatial.update(SceneGraph::getSingleton().getOctree());
-	updated = updated || spatialUpdated;
+	// Upload the AABBs to the GPU scene
+	const Bool gpuSceneAabbsNeedUpdate = spatialNeedsUpdate || bucketsNeedUpdate;
+	if(gpuSceneAabbsNeedUpdate)
+	{
+		const U32 modelPatchCount = m_model->getModelPatches().getSize();
+		for(U32 i = 0; i < modelPatchCount; ++i)
+		{
+			GpuSceneRenderableAabb gpuVolume;
+			gpuVolume.m_aabbMin = m_spatial.getAabbWorldSpace().getMin().xyz();
+			gpuVolume.m_aabbMax = m_spatial.getAabbWorldSpace().getMax().xyz();
+			gpuVolume.m_renderableOffset = m_patchInfos[i].m_gpuSceneIndexRenderable.getOffsetInGpuScene();
+
+			for(RenderingTechnique t :
+				EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(m_patchInfos[i].m_techniques))
+			{
+				gpuVolume.m_renderStateBucket = m_patchInfos[i].m_renderStateBucketIndices[t].get();
+
+				GpuSceneMicroPatcher::getSingleton().newCopy(
+					*info.m_framePool, m_patchInfos[i].m_gpuSceneIndexRenderableAabbs[t].getOffsetInGpuScene(),
+					gpuVolume);
+			}
+		}
+	}
 
 	return Error::kNone;
 }
@@ -382,16 +387,11 @@ void ModelComponent::setupRenderableQueueElements(U32 lod, RenderingTechnique te
 		ModelRenderingInfo modelInf;
 		patch.getRenderingInfo(key, modelInf);
 
-		AllGpuSceneContiguousArrays& gpuArrays = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays();
-
 		queueElem.m_program = modelInf.m_program.get();
-		queueElem.m_worldTransformsOffset = U32(m_gpuSceneTransformsIndex * sizeof(Mat3x4) * 2
-												+ gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kTransformPairs));
+		queueElem.m_worldTransformsOffset = m_gpuSceneIndexTransforms.getOffsetInGpuScene();
 		queueElem.m_uniformsOffset = m_patchInfos[i].m_gpuSceneUniformsOffset;
 		queueElem.m_geometryOffset =
-			U32(m_patchInfos[i].m_gpuSceneMeshLodsIndex * sizeof(GpuSceneMeshLod) * kMaxLodCount
-				+ lod * sizeof(GpuSceneMeshLod));
-		queueElem.m_geometryOffset += U32(gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kMeshLods));
+			m_patchInfos[i].m_gpuSceneIndexMeshLods.getOffsetInGpuScene() + lod * sizeof(GpuSceneMeshLod);
 		queueElem.m_boneTransformsOffset = (hasSkin) ? m_skinComponent->getBoneTransformsGpuSceneOffset() : 0;
 		queueElem.m_indexCount = modelInf.m_indexCount;
 		queueElem.m_firstIndex = U32(modelInf.m_indexBufferOffset / 2 + modelInf.m_firstIndex);
@@ -454,18 +454,17 @@ void ModelComponent::setupRayTracingInstanceQueueElements(U32 lod, RenderingTech
 
 		const ModelPatch& patch = m_model->getModelPatches()[i];
 
-		AllGpuSceneContiguousArrays& gpuArrays = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays();
+		AllGpuSceneContiguousArrays& gpuArrays = AllGpuSceneContiguousArrays::getSingleton();
 
 		ModelRayTracingInfo modelInf;
 		patch.getRayTracingInfo(key, modelInf);
 
 		queueElem.m_bottomLevelAccelerationStructure = modelInf.m_bottomLevelAccelerationStructure.get();
 		queueElem.m_shaderGroupHandleIndex = modelInf.m_shaderGroupHandleIndex;
-		queueElem.m_worldTransformsOffset = U32(m_gpuSceneTransformsIndex * sizeof(Mat3x4) * 2
-												+ gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kTransformPairs));
+		queueElem.m_worldTransformsOffset = m_gpuSceneIndexTransforms.getOffsetInGpuScene();
 		queueElem.m_uniformsOffset = m_patchInfos[i].m_gpuSceneUniformsOffset;
 		queueElem.m_geometryOffset =
-			U32(m_patchInfos[i].m_gpuSceneMeshLodsIndex * sizeof(GpuSceneMeshLod) * kMaxLodCount
+			U32(m_patchInfos[i].m_gpuSceneIndexMeshLods.get() * sizeof(GpuSceneMeshLod) * kMaxLodCount
 				+ lod * sizeof(GpuSceneMeshLod));
 		queueElem.m_geometryOffset += U32(gpuArrays.getArrayBase(GpuSceneContiguousArrayType::kMeshLods));
 		queueElem.m_indexBufferOffset = U32(modelInf.m_indexBufferOffset);
@@ -491,12 +490,12 @@ void ModelComponent::onOtherComponentRemovedOrAdded(SceneComponent* other, Bool
 	if(added && !alreadyHasSkinComponent)
 	{
 		m_skinComponent = static_cast<SkinComponent*>(other);
-		m_dirty = true;
+		m_resourceChanged = true;
 	}
 	else if(!added && other == m_skinComponent)
 	{
 		m_skinComponent = nullptr;
-		m_dirty = true;
+		m_resourceChanged = true;
 	}
 }
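
ModelComponent now iterates with EnumIterable<RenderingTechnique>() over every technique and with EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(mask) over just the techniques whose bit is set in a mask; the 67 new lines in AnKi/Util/Enum.h presumably supply the latter. A self-contained sketch of what such a set-bit loop boils down to; this is a simplification for illustration, not the engine's implementation:

#include <cstdint>
#include <cstdio>

enum class TechniqueBit : uint32_t
{
	kGBuffer = 1u << 0,
	kForward = 1u << 1,
	kDepth = 1u << 2
};

int main()
{
	// Mask with the GBuffer and Depth bits set.
	uint32_t bits = uint32_t(TechniqueBit::kGBuffer) | uint32_t(TechniqueBit::kDepth);

	// Visit each set bit from lowest to highest; EnumBitsIterable wraps this walk in iterator
	// syntax so a range-based for loop can hand back the corresponding enum value.
	while(bits != 0u)
	{
		const uint32_t bit = bits & (~bits + 1u); // isolate the lowest set bit
		std::printf("technique bit 0x%x is enabled\n", bit);
		bits &= bits - 1u; // clear that bit and continue
	}
	return 0;
}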
 

+ 12 - 7
AnKi/Scene/Components/ModelComponent.h

@@ -55,10 +55,13 @@ private:
 	{
 	public:
 		U32 m_gpuSceneUniformsOffset = kMaxU32;
-		U32 m_gpuSceneMeshLodsIndex = kMaxU32;
-		U32 m_gpuSceneRenderableIndex = kMaxU32;
-		U32 m_gpuSceneRenderableBoundingVolumeIndex = kMaxU32;
-		Array2d<RenderStateBucketIndex, U32(RenderingTechnique::kCount), 2> m_renderStateBucketIndices;
+
+		GpuSceneContiguousArrayIndex m_gpuSceneIndexMeshLods;
+
+		GpuSceneContiguousArrayIndex m_gpuSceneIndexRenderable;
+		Array<GpuSceneContiguousArrayIndex, U32(RenderingTechnique::kCount)> m_gpuSceneIndexRenderableAabbs;
+
+		Array<RenderStateBucketIndex, U32(RenderingTechnique::kCount)> m_renderStateBucketIndices;
 		RenderingTechniqueBit m_techniques;
 	};
 
@@ -68,17 +71,19 @@ private:
 
 	ModelResourcePtr m_model;
 
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneUniforms;
-	U32 m_gpuSceneTransformsIndex = kMaxU32;
+	GpuSceneBufferAllocation m_gpuSceneUniforms;
+	GpuSceneContiguousArrayIndex m_gpuSceneIndexTransforms;
 	SceneDynamicArray<PatchInfo> m_patchInfos;
 
-	Bool m_dirty : 1 = true;
+	Bool m_resourceChanged : 1 = true;
 	Bool m_castsShadow : 1 = false;
 	Bool m_movedLastFrame : 1 = true;
 	Bool m_firstTimeUpdate : 1 = true; ///< Extra flag in case the component is added in a node that hasn't been moved.
 
 	RenderingTechniqueBit m_presentRenderingTechniques = RenderingTechniqueBit::kNone;
 
+	void freeGpuScene();
+
 	Error update(SceneComponentUpdateInfo& info, Bool& updated);
 
 	void onOtherComponentRemovedOrAdded(SceneComponent* other, Bool added);

+ 105 - 54
AnKi/Scene/Components/ParticleEmitterComponent.cpp

@@ -195,25 +195,12 @@ public:
 
 ParticleEmitterComponent::ParticleEmitterComponent(SceneNode* node)
 	: SceneComponent(node, getStaticClassId())
-	, m_node(node)
 	, m_spatial(this)
 {
 }
 
 ParticleEmitterComponent::~ParticleEmitterComponent()
 {
-	GpuSceneMemoryPool& gpuScenePool = GpuSceneMemoryPool::getSingleton();
-	gpuScenePool.deferredFree(m_gpuScenePositions);
-	gpuScenePool.deferredFree(m_gpuSceneScales);
-	gpuScenePool.deferredFree(m_gpuSceneAlphas);
-	gpuScenePool.deferredFree(m_gpuSceneUniforms);
-
-	if(m_gpuSceneIndex != kMaxU32)
-	{
-		SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-			GpuSceneContiguousArrayType::kParticleEmitters, m_gpuSceneIndex);
-	}
-
 	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
 }
 
@@ -236,16 +223,21 @@ void ParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 	// Cleanup
 	m_simpleParticles.destroy();
 	m_physicsParticles.destroy();
-	GpuSceneMemoryPool& gpuScenePool = GpuSceneMemoryPool::getSingleton();
-	gpuScenePool.deferredFree(m_gpuScenePositions);
-	gpuScenePool.deferredFree(m_gpuSceneScales);
-	gpuScenePool.deferredFree(m_gpuSceneAlphas);
-	gpuScenePool.deferredFree(m_gpuSceneUniforms);
+	GpuSceneBuffer::getSingleton().deferredFree(m_gpuScenePositions);
+	GpuSceneBuffer::getSingleton().deferredFree(m_gpuSceneScales);
+	GpuSceneBuffer::getSingleton().deferredFree(m_gpuSceneAlphas);
+	GpuSceneBuffer::getSingleton().deferredFree(m_gpuSceneUniforms);
+
+	AllGpuSceneContiguousArrays::getSingleton().deferredFree(m_gpuSceneIndexParticleEmitter);
+	AllGpuSceneContiguousArrays::getSingleton().deferredFree(m_gpuSceneIndexRenderable);
+	for(GpuSceneContiguousArrayIndex& idx : m_gpuSceneIndexAabbs)
+	{
+		AllGpuSceneContiguousArrays::getSingleton().deferredFree(idx);
+	}
 
-	if(m_gpuSceneIndex != kMaxU32)
+	for(RenderStateBucketIndex& idx : m_renderStateBuckets)
 	{
-		SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-			GpuSceneContiguousArrayType::kParticleEmitters, m_gpuSceneIndex);
+		RenderStateBucketContainer::getSingleton().removeUser(idx);
 	}
 
 	// Init particles
@@ -271,14 +263,57 @@ void ParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 	}
 
 	// GPU scene allocations
-	gpuScenePool.allocate(sizeof(Vec3) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuScenePositions);
-	gpuScenePool.allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneAlphas);
-	gpuScenePool.allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneScales);
-	gpuScenePool.allocate(m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(),
-						  alignof(U32), m_gpuSceneUniforms);
-
-	m_gpuSceneIndex = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-		GpuSceneContiguousArrayType::kParticleEmitters);
+	GpuSceneBuffer::getSingleton().allocate(sizeof(Vec3) * m_props.m_maxNumOfParticles, alignof(F32),
+											m_gpuScenePositions);
+	GpuSceneBuffer::getSingleton().allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneAlphas);
+	GpuSceneBuffer::getSingleton().allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneScales);
+	GpuSceneBuffer::getSingleton().allocate(
+		m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(), alignof(U32),
+		m_gpuSceneUniforms);
+
+	m_gpuSceneIndexRenderable =
+		AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kRenderables);
+
+	m_gpuSceneIndexParticleEmitter =
+		AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kParticleEmitters);
+
+	for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(
+			m_particleEmitterResource->getMaterial()->getRenderingTechniques()))
+	{
+		GpuSceneContiguousArrayType allocType = GpuSceneContiguousArrayType::kCount;
+		switch(t)
+		{
+		case RenderingTechnique::kGBuffer:
+			allocType = GpuSceneContiguousArrayType::kRenderableBoundingVolumesGBuffer;
+			break;
+		case RenderingTechnique::kForward:
+			allocType = GpuSceneContiguousArrayType::kRenderableBoundingVolumesForward;
+			break;
+		case RenderingTechnique::kDepth:
+			allocType = GpuSceneContiguousArrayType::kRenderableBoundingVolumesDepth;
+			break;
+		default:
+			ANKI_ASSERT(0);
+		}
+
+		m_gpuSceneIndexAabbs[t] = AllGpuSceneContiguousArrays::getSingleton().allocate(allocType);
+	}
+
+	// Allocate buckets
+	for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(
+			m_particleEmitterResource->getMaterial()->getRenderingTechniques()))
+	{
+		RenderingKey key;
+		key.setRenderingTechnique(t);
+		ShaderProgramPtr prog;
+		m_particleEmitterResource->getRenderingInfo(key, prog);
+
+		RenderStateInfo state;
+		state.m_program = prog;
+		state.m_primitiveTopology = PrimitiveTopology::kTriangles;
+		state.m_indexedDrawcall = false;
+		m_renderStateBuckets[t] = RenderStateBucketContainer::getSingleton().addUser(state, t);
+	}
 }
 
 Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
@@ -297,44 +332,61 @@ Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& upd
 	Aabb aabbWorld;
 	if(m_simulationType == SimulationType::kSimple)
 	{
-		simulate(info.m_previousTime, info.m_currentTime, WeakArray<SimpleParticle>(m_simpleParticles), positions,
-				 scales, alphas, aabbWorld);
+		simulate(info.m_previousTime, info.m_currentTime, info.m_node->getWorldTransform(),
+				 WeakArray<SimpleParticle>(m_simpleParticles), positions, scales, alphas, aabbWorld);
 	}
 	else
 	{
 		ANKI_ASSERT(m_simulationType == SimulationType::kPhysicsEngine);
-		simulate(info.m_previousTime, info.m_currentTime, WeakArray<PhysicsParticle>(m_physicsParticles), positions,
-				 scales, alphas, aabbWorld);
+		simulate(info.m_previousTime, info.m_currentTime, info.m_node->getWorldTransform(),
+				 WeakArray<PhysicsParticle>(m_physicsParticles), positions, scales, alphas, aabbWorld);
 	}
 
 	m_spatial.setBoundingShape(aabbWorld);
 	m_spatial.update(SceneGraph::getSingleton().getOctree());
 
-	// Upload to the GPU scene
+	// Upload particles to the GPU scene
 	GpuSceneMicroPatcher& patcher = GpuSceneMicroPatcher::getSingleton();
 	if(m_aliveParticleCount > 0)
 	{
-		patcher.newCopy(*info.m_framePool, m_gpuScenePositions.m_offset, sizeof(Vec3) * m_aliveParticleCount,
-						positions);
-		patcher.newCopy(*info.m_framePool, m_gpuSceneScales.m_offset, sizeof(F32) * m_aliveParticleCount, scales);
-		patcher.newCopy(*info.m_framePool, m_gpuSceneAlphas.m_offset, sizeof(F32) * m_aliveParticleCount, alphas);
+		patcher.newCopy(*info.m_framePool, m_gpuScenePositions, sizeof(Vec3) * m_aliveParticleCount, positions);
+		patcher.newCopy(*info.m_framePool, m_gpuSceneScales, sizeof(F32) * m_aliveParticleCount, scales);
+		patcher.newCopy(*info.m_framePool, m_gpuSceneAlphas, sizeof(F32) * m_aliveParticleCount, alphas);
 	}
 
 	if(m_resourceUpdated)
 	{
+		// Upload GpuSceneParticleEmitter
 		GpuSceneParticleEmitter particles = {};
-		particles.m_vertexOffsets[U32(VertexStreamId::kParticlePosition)] = U32(m_gpuScenePositions.m_offset);
-		particles.m_vertexOffsets[U32(VertexStreamId::kParticleColor)] = U32(m_gpuSceneAlphas.m_offset);
-		particles.m_vertexOffsets[U32(VertexStreamId::kParticleScale)] = U32(m_gpuSceneScales.m_offset);
-
-		const PtrSize offset = m_gpuSceneIndex * sizeof(GpuSceneParticleEmitter)
-							   + SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().getArrayBase(
-								   GpuSceneContiguousArrayType::kParticleEmitters);
-		patcher.newCopy(*info.m_framePool, offset, sizeof(GpuSceneParticleEmitter), &particles);
+		particles.m_vertexOffsets[U32(VertexStreamId::kParticlePosition)] = m_gpuScenePositions.getOffset();
+		particles.m_vertexOffsets[U32(VertexStreamId::kParticleColor)] = m_gpuSceneAlphas.getOffset();
+		particles.m_vertexOffsets[U32(VertexStreamId::kParticleScale)] = m_gpuSceneScales.getOffset();
+		patcher.newCopy(*info.m_framePool, m_gpuSceneIndexParticleEmitter.getOffsetInGpuScene(), particles);
 
-		patcher.newCopy(*info.m_framePool, m_gpuSceneUniforms.m_offset,
+		// Upload uniforms
+		patcher.newCopy(*info.m_framePool, m_gpuSceneUniforms,
 						m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(),
 						m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getBegin());
+
+		// Upload the GpuSceneRenderable
+		GpuSceneRenderable renderable;
+		renderable.m_boneTransformsOffset = 0;
+		renderable.m_geometryOffset = m_gpuSceneIndexParticleEmitter.getOffsetInGpuScene();
+		renderable.m_uniformsOffset = m_gpuSceneUniforms.getOffset();
+		renderable.m_worldTransformsOffset = 0;
+		patcher.newCopy(*info.m_framePool, m_gpuSceneIndexRenderable.getOffsetInGpuScene(), renderable);
+	}
+
+	// Upload the GpuSceneRenderableAabb always
+	for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(
+			m_particleEmitterResource->getMaterial()->getRenderingTechniques()))
+	{
+		GpuSceneRenderableAabb aabb;
+		aabb.m_aabbMin = m_spatial.getAabbWorldSpace().getMin().xyz();
+		aabb.m_aabbMax = m_spatial.getAabbWorldSpace().getMax().xyz();
+		aabb.m_renderableOffset = m_gpuSceneIndexRenderable.getOffsetInGpuScene();
+		aabb.m_renderStateBucket = m_renderStateBuckets[t].get();
+		patcher.newCopy(*info.m_framePool, m_gpuSceneIndexAabbs[t].getOffsetInGpuScene(), aabb);
 	}
 
 	m_resourceUpdated = false;
@@ -342,8 +394,9 @@ Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& upd
 }
 
 template<typename TParticle>
-void ParticleEmitterComponent::simulate(Second prevUpdateTime, Second crntTime, WeakArray<TParticle> particles,
-										Vec3*& positions, F32*& scales, F32*& alphas, Aabb& aabbWorld)
+void ParticleEmitterComponent::simulate(Second prevUpdateTime, Second crntTime, const Transform& worldTransform,
+										WeakArray<TParticle> particles, Vec3*& positions, F32*& scales, F32*& alphas,
+										Aabb& aabbWorld)
 {
 	// - Deactivate the dead particles
 	// - Calc the AABB
@@ -427,7 +480,7 @@ void ParticleEmitterComponent::simulate(Second prevUpdateTime, Second crntTime,
 				continue;
 			}
 
-			particle.revive(m_props, m_node->getWorldTransform(), crntTime);
+			particle.revive(m_props, worldTransform, crntTime);
 
 			// do the rest
 			++particleCount;
@@ -467,10 +520,8 @@ void ParticleEmitterComponent::setupRenderableQueueElements(RenderingTechnique t
 	el->m_mergeKey = 0; // Not mergable
 	el->m_program = prog.get();
 	el->m_worldTransformsOffset = 0;
-	el->m_uniformsOffset = U32(m_gpuSceneUniforms.m_offset);
-	el->m_geometryOffset = U32(m_gpuSceneIndex * sizeof(GpuSceneParticleEmitter)
-							   + SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().getArrayBase(
-								   GpuSceneContiguousArrayType::kParticleEmitters));
+	el->m_uniformsOffset = m_gpuSceneUniforms.getOffset();
+	el->m_geometryOffset = m_gpuSceneIndexParticleEmitter.getOffsetInGpuScene();
 	el->m_boneTransformsOffset = 0;
 	el->m_vertexCount = 6 * m_aliveParticleCount;
 	el->m_firstVertex = 0;

+ 12 - 9
AnKi/Scene/Components/ParticleEmitterComponent.h

@@ -7,6 +7,7 @@
 
 #include <AnKi/Scene/Components/SceneComponent.h>
 #include <AnKi/Scene/Spatial.h>
+#include <AnKi/Scene/RenderStateBucket.h>
 #include <AnKi/Resource/ParticleEmitterResource.h>
 #include <AnKi/Collision/Aabb.h>
 #include <AnKi/Util/WeakArray.h>
@@ -51,8 +52,6 @@ private:
 		kPhysicsEngine
 	};
 
-	SceneNode* m_node = nullptr;
-
 	ParticleEmitterProperties m_props;
 
 	Spatial m_spatial;
@@ -63,11 +62,15 @@ private:
 	Second m_timeLeftForNextEmission = 0.0;
 	U32 m_aliveParticleCount = 0;
 
-	SegregatedListsGpuMemoryPoolToken m_gpuScenePositions;
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneAlphas;
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneScales;
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneUniforms;
-	U32 m_gpuSceneIndex = kMaxU32;
+	GpuSceneBufferAllocation m_gpuScenePositions;
+	GpuSceneBufferAllocation m_gpuSceneAlphas;
+	GpuSceneBufferAllocation m_gpuSceneScales;
+	GpuSceneBufferAllocation m_gpuSceneUniforms;
+	GpuSceneContiguousArrayIndex m_gpuSceneIndexParticleEmitter;
+	GpuSceneContiguousArrayIndex m_gpuSceneIndexRenderable;
+	Array<GpuSceneContiguousArrayIndex, U32(RenderingTechnique::kCount)> m_gpuSceneIndexAabbs;
+
+	Array<RenderStateBucketIndex, U32(RenderingTechnique::kCount)> m_renderStateBuckets;
 
 	Bool m_resourceUpdated = true;
 	SimulationType m_simulationType = SimulationType::kUndefined;
@@ -75,8 +78,8 @@ private:
 	Error update(SceneComponentUpdateInfo& info, Bool& updated);
 
 	template<typename TParticle>
-	void simulate(Second prevUpdateTime, Second crntTime, WeakArray<TParticle> particles, Vec3*& positions,
-				  F32*& scales, F32*& alphas, Aabb& aabbWorld);
+	void simulate(Second prevUpdateTime, Second crntTime, const Transform& worldTransform,
+				  WeakArray<TParticle> particles, Vec3*& positions, F32*& scales, F32*& alphas, Aabb& aabbWorld);
 };
 /// @}
 

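Taken together, the emitter changes route everything through three singletons: GpuSceneBuffer for raw per-particle data, AllGpuSceneContiguousArrays for fixed-stride slots, and GpuSceneMicroPatcher for uploads. A minimal sketch of that pattern, using only calls visible in this diff; framePool (a StackMemoryPool reference) and maxParticles are assumptions supplied by the caller:

	// Sketch only: framePool and maxParticles are assumed inputs.
	GpuSceneBufferAllocation positions;
	GpuSceneBuffer::getSingleton().allocate(sizeof(Vec3) * maxParticles, alignof(F32), positions);

	GpuSceneContiguousArrayIndex emitterIdx =
		AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kParticleEmitters);

	// Fill the fixed-stride record and patch it at the slot's byte offset inside the GPU scene.
	GpuSceneParticleEmitter record = {};
	record.m_vertexOffsets[U32(VertexStreamId::kParticlePosition)] = positions.getOffset();
	GpuSceneMicroPatcher::getSingleton().newCopy(framePool, emitterIdx.getOffsetInGpuScene(), record);

	// Frees are deferred so frames still in flight can keep reading the old data.
	GpuSceneBuffer::getSingleton().deferredFree(positions);
	AllGpuSceneContiguousArrays::getSingleton().deferredFree(emitterIdx);
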
+ 4 - 10
AnKi/Scene/Components/ReflectionProbeComponent.cpp

@@ -27,16 +27,13 @@ ReflectionProbeComponent::ReflectionProbeComponent(SceneNode* node)
 		m_frustums[i].update();
 	}
 
-	m_gpuSceneIndex = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().allocate(
-		GpuSceneContiguousArrayType::kReflectionProbes);
+	m_gpuSceneIndex =
+		AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kReflectionProbes);
 }
 
 ReflectionProbeComponent::~ReflectionProbeComponent()
 {
 	m_spatial.removeFromOctree(SceneGraph::getSingleton().getOctree());
-
-	SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().deferredFree(
-		GpuSceneContiguousArrayType::kReflectionProbes, m_gpuSceneIndex);
 }
 
 Error ReflectionProbeComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
@@ -104,11 +101,8 @@ Error ReflectionProbeComponent::update(SceneComponentUpdateInfo& info, Bool& upd
 		gpuProbe.m_cubeTexture = m_reflectionTexBindlessIndex;
 		gpuProbe.m_aabbMin = aabbWorld.getMin().xyz();
 		gpuProbe.m_aabbMax = aabbWorld.getMax().xyz();
-
-		const PtrSize offset = m_gpuSceneIndex * sizeof(GpuSceneReflectionProbe)
-							   + SceneGraph::getSingleton().getAllGpuSceneContiguousArrays().getArrayBase(
-								   GpuSceneContiguousArrayType::kReflectionProbes);
-		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, offset, sizeof(gpuProbe), &gpuProbe);
+		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneIndex.getOffsetInGpuScene(),
+													 gpuProbe);
 	}
 
 	// Update spatial and frustums

+ 2 - 3
AnKi/Scene/Components/ReflectionProbeComponent.h

@@ -52,8 +52,7 @@ public:
 		el.m_aabbMax = m_halfSize + m_worldPos;
 		ANKI_ASSERT(el.m_textureBindlessIndex != kMaxU32);
 		el.m_textureBindlessIndex = m_reflectionTexBindlessIndex;
-		ANKI_ASSERT(m_gpuSceneIndex != kMaxU32);
-		el.m_index = m_gpuSceneIndex;
+		el.m_index = m_gpuSceneIndex.get();
 	}
 
 	ANKI_INTERNAL void setupReflectionProbeQueueElementForRefresh(ReflectionProbeQueueElementForRefresh& el) const
@@ -77,7 +76,7 @@ private:
 	Vec3 m_worldPos = Vec3(kMaxF32);
 	Vec3 m_halfSize = Vec3(1.0f);
 
-	U32 m_gpuSceneIndex = kMaxU32;
+	GpuSceneContiguousArrayIndex m_gpuSceneIndex;
 
 	Spatial m_spatial;
 

+ 1 - 0
AnKi/Scene/Components/SceneComponent.h

@@ -6,6 +6,7 @@
 #pragma once
 
 #include <AnKi/Scene/Common.h>
+#include <AnKi/Scene/ContiguousArrayAllocator.h>
 #include <AnKi/Util/Functions.h>
 #include <AnKi/Util/BitMask.h>
 

+ 4 - 5
AnKi/Scene/Components/SkinComponent.cpp

@@ -20,7 +20,6 @@ SkinComponent::SkinComponent(SceneNode* node)
 
 SkinComponent::~SkinComponent()
 {
-	GpuSceneMemoryPool::getSingleton().deferredFree(m_boneTransformsGpuSceneOffset);
 }
 
 void SkinComponent::loadSkeletonResource(CString fname)
@@ -41,7 +40,7 @@ void SkinComponent::loadSkeletonResource(CString fname)
 	m_boneTrfs[0].destroy();
 	m_boneTrfs[1].destroy();
 	m_animationTrfs.destroy();
-	GpuSceneMemoryPool::getSingleton().deferredFree(m_boneTransformsGpuSceneOffset);
+	GpuSceneBuffer::getSingleton().deferredFree(m_gpuSceneBoneTransforms);
 
 	// Create
 	const U32 boneCount = m_skeleton->getBones().getSize();
@@ -49,7 +48,7 @@ void SkinComponent::loadSkeletonResource(CString fname)
 	m_boneTrfs[1].resize(boneCount, Mat3x4::getIdentity());
 	m_animationTrfs.resize(boneCount, Trf{Vec3(0.0f), Quat::getIdentity(), 1.0f});
 
-	GpuSceneMemoryPool::getSingleton().allocate(sizeof(Mat4) * boneCount * 2, 4, m_boneTransformsGpuSceneOffset);
+	GpuSceneBuffer::getSingleton().allocate(sizeof(Mat4) * boneCount * 2, 4, m_gpuSceneBoneTransforms);
 }
 
 void SkinComponent::playAnimation(U32 track, AnimationResourcePtr anim, const AnimationPlayInfo& info)
@@ -198,8 +197,8 @@ Error SkinComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 			trfs[i * 2 + 0] = getBoneTransforms()[i];
 			trfs[i * 2 + 1] = getPreviousFrameBoneTransforms()[i];
 		}
-		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_boneTransformsGpuSceneOffset.m_offset,
-													 trfs.getSizeInBytes(), trfs.getBegin());
+		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneBoneTransforms, trfs.getSizeInBytes(),
+													 trfs.getBegin());
 	}
 	else
 	{

+ 2 - 2
AnKi/Scene/Components/SkinComponent.h

@@ -78,7 +78,7 @@ public:
 
 	U32 getBoneTransformsGpuSceneOffset() const
 	{
-		return U32(m_boneTransformsGpuSceneOffset.m_offset);
+		return m_gpuSceneBoneTransforms.getOffset();
 	}
 
 private:
@@ -112,7 +112,7 @@ private:
 
 	Bool m_forceFullUpdate = true;
 
-	SegregatedListsGpuMemoryPoolToken m_boneTransformsGpuSceneOffset;
+	GpuSceneBufferAllocation m_gpuSceneBoneTransforms;
 
 	Error update(SceneComponentUpdateInfo& info, Bool& updated);
 

+ 23 - 15
AnKi/Scene/ContiguousArrayAllocator.cpp

@@ -4,7 +4,6 @@
 // http://www.anki3d.org/LICENSE
 
 #include <AnKi/Scene/ContiguousArrayAllocator.h>
-#include <AnKi/Scene/SceneGraph.h>
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Gr/GrManager.h>
 
@@ -18,15 +17,15 @@ void AllGpuSceneContiguousArrays::ContiguousArrayAllocator::destroy()
 	}
 }
 
-AllGpuSceneContiguousArrays::Index AllGpuSceneContiguousArrays::ContiguousArrayAllocator::allocateObject()
+U32 AllGpuSceneContiguousArrays::ContiguousArrayAllocator::allocateObject()
 {
 	LockGuard lock(m_mtx);
 
-	if(m_poolToken.m_offset == kMaxPtrSize)
+	if(!m_allocation.isValid())
 	{
 		// Initialize
 		const U32 alignment = GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment;
-		GpuSceneMemoryPool::getSingleton().allocate(m_objectSize * m_initialArraySize, alignment, m_poolToken);
+		GpuSceneBuffer::getSingleton().allocate(m_objectSize * m_initialArraySize, alignment, m_allocation);
 		m_nextSlotIndex = 0;
 
 		m_freeSlotStack.resize(m_initialArraySize);
@@ -41,14 +40,14 @@ AllGpuSceneContiguousArrays::Index AllGpuSceneContiguousArrays::ContiguousArrayA
 		ANKI_ASSERT(!"TODO");
 	}
 
-	const Index idx = m_freeSlotStack[m_nextSlotIndex];
+	const U32 idx = m_freeSlotStack[m_nextSlotIndex];
 	++m_nextSlotIndex;
 
 	ANKI_ASSERT(idx < m_freeSlotStack.getSize());
 	return idx;
 }
 
-void AllGpuSceneContiguousArrays::ContiguousArrayAllocator::deferredFree(U32 crntFrameIdx, Index index)
+void AllGpuSceneContiguousArrays::ContiguousArrayAllocator::deferredFree(U32 crntFrameIdx, U32 index)
 {
 	LockGuard lock(m_mtx);
 
@@ -66,7 +65,7 @@ void AllGpuSceneContiguousArrays::ContiguousArrayAllocator::collectGarbage(U32 n
 	}
 
 	// Release deferred frees
-	for(Index idx : m_garbage[newFrameIdx])
+	for(U32 idx : m_garbage[newFrameIdx])
 	{
 		ANKI_ASSERT(m_nextSlotIndex > 0);
 		--m_nextSlotIndex;
@@ -89,12 +88,12 @@ void AllGpuSceneContiguousArrays::ContiguousArrayAllocator::collectGarbage(U32 n
 	else if(allocatedSlots == 0)
 	{
 		ANKI_ASSERT(m_nextSlotIndex == 0);
-		GpuSceneMemoryPool::getSingleton().deferredFree(m_poolToken);
+		GpuSceneBuffer::getSingleton().deferredFree(m_allocation);
 		m_freeSlotStack.destroy();
 	}
 }
 
-void AllGpuSceneContiguousArrays::init()
+AllGpuSceneContiguousArrays::AllGpuSceneContiguousArrays()
 {
 	const ConfigSet& cfg = ConfigSet::getSingleton();
 	constexpr F32 kGrowRate = 2.0;
@@ -110,6 +109,8 @@ void AllGpuSceneContiguousArrays::init()
 		cfg.getSceneMinGpuSceneDecals(),
 		cfg.getSceneMinGpuSceneFogDensityVolumes(),
 		cfg.getSceneMinGpuSceneRenderables(),
+		cfg.getSceneMinGpuSceneRenderables(),
+		cfg.getSceneMinGpuSceneRenderables(),
 		cfg.getSceneMinGpuSceneRenderables()};
 
 	for(GpuSceneContiguousArrayType type : EnumIterable<GpuSceneContiguousArrayType>())
@@ -121,7 +122,7 @@ void AllGpuSceneContiguousArrays::init()
 	}
 }
 
-void AllGpuSceneContiguousArrays::destroy()
+AllGpuSceneContiguousArrays::~AllGpuSceneContiguousArrays()
 {
 	for(GpuSceneContiguousArrayType type : EnumIterable<GpuSceneContiguousArrayType>())
 	{
@@ -129,15 +130,22 @@ void AllGpuSceneContiguousArrays::destroy()
 	}
 }
 
-AllGpuSceneContiguousArrays::Index AllGpuSceneContiguousArrays::allocate(GpuSceneContiguousArrayType type)
+GpuSceneContiguousArrayIndex AllGpuSceneContiguousArrays::allocate(GpuSceneContiguousArrayType type)
 {
-	const U32 idx = m_allocs[type].allocateObject();
-	return idx;
+	GpuSceneContiguousArrayIndex out;
+	out.m_index = m_allocs[type].allocateObject();
+	out.m_type = type;
+
+	return out;
 }
 
-void AllGpuSceneContiguousArrays::deferredFree(GpuSceneContiguousArrayType type, Index idx)
+void AllGpuSceneContiguousArrays::deferredFree(GpuSceneContiguousArrayIndex& idx)
 {
-	m_allocs[type].deferredFree(m_frame, idx);
+	if(idx.isValid())
+	{
+		m_allocs[idx.m_type].deferredFree(m_frame, idx.m_index);
+		idx.invalidate();
+	}
 }
 
 void AllGpuSceneContiguousArrays::endFrame()

+ 92 - 19
AnKi/Scene/ContiguousArrayAllocator.h

@@ -6,6 +6,7 @@
 #pragma once
 
 #include <AnKi/Scene/Common.h>
+#include <AnKi/Core/GpuMemoryPools.h>
 
 namespace anki {
 
@@ -24,26 +25,77 @@ enum class GpuSceneContiguousArrayType : U8
 	kGlobalIlluminationProbes,
 	kDecals,
 	kFogDensityVolumes,
-	kRenderablesGBuffer,
+	kRenderables,
+
 	kRenderableBoundingVolumesGBuffer,
+	kRenderableBoundingVolumesForward,
+	kRenderableBoundingVolumesDepth,
 
 	kCount,
 	kFirst = 0
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(GpuSceneContiguousArrayType)
 
-/// Contains a number of contiguous array allocators for various GPU scene contiguous objects.
-class AllGpuSceneContiguousArrays
+class GpuSceneContiguousArrayIndex
 {
+	friend class AllGpuSceneContiguousArrays;
+
 public:
-	using Index = U32;
+	GpuSceneContiguousArrayIndex() = default;
+
+	GpuSceneContiguousArrayIndex(const GpuSceneContiguousArrayIndex& b) = delete;
+
+	GpuSceneContiguousArrayIndex(GpuSceneContiguousArrayIndex&& b)
+	{
+		*this = std::move(b);
+	}
+
+	~GpuSceneContiguousArrayIndex();
+
+	GpuSceneContiguousArrayIndex& operator=(const GpuSceneContiguousArrayIndex&) = delete;
+
+	GpuSceneContiguousArrayIndex& operator=(GpuSceneContiguousArrayIndex&& b)
+	{
+		ANKI_ASSERT(!isValid());
+		m_index = b.m_index;
+		m_type = b.m_type;
+		b.invalidate();
+		return *this;
+	}
+
+	U32 get() const
+	{
+		ANKI_ASSERT(m_index != kMaxU32);
+		return m_index;
+	}
 
-	void init();
+	Bool isValid() const
+	{
+		return m_index != kMaxU32;
+	}
 
-	void destroy();
+	U32 getOffsetInGpuScene() const;
 
+private:
+	U32 m_index = kMaxU32;
+	GpuSceneContiguousArrayType m_type = GpuSceneContiguousArrayType::kCount;
+
+	void invalidate()
+	{
+		m_index = kMaxU32;
+		m_type = GpuSceneContiguousArrayType::kCount;
+	}
+};
+
+/// Contains a number of contiguous array allocators for various GPU scene contiguous objects.
+class AllGpuSceneContiguousArrays : public MakeSingleton<AllGpuSceneContiguousArrays>
+{
+	template<typename>
+	friend class MakeSingleton;
+
+public:
 	/// @note Thread-safe against allocate(), deferredFree() and endFrame()
-	Index allocate(GpuSceneContiguousArrayType type);
+	GpuSceneContiguousArrayIndex allocate(GpuSceneContiguousArrayType type);
 
 	/// @note It's not thread-safe
 	PtrSize getArrayBase(GpuSceneContiguousArrayType type) const
@@ -57,13 +109,20 @@ public:
 		return m_allocs[type].getElementCount();
 	}
 
+	/// @note It's not thread-safe
+	U32 getElementOffsetInGpuScene(const GpuSceneContiguousArrayIndex& idx) const
+	{
+		ANKI_ASSERT(idx.isValid());
+		return U32(getArrayBase(idx.m_type) + m_componentCount[idx.m_type] * m_componentSize[idx.m_type] * idx.m_index);
+	}
+
 	constexpr static U32 getElementSize(GpuSceneContiguousArrayType type)
 	{
 		return m_componentSize[type] * m_componentCount[type];
 	}
 
 	/// @note Thread-safe against allocate(), deferredFree() and endFrame()
-	void deferredFree(GpuSceneContiguousArrayType type, Index idx);
+	void deferredFree(GpuSceneContiguousArrayIndex& idx);
 
 	/// @note Thread-safe against allocate(), deferredFree() and endFrame()
 	void endFrame();
@@ -79,11 +138,10 @@ private:
 		~ContiguousArrayAllocator()
 		{
 			ANKI_ASSERT(m_nextSlotIndex == 0 && "Forgot to deallocate");
-			for([[maybe_unused]] const SceneDynamicArray<Index>& arr : m_garbage)
+			for([[maybe_unused]] const SceneDynamicArray<U32>& arr : m_garbage)
 			{
 				ANKI_ASSERT(arr.getSize() == 0);
 			}
-			ANKI_ASSERT(m_poolToken.m_offset == kMaxPtrSize);
 		}
 
 		void init(U32 initialArraySize, U16 objectSize, F32 arrayGrowRate)
@@ -100,11 +158,11 @@ private:
 
 		/// Allocate a new object and return its index in the array.
 		/// @note It's thread-safe against itself, deferredFree and endFrame.
-		Index allocateObject();
+		U32 allocateObject();
 
 		/// Safely free an index allocated by allocateObject.
 		/// @note It's thread-safe against itself, allocateObject and endFrame.
-		void deferredFree(U32 crntFrameIdx, Index index);
+		void deferredFree(U32 crntFrameIdx, U32 index);
 
 		/// Call this every frame.
 		/// @note It's thread-safe against itself, deferredFree and allocateObject.
@@ -112,8 +170,7 @@ private:
 
 		PtrSize getArrayBase() const
 		{
-			ANKI_ASSERT(m_poolToken.isValid());
-			return m_poolToken.m_offset;
+			return m_allocation.getOffset();
 		}
 
 		U32 getElementCount() const
@@ -122,11 +179,11 @@ private:
 		}
 
 	private:
-		SegregatedListsGpuMemoryPoolToken m_poolToken;
+		GpuSceneBufferAllocation m_allocation;
 
-		SceneDynamicArray<Index> m_freeSlotStack;
+		SceneDynamicArray<U32> m_freeSlotStack;
 
-		Array<SceneDynamicArray<Index>, kMaxFramesInFlight> m_garbage;
+		Array<SceneDynamicArray<U32>, kMaxFramesInFlight> m_garbage;
 
 		mutable SpinLock m_mtx;
 
@@ -142,7 +199,7 @@ private:
 	U8 m_frame = 0;
 
 	static constexpr Array<U8, U32(GpuSceneContiguousArrayType::kCount)> m_componentCount = {
-		2, kMaxLodCount, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+		2, kMaxLodCount, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
 	static constexpr Array<U8, U32(GpuSceneContiguousArrayType::kCount)> m_componentSize = {
 		sizeof(Mat3x4),
 		sizeof(GpuSceneMeshLod),
@@ -154,8 +211,24 @@ private:
 		sizeof(GpuSceneDecal),
 		sizeof(GpuSceneFogDensityVolume),
 		sizeof(GpuSceneRenderable),
-		sizeof(GpuSceneRenderableBoundingVolume)};
+		sizeof(GpuSceneRenderable),
+		sizeof(GpuSceneRenderable),
+		sizeof(GpuSceneRenderableAabb)};
+
+	AllGpuSceneContiguousArrays();
+
+	~AllGpuSceneContiguousArrays();
 };
+
+inline GpuSceneContiguousArrayIndex::~GpuSceneContiguousArrayIndex()
+{
+	AllGpuSceneContiguousArrays::getSingleton().deferredFree(*this);
+}
+
+inline U32 GpuSceneContiguousArrayIndex::getOffsetInGpuScene() const
+{
+	return AllGpuSceneContiguousArrays::getSingleton().getElementOffsetInGpuScene(*this);
+}
 /// @}
 
 } // end namespace anki

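The new GpuSceneContiguousArrayIndex is a move-only handle: it remembers its array type, resolves its own byte offset through the singleton, and frees itself on destruction. A short usage sketch under those assumptions (variable names are illustrative):

	// Grab one slot in the reflection-probe array; the handle stores both the index and the type.
	GpuSceneContiguousArrayIndex probeIdx =
		AllGpuSceneContiguousArrays::getSingleton().allocate(GpuSceneContiguousArrayType::kReflectionProbes);

	const U32 slot = probeIdx.get();                   // Index inside the array
	const U32 offset = probeIdx.getOffsetInGpuScene(); // Byte offset: array base + slot * element size

	// Ownership moves; copying is deleted.
	GpuSceneContiguousArrayIndex owner = std::move(probeIdx);
	ANKI_ASSERT(!probeIdx.isValid() && owner.isValid());

	// When `owner` is destroyed it calls AllGpuSceneContiguousArrays::deferredFree() on itself,
	// which is why the component destructors above no longer free these slots explicitly.
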
+ 5 - 8
AnKi/Scene/RenderStateBucket.cpp

@@ -30,9 +30,7 @@ RenderStateBucketIndex RenderStateBucketContainer::addUser(const RenderStateInfo
 	SceneDynamicArray<ExtendedBucket>& buckets = m_buckets[technique];
 
 	RenderStateBucketIndex out;
-#if ANKI_ENABLE_ASSERTIONS
 	out.m_technique = technique;
-#endif
 
 	LockGuard lock(m_mtx);
 
@@ -70,9 +68,9 @@ RenderStateBucketIndex RenderStateBucketContainer::addUser(const RenderStateInfo
 	return out;
 }
 
-void RenderStateBucketContainer::removeUser(RenderingTechnique technique, RenderStateBucketIndex& bucketIndex)
+void RenderStateBucketContainer::removeUser(RenderStateBucketIndex& bucketIndex)
 {
-	if(bucketIndex.m_index == kMaxU32)
+	if(!bucketIndex.isValid())
 	{
 		return;
 	}
@@ -80,10 +78,9 @@ void RenderStateBucketContainer::removeUser(RenderingTechnique technique, Render
 	{
 		LockGuard lock(m_mtx);
 
-		ANKI_ASSERT(bucketIndex.m_index < m_buckets[technique].getSize());
-		ANKI_ASSERT(bucketIndex.m_technique == technique);
+		ANKI_ASSERT(bucketIndex.m_index < m_buckets[bucketIndex.m_technique].getSize());
 
-		ExtendedBucket& bucket = m_buckets[technique][bucketIndex.m_index];
+		ExtendedBucket& bucket = m_buckets[bucketIndex.m_technique][bucketIndex.m_index];
 		ANKI_ASSERT(bucket.m_userCount > 0 && bucket.m_program.isCreated());
 
 		--bucket.m_userCount;
@@ -95,7 +92,7 @@ void RenderStateBucketContainer::removeUser(RenderingTechnique technique, Render
 		}
 	}
 
-	bucketIndex = {};
+	bucketIndex.invalidate();
 }
 
 } // end namespace anki

+ 47 - 6
AnKi/Scene/RenderStateBucket.h

@@ -27,30 +27,62 @@ class RenderStateBucketIndex
 	friend class RenderStateBucketContainer;
 
 public:
+	RenderStateBucketIndex() = default;
+
+	RenderStateBucketIndex(const RenderStateBucketIndex&) = delete;
+
+	RenderStateBucketIndex(RenderStateBucketIndex&& b)
+	{
+		*this = std::move(b);
+	}
+
+	~RenderStateBucketIndex();
+
+	RenderStateBucketIndex& operator=(const RenderStateBucketIndex&) = delete;
+
+	RenderStateBucketIndex& operator=(RenderStateBucketIndex&& b)
+	{
+		ANKI_ASSERT(!isValid() && "Forgot to delete");
+		m_index = b.m_index;
+		m_technique = b.m_technique;
+		b.invalidate();
+		return *this;
+	}
+
 	U32 get() const
 	{
 		ANKI_ASSERT(m_index != kMaxU32);
 		return m_index;
 	}
 
+	Bool isValid() const
+	{
+		return m_index != kMaxU32;
+	}
+
 private:
 	U32 m_index = kMaxU32;
-#if ANKI_ENABLE_ASSERTIONS
 	RenderingTechnique m_technique = RenderingTechnique::kCount;
-#endif
+
+	void invalidate()
+	{
+		m_index = kMaxU32;
+		m_technique = RenderingTechnique::kCount;
+	}
 };
 
 /// Holds an array of all render state buckets.
-class RenderStateBucketContainer
+class RenderStateBucketContainer : public MakeSingleton<RenderStateBucketContainer>
 {
-public:
-	~RenderStateBucketContainer();
+	template<typename>
+	friend class MakeSingleton;
 
+public:
 	/// Add a new user for a specific render state and rendering technique.
 	RenderStateBucketIndex addUser(const RenderStateInfo& state, RenderingTechnique technique);
 
 	/// Remove the user.
-	void removeUser(RenderingTechnique technique, RenderStateBucketIndex& bucketIndex);
+	void removeUser(RenderStateBucketIndex& bucketIndex);
 
 	template<typename TFunc>
 	void interateBuckets(RenderingTechnique technique, TFunc func) const
@@ -74,7 +106,16 @@ private:
 
 	Array<SceneDynamicArray<ExtendedBucket>, U32(RenderingTechnique::kCount)> m_buckets;
 	Mutex m_mtx;
+
+	RenderStateBucketContainer() = default;
+
+	~RenderStateBucketContainer();
 };
+
+inline RenderStateBucketIndex::~RenderStateBucketIndex()
+{
+	RenderStateBucketContainer::getSingleton().removeUser(*this);
+}
 /// @}
 
 } // end namespace anki

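RenderStateBucketIndex follows the same RAII pattern: it now always carries its RenderingTechnique, so removeUser() no longer needs the technique argument and the destructor can clean up by itself. A brief sketch, assuming prog is an already-created ShaderProgramPtr:

	RenderStateInfo state;
	state.m_program = prog; // Assumed ShaderProgramPtr
	state.m_primitiveTopology = PrimitiveTopology::kTriangles;
	state.m_indexedDrawcall = false;

	// Become a user of the matching (or newly created) bucket for the technique.
	RenderStateBucketIndex bucket =
		RenderStateBucketContainer::getSingleton().addUser(state, RenderingTechnique::kForward);

	// Either remove explicitly...
	RenderStateBucketContainer::getSingleton().removeUser(bucket);
	// ...or let the index go out of scope: the destructor calls removeUser() for valid indices.
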
+ 7 - 4
AnKi/Scene/SceneGraph.cpp

@@ -5,6 +5,7 @@
 
 #include <AnKi/Scene/SceneGraph.h>
 #include <AnKi/Scene/Octree.h>
+#include <AnKi/Scene/RenderStateBucket.h>
 #include <AnKi/Scene/Components/CameraComponent.h>
 #include <AnKi/Physics/PhysicsWorld.h>
 #include <AnKi/Resource/ResourceManager.h>
@@ -48,14 +49,15 @@ SceneGraph::~SceneGraph()
 		deleteInstance(SceneMemoryPool::getSingleton(), m_octree);
 	}
 
-	m_gpuSceneAllocators.destroy();
+	AllGpuSceneContiguousArrays::freeSingleton();
+	RenderStateBucketContainer::freeSingleton();
 }
 
 Error SceneGraph::init(AllocAlignedCallback allocCallback, void* allocCallbackData)
 {
 	SceneMemoryPool::allocateSingleton(allocCallback, allocCallbackData);
 
-	m_framePool.init(allocCallback, allocCallbackData, 1 * 1024 * 1024);
+	m_framePool.init(allocCallback, allocCallbackData, 1_MB, 2.0, 0, true, ANKI_SAFE_ALIGNMENT, "SceneGraphFramePool");
 
 	m_octree = newInstance<Octree>(SceneMemoryPool::getSingleton());
 	m_octree->init(m_sceneMin, m_sceneMax, ConfigSet::getSingleton().getSceneOctreeMaxDepth());
@@ -66,7 +68,8 @@ Error SceneGraph::init(AllocAlignedCallback allocCallback, void* allocCallbackDa
 	camc->setPerspective(0.1f, 1000.0f, toRad(60.0f), (1080.0f / 1920.0f) * toRad(60.0f));
 	m_mainCam = m_defaultMainCam;
 
-	m_gpuSceneAllocators.init();
+	AllGpuSceneContiguousArrays::allocateSingleton();
+	RenderStateBucketContainer::allocateSingleton();
 
 	return Error::kNone;
 }
@@ -160,7 +163,7 @@ Error SceneGraph::update(Second prevUpdateTime, Second crntTime)
 	ANKI_ASSERT(m_mainCam);
 	ANKI_TRACE_SCOPED_EVENT(SceneUpdate);
 
-	m_gpuSceneAllocators.endFrame();
+	AllGpuSceneContiguousArrays::getSingleton().endFrame();
 
 	m_stats.m_updateTime = HighRezTimer::getCurrentTime();
 

+ 0 - 16
AnKi/Scene/SceneGraph.h

@@ -7,8 +7,6 @@
 
 #include <AnKi/Scene/Common.h>
 #include <AnKi/Scene/SceneNode.h>
-#include <AnKi/Scene/ContiguousArrayAllocator.h>
-#include <AnKi/Scene/RenderStateBucket.h>
 #include <AnKi/Math.h>
 #include <AnKi/Util/HashMap.h>
 #include <AnKi/Scene/Events/EventManager.h>
@@ -154,16 +152,6 @@ public:
 		return *m_octree;
 	}
 
-	ANKI_INTERNAL AllGpuSceneContiguousArrays& getAllGpuSceneContiguousArrays()
-	{
-		return m_gpuSceneAllocators;
-	}
-
-	ANKI_INTERNAL RenderStateBucketContainer& getRenderStateBuckets()
-	{
-		return m_renderStateBuckets;
-	}
-
 private:
 	class UpdateSceneNodesCtx;
 
@@ -199,10 +187,6 @@ private:
 
 	SceneGraphStats m_stats;
 
-	AllGpuSceneContiguousArrays m_gpuSceneAllocators;
-
-	RenderStateBucketContainer m_renderStateBuckets;
-
 	SceneGraph();
 
 	~SceneGraph();

+ 1 - 1
AnKi/Scene/Visibility.cpp

@@ -763,7 +763,7 @@ void CombineResultsTask::combine()
 				  }
 			  });
 
-	const AllGpuSceneContiguousArrays& arrays = SceneGraph::getSingleton().getAllGpuSceneContiguousArrays();
+	const AllGpuSceneContiguousArrays& arrays = AllGpuSceneContiguousArrays::getSingleton();
 
 	auto setOffset = [&](ClusteredObjectType type, GpuSceneContiguousArrayType type2) {
 		results.m_clustererObjectsArrayOffsets[type] = arrays.getElementCount(type2) ? arrays.getArrayBase(type2) : 0;

+ 1 - 1
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -22,7 +22,7 @@ static_assert(sizeof(GpuSceneRenderable) == sizeof(Vec4) * 1);
 typedef UVec4 GpuSceneRenderablePacked;
 
 /// Used in visibility testing.
-struct GpuSceneRenderableBoundingVolume
+struct GpuSceneRenderableAabb
 {
 	Vec3 m_aabbMin;
 	U32 m_renderableOffset; ///< Points to a GpuSceneRenderable

+ 67 - 0
AnKi/Util/Enum.h

@@ -205,6 +205,73 @@ public:
 	TEnum m_begin;
 	TEnum m_end;
 };
+
+/// @memberof EnumBitsIterable
+template<typename TEnum, typename TBitEnum>
+class EnumBitsIterableIterator
+{
+public:
+	using Type = typename std::underlying_type<TBitEnum>::type;
+
+	constexpr EnumBitsIterableIterator(TBitEnum val)
+		: m_val(Type(val))
+	{
+	}
+
+	TEnum operator*() const
+	{
+		ANKI_ASSERT(m_val);
+		const TEnum out = TEnum(__builtin_ctzll(m_val));
+		ANKI_ASSERT(out >= TEnum::kFirst && out < TEnum::kCount);
+		return out;
+	}
+
+	void operator++()
+	{
+		ANKI_ASSERT(m_val);
+		m_val ^= Type(1_U64 << __builtin_ctzll(m_val));
+	}
+
+	bool operator!=(EnumBitsIterableIterator b) const
+	{
+		return m_val != b.m_val;
+	}
+
+private:
+	Type m_val;
+};
+
+/// Allow a mask to be used in a for range loop of a compatible enum.
+/// @code
+/// for(SomeEnum type : EnumBitsIterable<SomeEnum, SomeCompatibleBitEnum>(bitmask))
+/// {
+/// 	...
+/// }
+/// @endcode
+template<typename TEnum, typename TBitEnum>
+class EnumBitsIterable
+{
+public:
+	using Iterator = EnumBitsIterableIterator<TEnum, TBitEnum>;
+
+	constexpr EnumBitsIterable(TBitEnum bits)
+		: m_bits(bits)
+	{
+	}
+
+	Iterator begin() const
+	{
+		return Iterator(m_bits);
+	}
+
+	Iterator end() const
+	{
+		return Iterator(TBitEnum(0));
+	}
+
+public:
+	TBitEnum m_bits;
+};
 /// @}
 
 } // end namespace anki

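EnumBitsIterable mirrors EnumIterable but walks only the bits that are set in a mask, which is how the components above visit each supported RenderingTechnique. A concrete sketch; the mask value is an arbitrary example and doSomethingWith() is a hypothetical helper:

	const RenderingTechniqueBit mask = RenderingTechniqueBit::kGBuffer | RenderingTechniqueBit::kForward;

	for(RenderingTechnique t : EnumBitsIterable<RenderingTechnique, RenderingTechniqueBit>(mask))
	{
		// Iterates from the lowest set bit upwards; each value is asserted to be in [kFirst, kCount).
		doSomethingWith(t);
	}
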
+ 2 - 3
AnKi/Util/MemoryPool.cpp

@@ -9,6 +9,7 @@
 #include <AnKi/Util/Thread.h>
 #include <AnKi/Util/Atomic.h>
 #include <AnKi/Util/Logger.h>
+#include <AnKi/Util/Tracer.h>
 #include <cstdlib>
 #include <cstring>
 #include <cstdio>
@@ -269,6 +270,7 @@ void StackMemoryPool::StackAllocatorBuilderInterface::freeChunk(Chunk* chunk)
 void StackMemoryPool::StackAllocatorBuilderInterface::recycleChunk([[maybe_unused]] Chunk& chunk)
 {
 	ANKI_ASSERT(chunk.m_chunkSize > 0);
+
 #if ANKI_MEM_EXTRA_CHECKS
 	invalidateMemory(&chunk.m_memoryStart[0], chunk.m_chunkSize);
 #endif
@@ -309,7 +311,6 @@ void* StackMemoryPool::allocate(PtrSize size, PtrSize alignment)
 		return nullptr;
 	}
 
-	m_allocationCount.fetchAdd(1);
 	const PtrSize address = ptrToNumber(&chunk->m_memoryStart[0]) + offset;
 	return numberToPtr<void*>(address);
 }
@@ -321,8 +322,6 @@ void StackMemoryPool::free(void* ptr)
 		return;
 	}
 
-	[[maybe_unused]] const U32 count = m_allocationCount.fetchSub(1);
-	ANKI_ASSERT(count > 0);
 	m_builder.free();
 }
 

+ 2 - 2
AnKi/Util/StackAllocatorBuilder.h

@@ -96,8 +96,8 @@ private:
 	/// The memory allocated by all chunks.
 	PtrSize m_memoryCapacity = 0;
 
-	/// Number of chunks allocated.
-	U32 m_chunkCount = 0;
+	/// Number of chunks currently in use.
+	U32 m_chunksInUse = 0;
 
 	/// The interface as described in the class docs.
 	TInterface m_interface;

+ 24 - 14
AnKi/Util/StackAllocatorBuilder.inl.h

@@ -34,7 +34,7 @@ void StackAllocatorBuilder<TChunk, TInterface, TLock>::destroy()
 	m_crntChunk.setNonAtomically(nullptr);
 	m_chunksListHead = nullptr;
 	m_memoryCapacity = 0;
-	m_chunkCount = 0;
+	m_chunksInUse = 0;
 }
 
 template<typename TChunk, typename TInterface, typename TLock>
@@ -90,7 +90,7 @@ Error StackAllocatorBuilder<TChunk, TInterface, TLock>::allocate(PtrSize size, [
 			// Compute the memory of the new chunk. Don't look at any previous chunk
 			PtrSize nextChunkSize = m_interface.getInitialChunkSize();
 			ANKI_ASSERT(nextChunkSize > 0);
-			for(U32 i = 0; i < m_chunkCount; ++i)
+			for(U32 i = 0; i < m_chunksInUse; ++i)
 			{
 				const F64 scale = m_interface.getNextChunkGrowScale();
 				ANKI_ASSERT(scale >= 1.0);
@@ -101,14 +101,28 @@ Error StackAllocatorBuilder<TChunk, TInterface, TLock>::allocate(PtrSize size, [
 			nextChunkSize = max(size, nextChunkSize); // Can't have the allocation fail
 			alignRoundUp(m_interface.getMaxAlignment(), nextChunkSize); // Align again
 
-			TChunk* nextChunk = (crntChunk) ? crntChunk->m_nextChunk : nullptr;
+			TChunk* nextChunk;
+			if(crntChunk)
+			{
+				nextChunk = crntChunk->m_nextChunk;
+			}
+			else if(crntChunk == nullptr && m_chunksListHead)
+			{
+				// This will happen after reset
+				nextChunk = m_chunksListHead;
+			}
+			else
+			{
+				nextChunk = nullptr;
+			}
 
 			if(nextChunk && nextChunk->m_chunkSize == nextChunkSize)
 			{
 				// Will recycle
 
-				crntChunk->m_nextChunk->m_offsetInChunk.store(0);
+				nextChunk->m_offsetInChunk.setNonAtomically(0);
 				m_interface.recycleChunk(*nextChunk);
+				++m_chunksInUse;
 				m_crntChunk.store(nextChunk);
 			}
 			else
@@ -121,16 +135,17 @@ Error StackAllocatorBuilder<TChunk, TInterface, TLock>::allocate(PtrSize size, [
 				newNextChunk->m_nextChunk = nullptr;
 				newNextChunk->m_offsetInChunk.setNonAtomically(0);
 				newNextChunk->m_chunkSize = nextChunkSize;
-				++m_chunkCount;
+				m_memoryCapacity += nextChunkSize;
 
 				// Remove the existing next chunk if there is one
 				TChunk* nextNextChunk = nullptr;
 				if(nextChunk)
 				{
+					m_memoryCapacity -= nextChunk->m_chunkSize;
+
 					nextNextChunk = nextChunk->m_nextChunk;
 					m_interface.freeChunk(nextChunk);
 					nextChunk = nullptr;
-					--m_chunkCount;
 				}
 
 				// Do list stuff
@@ -147,9 +162,8 @@ Error StackAllocatorBuilder<TChunk, TInterface, TLock>::allocate(PtrSize size, [
 
 				newNextChunk->m_nextChunk = nextNextChunk;
 
+				++m_chunksInUse;
 				m_crntChunk.store(newNextChunk);
-
-				m_memoryCapacity += nextChunkSize;
 			}
 		}
 	}
@@ -172,12 +186,8 @@ void StackAllocatorBuilder<TChunk, TInterface, TLock>::free()
 template<typename TChunk, typename TInterface, typename TLock>
 void StackAllocatorBuilder<TChunk, TInterface, TLock>::reset()
 {
-	m_crntChunk.setNonAtomically(m_chunksListHead);
-
-	if(m_chunksListHead)
-	{
-		m_chunksListHead->m_offsetInChunk.setNonAtomically(0);
-	}
+	m_crntChunk.setNonAtomically(nullptr);
+	m_chunksInUse = 0;
 
 	// Reset allocation count and do some error checks
 	Atomic<U32>* allocationCount = m_interface.getAllocationCount();

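The reset() change means the chunk list is no longer touched at reset time: m_crntChunk is cleared and the next allocate() re-enters the slow path, finds m_chunksListHead and recycles it, with capacity now tracked via m_memoryCapacity instead of a chunk count. A rough behavioral sketch through StackMemoryPool, which drives this builder; exposing reset() on the pool is an assumption about its public API:

	StackMemoryPool pool(allocAligned, nullptr, 128, 2.0, 0, true);

	void* a = pool.allocate(100, 1); // Slow path: creates the first chunk
	void* b = pool.allocate(100, 1); // Slow path again: the next chunk is scaled up by the grow rate

	pool.reset(); // Assumed API: chunks stay allocated but are marked as not in use

	void* c = pool.allocate(100, 1); // Recycles the head chunk instead of creating a new one
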
+ 2 - 0
AnKi/Util/Tracer.h

@@ -10,6 +10,7 @@
 #include <AnKi/Util/DynamicArray.h>
 #include <AnKi/Util/Singleton.h>
 #include <AnKi/Util/String.h>
+#include <AnKi/Util/Logger.h>
 
 namespace anki {
 
@@ -95,6 +96,7 @@ public:
 
 	void setEnabled(Bool enabled)
 	{
+		ANKI_UTIL_LOGV("Tracing %s", (enabled) ? "enabled" : "disabled");
 		m_enabled = enabled;
 	}
 

+ 5 - 5
Samples/Common/SampleApp.cpp

@@ -169,6 +169,11 @@ Error SampleApp::userMainLoop(Bool& quit, Second elapsedTime)
 		}
 	}
 
+	if(in.getKey(KeyCode::kF12) == 1 && ANKI_ENABLE_TRACE)
+	{
+		Tracer::getSingleton().setEnabled(!Tracer::getSingleton().getEnabled());
+	}
+
 	if(in.getMouseButton(MouseButton::kRight) || in.hasTouchDevice())
 	{
 		in.hideCursor(true);
@@ -244,11 +249,6 @@ Error SampleApp::userMainLoop(Bool& quit, Second elapsedTime)
 			mover->moveLocalZ(moveDistance);
 		}
 
-		if(in.getKey(KeyCode::kF12) == 1 && ANKI_ENABLE_TRACE)
-		{
-			Tracer::getSingleton().setEnabled(!Tracer::getSingleton().getEnabled());
-		}
-
 		const Vec2 velocity = in.getMousePosition() - mousePosOn1stClick;
 		in.moveCursor(mousePosOn1stClick);
 		if(velocity != Vec2(0.0))

+ 2 - 1
Samples/SkeletalAnimation/Main.cpp

@@ -57,7 +57,8 @@ public:
 	}
 };
 
-int main(int argc, char* argv[])
+ANKI_MAIN_FUNCTION(myMain)
+int myMain(int argc, char* argv[])
 {
 	Error err = Error::kNone;
 

+ 10 - 10
Tests/Util/Memory.cpp

@@ -42,7 +42,7 @@ ANKI_TEST(Util, StackMemoryPool)
 
 	// Allocate
 	{
-		StackMemoryPool pool(allocAligned, nullptr, 100, 1.0, 0, true);
+		StackMemoryPool pool(allocAligned, nullptr, 100, 2.0, 0, true);
 
 		void* a = pool.allocate(25, 1);
 		ANKI_TEST_EXPECT_NEQ(a, nullptr);
@@ -52,14 +52,14 @@ ANKI_TEST(Util, StackMemoryPool)
 		ANKI_TEST_EXPECT_EQ(pool.getAllocationCount(), 0);
 
 		// Allocate a few
-		const U SIZE = 75;
-		a = pool.allocate(SIZE, 1);
+		constexpr U kSize = 75;
+		a = pool.allocate(kSize, 1);
 		ANKI_TEST_EXPECT_NEQ(a, nullptr);
-		a = pool.allocate(SIZE, 1);
+		a = pool.allocate(kSize, 1);
 		ANKI_TEST_EXPECT_NEQ(a, nullptr);
-		a = pool.allocate(SIZE, 1);
+		a = pool.allocate(kSize, 1);
 		ANKI_TEST_EXPECT_NEQ(a, nullptr);
-		a = pool.allocate(SIZE, 1);
+		a = pool.allocate(kSize, 1);
 		ANKI_TEST_EXPECT_NEQ(a, nullptr);
 		ANKI_TEST_EXPECT_EQ(pool.getAllocationCount(), 4);
 
@@ -68,13 +68,13 @@ ANKI_TEST(Util, StackMemoryPool)
 		ANKI_TEST_EXPECT_EQ(pool.getAllocationCount(), 0);
 
 		// Allocate again
-		a = pool.allocate(SIZE, 1);
+		a = pool.allocate(kSize, 1);
 		ANKI_TEST_EXPECT_NEQ(a, nullptr);
-		a = pool.allocate(SIZE, 1);
+		a = pool.allocate(kSize, 1);
 		ANKI_TEST_EXPECT_NEQ(a, nullptr);
-		a = pool.allocate(SIZE, 1);
+		a = pool.allocate(kSize, 1);
 		ANKI_TEST_EXPECT_NEQ(a, nullptr);
-		a = pool.allocate(SIZE, 1);
+		a = pool.allocate(kSize, 1);
 		ANKI_TEST_EXPECT_NEQ(a, nullptr);
 		ANKI_TEST_EXPECT_EQ(pool.getAllocationCount(), 4);
 	}