
Bring some GPU scene structures together

Panagiotis Christopoulos Charitos 2 years ago
parent commit 2e2afd402f

+ 2 - 1
AnKi/Core/App.cpp

@@ -262,7 +262,8 @@ Error App::initInternal(AllocAlignedCallback allocCb, void* allocCbUserData)
 	//
 	// ThreadPool
 	//
-	m_threadHive = newInstance<ThreadHive>(m_mainPool, m_config->getCoreJobThreadCount(), &m_mainPool, true);
+	const Bool pinThreads = !ANKI_OS_ANDROID;
+	m_threadHive = newInstance<ThreadHive>(m_mainPool, m_config->getCoreJobThreadCount(), &m_mainPool, pinThreads);
 
 	//
 	// Graphics API

+ 1 - 1
AnKi/Core/GpuMemoryPools.cpp

@@ -33,7 +33,7 @@ void UnifiedGeometryMemoryPool::init(HeapMemoryPool* pool, GrManager* gr, const
 	// Allocate something dummy to force creating the GPU buffer
 	SegregatedListsGpuMemoryPoolToken token;
 	allocate(16, 4, token);
-	free(token);
+	deferredFree(token);
 }
 
 void GpuSceneMemoryPool::init(HeapMemoryPool* pool, GrManager* gr, const ConfigSet& cfg)

+ 9 - 4
AnKi/Core/GpuMemoryPools.h

@@ -36,9 +36,9 @@ public:
 		m_pool.allocate(size, alignment, token);
 	}
 
-	void free(SegregatedListsGpuMemoryPoolToken& token)
+	void deferredFree(SegregatedListsGpuMemoryPoolToken& token)
 	{
-		m_pool.free(token);
+		m_pool.deferredFree(token);
 	}
 
 	void endFrame()
@@ -77,9 +77,9 @@ public:
 		m_pool.allocate(size, alignment, token);
 	}
 
-	void free(SegregatedListsGpuMemoryPoolToken& token)
+	void deferredFree(SegregatedListsGpuMemoryPoolToken& token)
 	{
-		m_pool.free(token);
+		m_pool.deferredFree(token);
 	}
 
 	void endFrame()
@@ -97,6 +97,11 @@ public:
 		m_pool.getStats(externalFragmentation, userAllocatedSize, totalSize);
 	}
 
+	GrManager& getGrManager()
+	{
+		return m_pool.getGrManager();
+	}
+
 private:
 	SegregatedListsGpuMemoryPool m_pool;
 };

+ 1 - 1
AnKi/Gr/Utils/SegregatedListsGpuMemoryPool.cpp

@@ -222,7 +222,7 @@ void SegregatedListsGpuMemoryPool::allocate(PtrSize size, U32 alignment, Segrega
 	m_allocatedSize += size;
 }
 
-void SegregatedListsGpuMemoryPool::free(SegregatedListsGpuMemoryPoolToken& token)
+void SegregatedListsGpuMemoryPool::deferredFree(SegregatedListsGpuMemoryPoolToken& token)
 {
 	ANKI_ASSERT(isInitialized());
 

+ 8 - 2
AnKi/Gr/Utils/SegregatedListsGpuMemoryPool.h

@@ -59,9 +59,9 @@ public:
 	/// @note It's thread-safe.
 	void allocate(PtrSize size, U32 alignment, SegregatedListsGpuMemoryPoolToken& token);
 
-	/// Free memory.
+	/// Free memory a few frames down the line, once the GPU is no longer using it.
 	/// @note It's thread-safe.
-	void free(SegregatedListsGpuMemoryPoolToken& token);
+	void deferredFree(SegregatedListsGpuMemoryPoolToken& token);
 
 	/// @note It's thread-safe.
 	void endFrame();
@@ -77,6 +77,12 @@ public:
 	/// @note It's thread-safe.
 	void getStats(F32& externalFragmentation, PtrSize& userAllocatedSize, PtrSize& totalSize) const;
 
+	GrManager& getGrManager()
+	{
+		ANKI_ASSERT(m_gr);
+		return *m_gr;
+	}
+
 private:
 	class BuilderInterface;
 	class Chunk;
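
The rename makes the contract explicit: a freed block is only recycled after the frames that may still reference it on the GPU have completed. A minimal usage sketch (illustrative only; the pool variable, size and alignment are assumed):

SegregatedListsGpuMemoryPoolToken token;
pool.allocate(256, 4, token);  // sub-allocate 256 bytes, 4-byte aligned
// ... record GPU work that reads the allocation ...
pool.deferredFree(token);      // queued for freeing; the memory is not reused yet
pool.endFrame();               // called once per frame; a few frames later the
                               // block returns to the pool's free lists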

+ 3 - 2
AnKi/Resource/MeshResource.cpp

@@ -59,11 +59,12 @@ MeshResource::~MeshResource()
 
 	for(Lod& lod : m_lods)
 	{
-		getExternalSubsystems().m_unifiedGometryMemoryPool->free(lod.m_indexBufferAllocationToken);
+		getExternalSubsystems().m_unifiedGometryMemoryPool->deferredFree(lod.m_indexBufferAllocationToken);
 
 		for(VertexStreamId stream : EnumIterable(VertexStreamId::kMeshRelatedFirst, VertexStreamId::kMeshRelatedCount))
 		{
-			getExternalSubsystems().m_unifiedGometryMemoryPool->free(lod.m_vertexBuffersAllocationToken[stream]);
+			getExternalSubsystems().m_unifiedGometryMemoryPool->deferredFree(
+				lod.m_vertexBuffersAllocationToken[stream]);
 		}
 	}
 

+ 43 - 21
AnKi/Scene/Components/ModelComponent.cpp

@@ -18,15 +18,26 @@ ModelComponent::ModelComponent(SceneNode* node)
 	, m_node(node)
 	, m_spatial(this)
 {
-	getExternalSubsystems(*node).m_gpuSceneMemoryPool->allocate(sizeof(Mat3x4) * 2, alignof(F32), m_gpuSceneTransforms);
+	m_gpuSceneTransformsOffset = U32(
+		node->getSceneGraph().getAllGpuSceneContiguousArrays().allocate(GpuSceneContiguousArrayType::kTransformPairs));
 }
 
 ModelComponent::~ModelComponent()
 {
 	GpuSceneMemoryPool& gpuScene = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
-	gpuScene.free(m_gpuSceneMeshLods);
-	gpuScene.free(m_gpuSceneUniforms);
-	gpuScene.free(m_gpuSceneTransforms);
+	gpuScene.deferredFree(m_gpuSceneUniforms);
+
+	for(const PatchInfo& patch : m_patchInfos)
+	{
+		if(patch.m_gpuSceneMeshLodsOffset != kMaxU32)
+		{
+			m_node->getSceneGraph().getAllGpuSceneContiguousArrays().deferredFree(
+				GpuSceneContiguousArrayType::kMeshLods, patch.m_gpuSceneMeshLodsOffset);
+		}
+	}
+
+	m_node->getSceneGraph().getAllGpuSceneContiguousArrays().deferredFree(GpuSceneContiguousArrayType::kTransformPairs,
+																		  m_gpuSceneTransformsOffset);
 
 	m_patchInfos.destroy(m_node->getMemoryPool());
 
@@ -51,12 +62,24 @@ void ModelComponent::loadModelResource(CString filename)
 	// GPU scene allocations
 	GpuSceneMemoryPool& gpuScene = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
 
-	gpuScene.free(m_gpuSceneMeshLods);
-	gpuScene.allocate(sizeof(GpuSceneMeshLod) * kMaxLodCount * modelPatchCount, 4, m_gpuSceneMeshLods);
+	for(const PatchInfo& patch : m_patchInfos)
+	{
+		if(patch.m_gpuSceneMeshLodsOffset != kMaxU32)
+		{
+			m_node->getSceneGraph().getAllGpuSceneContiguousArrays().deferredFree(
+				GpuSceneContiguousArrayType::kMeshLods, patch.m_gpuSceneMeshLodsOffset);
+		}
+	}
 
-	U32 uniformsSize = 0;
 	m_patchInfos.resize(m_node->getMemoryPool(), modelPatchCount);
 	for(U32 i = 0; i < modelPatchCount; ++i)
+	{
+		m_patchInfos[i].m_gpuSceneMeshLodsOffset = U32(
+			m_node->getSceneGraph().getAllGpuSceneContiguousArrays().allocate(GpuSceneContiguousArrayType::kMeshLods));
+	}
+
+	U32 uniformsSize = 0;
+	for(U32 i = 0; i < modelPatchCount; ++i)
 	{
 		m_patchInfos[i].m_gpuSceneUniformsOffset = uniformsSize;
 
@@ -65,7 +88,7 @@ void ModelComponent::loadModelResource(CString filename)
 		uniformsSize += size;
 	}
 
-	gpuScene.free(m_gpuSceneUniforms);
+	gpuScene.deferredFree(m_gpuSceneUniforms);
 	gpuScene.allocate(uniformsSize, 4, m_gpuSceneUniforms);
 
 	for(U32 i = 0; i < modelPatchCount; ++i)
@@ -110,15 +133,16 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 		// Upload the mesh views
 		const U32 modelPatchCount = m_model->getModelPatches().getSize();
-		DynamicArrayRaii<GpuSceneMeshLod> meshLods(info.m_framePool, modelPatchCount * kMaxLodCount);
 		for(U32 i = 0; i < modelPatchCount; ++i)
 		{
 			const ModelPatch& patch = m_model->getModelPatches()[i];
 			const MeshResource& mesh = *patch.getMesh();
 
+			Array<GpuSceneMeshLod, kMaxLodCount> meshLods;
+
 			for(U32 l = 0; l < mesh.getLodCount(); ++l)
 			{
-				GpuSceneMeshLod& meshLod = meshLods[i * kMaxLodCount + l];
+				GpuSceneMeshLod& meshLod = meshLods[l];
 				meshLod = {};
 				meshLod.m_positionScale = mesh.getPositionsScale();
 				meshLod.m_positionTranslation = mesh.getPositionsTranslation();
@@ -154,12 +178,12 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 			// Copy the last LOD to the rest just in case
 			for(U32 l = mesh.getLodCount(); l < kMaxLodCount; ++l)
 			{
-				meshLods[i * kMaxLodCount + l] = meshLods[i * kMaxLodCount + (l - 1)];
+				meshLods[l] = meshLods[l - 1];
 			}
-		}
 
-		gpuScenePatcher.newCopy(*info.m_framePool, m_gpuSceneMeshLods.m_offset, meshLods.getSizeInBytes(),
-								&meshLods[0]);
+			gpuScenePatcher.newCopy(*info.m_framePool, m_patchInfos[i].m_gpuSceneMeshLodsOffset,
+									meshLods.getSizeInBytes(), &meshLods[0]);
+		}
 
 		// Upload the uniforms
 		DynamicArrayRaii<U32> allUniforms(info.m_framePool, U32(m_gpuSceneUniforms.m_size / 4));
@@ -187,7 +211,7 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		trfs[1] = Mat3x4(info.m_node->getPreviousWorldTransform());
 
 		getExternalSubsystems(*info.m_node)
-			.m_gpuSceneMicroPatcher->newCopy(*info.m_framePool, m_gpuSceneTransforms.m_offset, sizeof(trfs), &trfs[0]);
+			.m_gpuSceneMicroPatcher->newCopy(*info.m_framePool, m_gpuSceneTransformsOffset, sizeof(trfs), &trfs[0]);
 	}
 
 	// Spatial update
@@ -271,10 +295,9 @@ void ModelComponent::setupRenderableQueueElements(U32 lod, RenderingTechnique te
 		patch.getRenderingInfo(key, modelInf);
 
 		queueElem.m_program = modelInf.m_program.get();
-		queueElem.m_worldTransformsOffset = U32(m_gpuSceneTransforms.m_offset);
+		queueElem.m_worldTransformsOffset = m_gpuSceneTransformsOffset;
 		queueElem.m_uniformsOffset = m_patchInfos[i].m_gpuSceneUniformsOffset;
-		queueElem.m_geometryOffset =
-			U32(m_gpuSceneMeshLods.m_offset + sizeof(GpuSceneMeshLod) * (kMaxLodCount * i + lod));
+		queueElem.m_geometryOffset = m_patchInfos[i].m_gpuSceneMeshLodsOffset + lod * sizeof(GpuSceneMeshLod);
 		queueElem.m_boneTransformsOffset = (hasSkin) ? m_skinComponent->getBoneTransformsGpuSceneOffset() : 0;
 		queueElem.m_indexCount = modelInf.m_indexCount;
 		queueElem.m_firstIndex = U32(modelInf.m_indexBufferOffset / 2 + modelInf.m_firstIndex);
@@ -342,10 +365,9 @@ void ModelComponent::setupRayTracingInstanceQueueElements(U32 lod, RenderingTech
 
 		queueElem.m_bottomLevelAccelerationStructure = modelInf.m_bottomLevelAccelerationStructure.get();
 		queueElem.m_shaderGroupHandleIndex = modelInf.m_shaderGroupHandleIndex;
-		queueElem.m_worldTransformsOffset = U32(m_gpuSceneTransforms.m_offset);
+		queueElem.m_worldTransformsOffset = m_gpuSceneTransformsOffset;
 		queueElem.m_uniformsOffset = m_patchInfos[i].m_gpuSceneUniformsOffset;
-		queueElem.m_geometryOffset =
-			U32(m_gpuSceneMeshLods.m_offset + sizeof(GpuSceneMeshLod) * (kMaxLodCount * i + lod));
+		queueElem.m_geometryOffset = m_patchInfos[i].m_gpuSceneMeshLodsOffset + lod * sizeof(GpuSceneMeshLod);
 		queueElem.m_indexBufferOffset = U32(modelInf.m_indexBufferOffset);
 
 		const Transform positionTransform(patch.getMesh()->getPositionsTranslation().xyz0(), Mat3x4::getIdentity(),
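
With each patch owning a slot in the contiguous kMeshLods array, the per-LOD records sit back to back, so the queue element's geometry offset becomes plain arithmetic. A worked example (assuming a 16-byte Vec4, so sizeof(GpuSceneMeshLod) == 3 * sizeof(Vec4) == 48, per the static_assert in GpuSceneTypes.h below):

// If a patch's slot starts at byte offset 960 in the kMeshLods array,
// LOD 2 of that patch lives at 960 + 2 * 48 == 1056:
queueElem.m_geometryOffset = m_patchInfos[i].m_gpuSceneMeshLodsOffset + lod * sizeof(GpuSceneMeshLod);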

+ 3 - 3
AnKi/Scene/Components/ModelComponent.h

@@ -53,7 +53,8 @@ private:
 	class PatchInfo
 	{
 	public:
-		U32 m_gpuSceneUniformsOffset;
+		U32 m_gpuSceneUniformsOffset = kMaxU32;
+		U32 m_gpuSceneMeshLodsOffset = kMaxU32;
 		RenderingTechniqueBit m_techniques;
 	};
 
@@ -63,9 +64,8 @@ private:
 
 	ModelResourcePtr m_model;
 
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneMeshLods;
 	SegregatedListsGpuMemoryPoolToken m_gpuSceneUniforms;
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneTransforms;
+	U32 m_gpuSceneTransformsOffset = kMaxU32;
 	DynamicArray<PatchInfo> m_patchInfos;
 
 	Bool m_dirty : 1 = true;

+ 27 - 16
AnKi/Scene/Components/ParticleEmitterComponent.cpp

@@ -198,7 +198,6 @@ ParticleEmitterComponent::ParticleEmitterComponent(SceneNode* node)
 	, m_node(node)
 	, m_spatial(this)
 {
-	getExternalSubsystems(*node).m_gpuSceneMemoryPool->allocate(sizeof(Mat3x4), alignof(F32), m_gpuSceneTransform);
 }
 
 ParticleEmitterComponent::~ParticleEmitterComponent()
@@ -207,12 +206,16 @@ ParticleEmitterComponent::~ParticleEmitterComponent()
 	m_physicsParticles.destroy(m_node->getMemoryPool());
 
 	GpuSceneMemoryPool& gpuScenePool = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
-	gpuScenePool.free(m_gpuScenePositions);
-	gpuScenePool.free(m_gpuSceneScales);
-	gpuScenePool.free(m_gpuSceneAlphas);
-	gpuScenePool.free(m_gpuSceneParticles);
-	gpuScenePool.free(m_gpuSceneUniforms);
-	gpuScenePool.free(m_gpuSceneTransform);
+	gpuScenePool.deferredFree(m_gpuScenePositions);
+	gpuScenePool.deferredFree(m_gpuSceneScales);
+	gpuScenePool.deferredFree(m_gpuSceneAlphas);
+	gpuScenePool.deferredFree(m_gpuSceneUniforms);
+
+	if(m_gpuSceneParticleEmitterOffset != kMaxU32)
+	{
+		m_node->getSceneGraph().getAllGpuSceneContiguousArrays().deferredFree(
+			GpuSceneContiguousArrayType::kParticleEmitters, m_gpuSceneParticleEmitterOffset);
+	}
 
 	m_spatial.removeFromOctree(m_node->getSceneGraph().getOctree());
 }
@@ -237,11 +240,16 @@ void ParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 	m_simpleParticles.destroy(m_node->getMemoryPool());
 	m_physicsParticles.destroy(m_node->getMemoryPool());
 	GpuSceneMemoryPool& gpuScenePool = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
-	gpuScenePool.free(m_gpuScenePositions);
-	gpuScenePool.free(m_gpuSceneScales);
-	gpuScenePool.free(m_gpuSceneAlphas);
-	gpuScenePool.free(m_gpuSceneParticles);
-	gpuScenePool.free(m_gpuSceneUniforms);
+	gpuScenePool.deferredFree(m_gpuScenePositions);
+	gpuScenePool.deferredFree(m_gpuSceneScales);
+	gpuScenePool.deferredFree(m_gpuSceneAlphas);
+	gpuScenePool.deferredFree(m_gpuSceneUniforms);
+
+	if(m_gpuSceneParticleEmitterOffset != kMaxU32)
+	{
+		m_node->getSceneGraph().getAllGpuSceneContiguousArrays().deferredFree(
+			GpuSceneContiguousArrayType::kParticleEmitters, m_gpuSceneParticleEmitterOffset);
+	}
 
 	// Init particles
 	m_simulationType = (m_props.m_usePhysicsEngine) ? SimulationType::kPhysicsEngine : SimulationType::kSimple;
@@ -270,9 +278,11 @@ void ParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 	gpuScenePool.allocate(sizeof(Vec3) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuScenePositions);
 	gpuScenePool.allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneAlphas);
 	gpuScenePool.allocate(sizeof(F32) * m_props.m_maxNumOfParticles, alignof(F32), m_gpuSceneScales);
-	gpuScenePool.allocate(sizeof(GpuSceneParticles), alignof(U32), m_gpuSceneParticles);
 	gpuScenePool.allocate(m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(),
 						  alignof(U32), m_gpuSceneUniforms);
+
+	m_gpuSceneParticleEmitterOffset = U32(m_node->getSceneGraph().getAllGpuSceneContiguousArrays().allocate(
+		GpuSceneContiguousArrayType::kParticleEmitters));
 }
 
 Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
@@ -316,12 +326,13 @@ Error ParticleEmitterComponent::update(SceneComponentUpdateInfo& info, Bool& upd
 
 	if(m_resourceUpdated)
 	{
-		GpuSceneParticles particles = {};
+		GpuSceneParticleEmitter particles = {};
 		particles.m_vertexOffsets[U32(VertexStreamId::kParticlePosition)] = U32(m_gpuScenePositions.m_offset);
 		particles.m_vertexOffsets[U32(VertexStreamId::kParticleColor)] = U32(m_gpuSceneAlphas.m_offset);
 		particles.m_vertexOffsets[U32(VertexStreamId::kParticleScale)] = U32(m_gpuSceneScales.m_offset);
 
-		patcher.newCopy(*info.m_framePool, m_gpuSceneParticles.m_offset, sizeof(GpuSceneParticles), &particles);
+		patcher.newCopy(*info.m_framePool, m_gpuSceneParticleEmitterOffset, sizeof(GpuSceneParticleEmitter),
+						&particles);
 
 		patcher.newCopy(*info.m_framePool, m_gpuSceneUniforms.m_offset,
 						m_particleEmitterResource->getMaterial()->getPrefilledLocalUniforms().getSizeInBytes(),
@@ -458,7 +469,7 @@ void ParticleEmitterComponent::setupRenderableQueueElements(RenderingTechnique t
 	el->m_program = prog.get();
 	el->m_worldTransformsOffset = 0;
 	el->m_uniformsOffset = U32(m_gpuSceneUniforms.m_offset);
-	el->m_geometryOffset = U32(m_gpuSceneParticles.m_offset);
+	el->m_geometryOffset = m_gpuSceneParticleEmitterOffset;
 	el->m_boneTransformsOffset = 0;
 	el->m_vertexCount = 6 * m_aliveParticleCount;
 	el->m_firstVertex = 0;

+ 1 - 2
AnKi/Scene/Components/ParticleEmitterComponent.h

@@ -66,9 +66,8 @@ private:
 	SegregatedListsGpuMemoryPoolToken m_gpuScenePositions;
 	SegregatedListsGpuMemoryPoolToken m_gpuSceneAlphas;
 	SegregatedListsGpuMemoryPoolToken m_gpuSceneScales;
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneParticles;
 	SegregatedListsGpuMemoryPoolToken m_gpuSceneUniforms;
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneTransform;
+	U32 m_gpuSceneParticleEmitterOffset = kMaxU32;
 
 	Bool m_resourceUpdated = true;
 	SimulationType m_simulationType = SimulationType::kUndefined;

+ 2 - 2
AnKi/Scene/Components/SkinComponent.cpp

@@ -25,7 +25,7 @@ SkinComponent::~SkinComponent()
 	m_boneTrfs[1].destroy(m_node->getMemoryPool());
 	m_animationTrfs.destroy(m_node->getMemoryPool());
 
-	getExternalSubsystems(*m_node).m_gpuSceneMemoryPool->free(m_boneTransformsGpuSceneOffset);
+	getExternalSubsystems(*m_node).m_gpuSceneMemoryPool->deferredFree(m_boneTransformsGpuSceneOffset);
 }
 
 void SkinComponent::loadSkeletonResource(CString fname)
@@ -46,7 +46,7 @@ void SkinComponent::loadSkeletonResource(CString fname)
 	m_boneTrfs[0].destroy(m_node->getMemoryPool());
 	m_boneTrfs[1].destroy(m_node->getMemoryPool());
 	m_animationTrfs.destroy(m_node->getMemoryPool());
-	getExternalSubsystems(*m_node).m_gpuSceneMemoryPool->free(m_boneTransformsGpuSceneOffset);
+	getExternalSubsystems(*m_node).m_gpuSceneMemoryPool->deferredFree(m_boneTransformsGpuSceneOffset);
 
 	// Create
 	const U32 boneCount = m_skeleton->getBones().getSize();

+ 6 - 0
AnKi/Scene/ConfigVars.defs.h

@@ -26,3 +26,9 @@ ANKI_CONFIG_VAR_F32(SceneProbeShadowEffectiveDistance, 32.0f, 1.0f, kMaxF32,
 ANKI_CONFIG_VAR_BOOL(SceneRayTracedShadows, true, "Enable or not ray traced shadows. Ignored if RT is not supported")
 ANKI_CONFIG_VAR_F32(SceneRayTracingExtendedFrustumDistance, 100.0f, 10.0f, 10000.0f,
 					"Every object that its distance from the camera is bellow that value will take part in ray tracing")
+
+ANKI_CONFIG_VAR_U32(SceneMinGpuSceneTransforms, 8 * 1024, 8, 100 * 1024,
+					"The min number of transforms stored in the GPU scene")
+ANKI_CONFIG_VAR_U32(SceneMinGpuSceneMeshes, 8 * 1024, 8, 100 * 1024, "The min number of meshes stored in the GPU scene")
+ANKI_CONFIG_VAR_U32(SceneMinGpuSceneParticleEmitters, 1 * 1024, 8, 100 * 1024,
+					"The min number of particle emitters stored in the GPU scene")

+ 174 - 0
AnKi/Scene/ContiguousArrayAllocator.cpp

@@ -0,0 +1,174 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Scene/ContiguousArrayAllocator.h>
+#include <AnKi/Scene/SceneGraph.h>
+#include <AnKi/Core/ConfigSet.h>
+#include <AnKi/Gr/GrManager.h>
+#include <algorithm>
+
+namespace anki {
+
+void AllGpuSceneContiguousArrays::ContiguousArrayAllocator::destroy(GpuSceneMemoryPool* gpuScene,
+																	HeapMemoryPool* cpuPool)
+{
+	for(U32 i = 0; i < kMaxFramesInFlight; ++i)
+	{
+		collectGarbage(i, gpuScene, cpuPool);
+	}
+}
+
+AllGpuSceneContiguousArrays::ContiguousArrayAllocator::Index
+AllGpuSceneContiguousArrays::ContiguousArrayAllocator::allocateObject(GpuSceneMemoryPool* gpuScene,
+																	  HeapMemoryPool* cpuPool)
+{
+	ANKI_ASSERT(gpuScene && cpuPool);
+
+	LockGuard lock(m_mtx);
+
+	if(m_poolToken.m_offset == kMaxPtrSize)
+	{
+		// Initialize
+		const U32 alignment = sizeof(U32);
+		gpuScene->allocate(m_objectSize * m_initialArraySize, alignment, m_poolToken);
+		m_nextSlotIndex = 0;
+
+		m_freeSlotStack.create(*cpuPool, m_initialArraySize);
+		for(U32 i = 0; i < m_initialArraySize; ++i)
+		{
+			m_freeSlotStack[i] = i;
+		}
+	}
+	else if(m_nextSlotIndex == m_freeSlotStack.getSize())
+	{
+		// Grow
+		ANKI_ASSERT(!"TODO");
+	}
+
+	const Index idx = m_freeSlotStack[m_nextSlotIndex];
+	++m_nextSlotIndex;
+
+	ANKI_ASSERT(idx < m_freeSlotStack.getSize());
+	return idx;
+}
+
+void AllGpuSceneContiguousArrays::ContiguousArrayAllocator::deferredFree(U32 crntFrameIdx, HeapMemoryPool* cpuPool,
+																		 Index index)
+{
+	ANKI_ASSERT(cpuPool);
+
+	LockGuard lock(m_mtx);
+
+	ANKI_ASSERT(index < m_freeSlotStack.getSize());
+	m_garbage[crntFrameIdx].emplaceBack(*cpuPool, index);
+}
+
+void AllGpuSceneContiguousArrays::ContiguousArrayAllocator::collectGarbage(U32 newFrameIdx,
+																		   GpuSceneMemoryPool* gpuScene,
+																		   HeapMemoryPool* cpuPool)
+{
+	ANKI_ASSERT(gpuScene && cpuPool);
+
+	LockGuard lock(m_mtx);
+
+	if(m_garbage[newFrameIdx].getSize() == 0) [[likely]]
+	{
+		return;
+	}
+
+	// Release deferred frees
+	for(Index idx : m_garbage[newFrameIdx])
+	{
+		ANKI_ASSERT(m_nextSlotIndex > 0);
+		--m_nextSlotIndex;
+		m_freeSlotStack[m_nextSlotIndex] = idx;
+	}
+
+	m_garbage[newFrameIdx].destroy(*cpuPool);
+
+	// Sort so we can keep memory close to the beginning of the array for better cache behaviour
+	std::sort(m_freeSlotStack.getBegin() + m_nextSlotIndex, m_freeSlotStack.getEnd());
+
+	// Adjust the stack size
+	const U32 allocatedSlots = m_nextSlotIndex;
+	if(U32(F32(allocatedSlots) * m_growRate) < m_freeSlotStack.getSize()
+	   && m_freeSlotStack.getSize() > m_initialArraySize)
+	{
+		// Shrink
+		ANKI_ASSERT(!"TODO");
+	}
+	else if(allocatedSlots == 0)
+	{
+		ANKI_ASSERT(m_nextSlotIndex == 0);
+		gpuScene->deferredFree(m_poolToken);
+		m_freeSlotStack.destroy(*cpuPool);
+	}
+}
+
+void AllGpuSceneContiguousArrays::init(SceneGraph* scene)
+{
+	m_scene = scene;
+
+	const ConfigSet& cfg = *scene->m_subsystems.m_config;
+	constexpr F32 kGrowRate = 2.0;
+
+	U32 arraySize =
+		cfg.getSceneMinGpuSceneTransforms() / m_componentCount[GpuSceneContiguousArrayType::kTransformPairs];
+	m_allocs[GpuSceneContiguousArrayType::kTransformPairs].init(
+		arraySize,
+		m_componentSize[GpuSceneContiguousArrayType::kTransformPairs]
+			* m_componentCount[GpuSceneContiguousArrayType::kTransformPairs],
+		kGrowRate);
+
+	arraySize = cfg.getSceneMinGpuSceneMeshes() / m_componentCount[GpuSceneContiguousArrayType::kMeshLods];
+	m_allocs[GpuSceneContiguousArrayType::kMeshLods].init(
+		arraySize,
+		m_componentSize[GpuSceneContiguousArrayType::kMeshLods]
+			* m_componentCount[GpuSceneContiguousArrayType::kMeshLods],
+		kGrowRate);
+
+	arraySize =
+		cfg.getSceneMinGpuSceneParticleEmitters() / m_componentCount[GpuSceneContiguousArrayType::kParticleEmitters];
+	m_allocs[GpuSceneContiguousArrayType::kParticleEmitters].init(
+		arraySize,
+		m_componentSize[GpuSceneContiguousArrayType::kParticleEmitters]
+			* m_componentCount[GpuSceneContiguousArrayType::kParticleEmitters],
+		kGrowRate);
+}
+
+void AllGpuSceneContiguousArrays::destroy()
+{
+	for(GpuSceneContiguousArrayType type : EnumIterable<GpuSceneContiguousArrayType>())
+	{
+		m_allocs[type].destroy(m_scene->m_subsystems.m_gpuSceneMemoryPool, &m_scene->m_pool);
+	}
+}
+
+PtrSize AllGpuSceneContiguousArrays::allocate(GpuSceneContiguousArrayType type)
+{
+	const U32 idx = m_allocs[type].allocateObject(m_scene->m_subsystems.m_gpuSceneMemoryPool, &m_scene->m_pool);
+	return PtrSize(idx) * m_componentCount[type] * m_componentSize[type] + m_allocs[type].m_poolToken.m_offset;
+}
+
+void AllGpuSceneContiguousArrays::deferredFree(GpuSceneContiguousArrayType type, PtrSize offset)
+{
+	ANKI_ASSERT(offset >= m_allocs[type].m_poolToken.m_offset);
+	offset -= m_allocs[type].m_poolToken.m_offset;
+	ANKI_ASSERT((offset % (m_componentCount[type] * m_componentSize[type])) == 0);
+	const U32 idx = U32(offset / (m_componentCount[type] * m_componentSize[type]));
+	m_allocs[type].deferredFree(m_frame, &m_scene->m_pool, idx);
+}
+
+void AllGpuSceneContiguousArrays::endFrame()
+{
+	m_frame = (m_frame + 1) % kMaxFramesInFlight;
+
+	for(GpuSceneContiguousArrayType type : EnumIterable<GpuSceneContiguousArrayType>())
+	{
+		m_allocs[type].collectGarbage(m_frame, m_scene->m_subsystems.m_gpuSceneMemoryPool, &m_scene->m_pool);
+	}
+}
+
+} // end namespace anki
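
allocate() and deferredFree() are inverses: one maps a slot index to a GPU scene byte offset, the other maps the offset back to an index. A worked round trip for kTransformPairs (assuming sizeof(Mat3x4) == 48, so one object is 2 * 48 == 96 bytes):

// allocate(): index -> offset
//   offset = idx * componentCount * componentSize + base
//   idx 5  -> 5 * 2 * 48 + base == base + 480
// deferredFree(): offset -> index (the inverse)
//   idx = (offset - base) / (componentCount * componentSize)
//   (base + 480 - base) / 96 == 5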

+ 113 - 0
AnKi/Scene/ContiguousArrayAllocator.h

@@ -0,0 +1,113 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Scene/Common.h>
+
+namespace anki {
+
+/// @addtogroup scene
+/// @{
+
+/// Some of the GPU scene structures are stored in structured buffers
+enum class GpuSceneContiguousArrayType : U8
+{
+	kTransformPairs,
+	kMeshLods,
+	kParticleEmitters,
+
+	kCount,
+	kFirst = 0
+};
+ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(GpuSceneContiguousArrayType)
+
+/// Contains a number of contiguous array allocators for various GPU scene contiguous objects.
+class AllGpuSceneContiguousArrays
+{
+public:
+	void init(SceneGraph* scene);
+
+	void destroy();
+
+	PtrSize allocate(GpuSceneContiguousArrayType type);
+
+	void deferredFree(GpuSceneContiguousArrayType type, PtrSize offset);
+
+	void endFrame();
+
+private:
+	/// GPU scene allocator that emulates a contiguous array of elements. It's an array of objects of the same size,
+	/// stored in one contiguous piece of memory, which keeps objects that serve the same function close together.
+	class ContiguousArrayAllocator
+	{
+		friend class AllGpuSceneContiguousArrays;
+
+	public:
+		using Index = U32;
+
+		~ContiguousArrayAllocator()
+		{
+			ANKI_ASSERT(m_nextSlotIndex == 0 && "Forgot to deallocate");
+			for([[maybe_unused]] const DynamicArray<Index>& arr : m_garbage)
+			{
+				ANKI_ASSERT(arr.getSize() == 0);
+			}
+			ANKI_ASSERT(m_poolToken.m_offset == kMaxPtrSize);
+		}
+
+		void init(U32 initialArraySize, U16 objectSize, F32 arrayGrowRate)
+		{
+			ANKI_ASSERT(initialArraySize > 0);
+			ANKI_ASSERT(objectSize > 0 && objectSize <= 256); // 256 is arbitrary
+			ANKI_ASSERT(arrayGrowRate > 1.0);
+			m_objectSize = objectSize;
+			m_growRate = arrayGrowRate;
+			m_initialArraySize = initialArraySize;
+		}
+
+		void destroy(GpuSceneMemoryPool* gpuScene, HeapMemoryPool* cpuPool);
+
+		/// Allocate a new object and return its index in the array.
+		/// @note It's thread-safe against itself, deferredFree and endFrame.
+		Index allocateObject(GpuSceneMemoryPool* gpuScene, HeapMemoryPool* cpuPool);
+
+		/// Safely free an index allocated by allocateObject.
+		/// @note It's thread-safe against itself, allocateObject and endFrame.
+		void deferredFree(U32 crntFrameIdx, HeapMemoryPool* cpuPool, Index index);
+
+		/// Call this every frame.
+		/// @note It's thread-safe against itself, deferredFree and allocateObject.
+		void collectGarbage(U32 newFrameIdx, GpuSceneMemoryPool* gpuScene, HeapMemoryPool* cpuPool);
+
+	private:
+		SegregatedListsGpuMemoryPoolToken m_poolToken;
+
+		DynamicArray<Index> m_freeSlotStack;
+
+		Array<DynamicArray<Index>, kMaxFramesInFlight> m_garbage;
+
+		SpinLock m_mtx;
+
+		F32 m_growRate = 2.0;
+		U32 m_initialArraySize = 0;
+		U16 m_objectSize = 0;
+
+		U32 m_nextSlotIndex = 0;
+	};
+
+	SceneGraph* m_scene = nullptr;
+
+	Array<ContiguousArrayAllocator, U32(GpuSceneContiguousArrayType::kCount)> m_allocs;
+
+	U8 m_frame = 0;
+
+	static constexpr Array<U8, U32(GpuSceneContiguousArrayType::kCount)> m_componentCount = {2, kMaxLodCount, 1};
+	static constexpr Array<U8, U32(GpuSceneContiguousArrayType::kCount)> m_componentSize = {
+		sizeof(Mat3x4), sizeof(GpuSceneMeshLod), sizeof(GpuSceneParticleEmitter)};
+};
+/// @}
+
+} // end namespace anki
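
Putting the pieces together, a component's lifetime against these arrays follows the pattern used by ModelComponent above. A sketch (the member name m_transformsOffset is illustrative):

// Constructor: reserve one slot and remember its byte offset into the GPU scene.
m_transformsOffset = U32(node->getSceneGraph().getAllGpuSceneContiguousArrays().allocate(
	GpuSceneContiguousArrayType::kTransformPairs));

// Destructor: return the slot. It is only recycled once the in-flight frames
// that may still read it have finished (see collectGarbage()).
node->getSceneGraph().getAllGpuSceneContiguousArrays().deferredFree(
	GpuSceneContiguousArrayType::kTransformPairs, m_transformsOffset);

// Once per frame, SceneGraph::update() advances the frame ring:
m_gpuSceneAllocators.endFrame();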

+ 6 - 0
AnKi/Scene/SceneGraph.cpp

@@ -47,6 +47,8 @@ SceneGraph::~SceneGraph()
 	{
 		deleteInstance(m_pool, m_octree);
 	}
+
+	m_gpuSceneAllocators.destroy();
 }
 
 Error SceneGraph::init(const SceneGraphInitInfo& initInfo)
@@ -67,6 +69,8 @@ Error SceneGraph::init(const SceneGraphInitInfo& initInfo)
 	camc->setPerspective(0.1f, 1000.0f, toRad(60.0f), (1080.0f / 1920.0f) * toRad(60.0f));
 	m_mainCam = m_defaultMainCam;
 
+	m_gpuSceneAllocators.init(this);
+
 	return Error::kNone;
 }
 
@@ -159,6 +163,8 @@ Error SceneGraph::update(Second prevUpdateTime, Second crntTime)
 	ANKI_ASSERT(m_mainCam);
 	ANKI_TRACE_SCOPED_EVENT(SceneUpdate);
 
+	m_gpuSceneAllocators.endFrame();
+
 	m_stats.m_updateTime = HighRezTimer::getCurrentTime();
 
 	m_timestamp = *m_subsystems.m_globalTimestamp;

+ 9 - 0
AnKi/Scene/SceneGraph.h

@@ -7,6 +7,7 @@
 
 #include <AnKi/Scene/Common.h>
 #include <AnKi/Scene/SceneNode.h>
+#include <AnKi/Scene/ContiguousArrayAllocator.h>
 #include <AnKi/Math.h>
 #include <AnKi/Util/HashMap.h>
 #include <AnKi/Core/App.h>
@@ -43,6 +44,7 @@ class SceneGraph
 	friend class SceneNode;
 	friend class UpdateSceneNodesTask;
 	friend class Event;
+	friend class AllGpuSceneContiguousArrays;
 
 public:
 	SceneGraph();
@@ -169,6 +171,11 @@ public:
 		return *m_octree;
 	}
 
+	ANKI_INTERNAL AllGpuSceneContiguousArrays& getAllGpuSceneContiguousArrays()
+	{
+		return m_gpuSceneAllocators;
+	}
+
 private:
 	class UpdateSceneNodesCtx;
 
@@ -200,6 +207,8 @@ private:
 
 	SceneGraphStats m_stats;
 
+	AllGpuSceneContiguousArrays m_gpuSceneAllocators;
+
 	/// Put a node in the appropriate containers
 	Error registerNode(SceneNode* node);
 	void unregisterNode(SceneNode* node);

+ 1 - 1
AnKi/Shaders/ForwardShadingParticles.ankiprog

@@ -41,7 +41,7 @@ VertOut main(VertIn input)
 	const U32 vertexId = input.m_svVertexId % 6u;
 
 	const GpuSceneRenderable renderable = unpackGpuSceneRenderable(input.m_gpuSceneRenderable);
-	const GpuSceneParticles particles = g_gpuScene.Load<GpuSceneParticles>(renderable.m_geometryOffset);
+	const GpuSceneParticleEmitter particles = g_gpuScene.Load<GpuSceneParticleEmitter>(renderable.m_geometryOffset);
 
 	U32 idx = particles.m_vertexOffsets[(U32)VertexStreamId::kParticlePosition] + particleId * sizeof(Vec3);
 	const Vec3 vertPos = g_gpuScene.Load<Vec3>(idx);

+ 2 - 2
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -32,13 +32,13 @@ struct GpuSceneMeshLod
 };
 static_assert(sizeof(GpuSceneMeshLod) == sizeof(Vec4) * 3);
 
-struct GpuSceneParticles
+struct GpuSceneParticleEmitter
 {
 	U32 m_vertexOffsets[(U32)VertexStreamId::kParticleRelatedCount];
 	U32 m_padding0;
 	U32 m_padding1;
 };
-static_assert(sizeof(GpuSceneParticles) == sizeof(Vec4) * 2);
+static_assert(sizeof(GpuSceneParticleEmitter) == sizeof(Vec4) * 2);
 
 struct RenderableGpuView
 {

+ 12 - 1
AnKi/Util/Array.h

@@ -171,7 +171,7 @@ public:
 	}
 
 	/// Fill the array.
-	void fill(Iterator begin, Iterator end, const T& val)
+	static void fill(Iterator begin, Iterator end, const T& val)
 	{
 		while(begin != end)
 		{
@@ -180,6 +180,17 @@ public:
 		}
 	}
 
+	void fill(const T& val)
+	{
+		auto begin = getBegin();
+		auto end = getEnd();
+		while(begin != end)
+		{
+			*begin = val;
+			++begin;
+		}
+	}
+
 	// Get size in bytes
 #define ANKI_ARRAY_SIZE_IN_BYTES_METHOD(type, condition) \
 	ANKI_ENABLE_METHOD(condition) \
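
Making the range fill static allows filling any iterator range without an instance, while the new member overload covers the common whole-array case. A small usage sketch (assuming a U32 instantiation and the usual getBegin()/getEnd() accessors):

Array<U32, 8> arr;
arr.fill(0);                                                    // member overload: whole array
Array<U32, 8>::fill(arr.getBegin() + 2, arr.getBegin() + 6, 7); // static range fill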