Browse Source

Create the GPU scene patcher

Panagiotis Christopoulos Charitos 3 years ago
parent
commit
45512282ca

+ 11 - 0
AnKi/Core/App.cpp

@@ -124,6 +124,8 @@ void App::cleanup()
 	m_renderer = nullptr;
 	deleteInstance(m_mainPool, m_ui);
 	m_ui = nullptr;
+	deleteInstance(m_mainPool, m_gpuSceneMicroPatcher);
+	m_gpuSceneMicroPatcher = nullptr;
 	deleteInstance(m_mainPool, m_resources);
 	m_resources = nullptr;
 	deleteInstance(m_mainPool, m_resourceFs);
@@ -348,6 +350,12 @@ Error App::initInternal(AllocAlignedCallback allocCb, void* allocCbUserData)
 	m_ui = newInstance<UiManager>(m_mainPool);
 	ANKI_CHECK(m_ui->init(uiInitInfo));
 
+	//
+	// GPU scene
+	//
+	m_gpuSceneMicroPatcher = newInstance<GpuSceneMicroPatcher>(m_mainPool);
+	ANKI_CHECK(m_gpuSceneMicroPatcher->init(m_resources));
+
 	//
 	// Renderer
 	//
@@ -362,6 +370,8 @@ Error App::initInternal(AllocAlignedCallback allocCb, void* allocCbUserData)
 	renderInit.m_uiManager = m_ui;
 	renderInit.m_config = m_config;
 	renderInit.m_globTimestamp = &m_globalTimestamp;
+	renderInit.m_gpuScenePool = m_gpuSceneMemPool;
+	renderInit.m_gpuSceneMicroPatcher = m_gpuSceneMicroPatcher;
 	m_renderer = newInstance<MainRenderer>(m_mainPool);
 	ANKI_CHECK(m_renderer->init(renderInit));
 
@@ -382,6 +392,7 @@ Error App::initInternal(AllocAlignedCallback allocCb, void* allocCbUserData)
 	sceneInit.m_config = m_config;
 	sceneInit.m_globalTimestamp = &m_globalTimestamp;
 	sceneInit.m_gpuSceneMemoryPool = m_gpuSceneMemPool;
+	sceneInit.m_gpuSceneMicroPatcher = m_gpuSceneMicroPatcher;
 	sceneInit.m_input = m_input;
 	sceneInit.m_resourceManager = m_resources;
 	sceneInit.m_scriptManager = m_script;

+ 2 - 0
AnKi/Core/App.h

@@ -32,6 +32,7 @@ class UiQueueElement;
 class RenderQueue;
 class MaliHwCounters;
 class GpuSceneMemoryPool;
+class GpuSceneMicroPatcher;
 
 /// The core class of the engine.
 class App
@@ -149,6 +150,7 @@ private:
 	MaliHwCounters* m_maliHwCounters = nullptr;
 	UnifiedGeometryMemoryPool* m_unifiedGometryMemPool = nullptr;
 	GpuSceneMemoryPool* m_gpuSceneMemPool = nullptr;
+	GpuSceneMicroPatcher* m_gpuSceneMicroPatcher = nullptr;
 	RebarStagingGpuMemoryPool* m_rebarPool = nullptr;
 	PhysicsWorld* m_physics = nullptr;
 	ResourceFilesystem* m_resourceFs = nullptr;

+ 101 - 2
AnKi/Core/GpuMemoryPools.cpp

@@ -6,7 +6,9 @@
 #include <AnKi/Core/GpuMemoryPools.h>
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Gr/GrManager.h>
+#include <AnKi/Gr/CommandBuffer.h>
 #include <AnKi/Util/Tracer.h>
+#include <AnKi/Resource/ResourceManager.h>
 
 namespace anki {
 
@@ -25,7 +27,7 @@ void UnifiedGeometryMemoryPool::init(HeapMemoryPool* pool, GrManager* gr, const
 		buffUsage |= BufferUsageBit::kAccelerationStructureBuild;
 	}
 
-	m_alloc.init(gr, pool, buffUsage, classes, poolSize, "UnifiedGeometry", false);
+	m_pool.init(gr, pool, buffUsage, classes, poolSize, "UnifiedGeometry", false);
 }
 
 void GpuSceneMemoryPool::init(HeapMemoryPool* pool, GrManager* gr, const ConfigSet& cfg)
@@ -38,7 +40,7 @@ void GpuSceneMemoryPool::init(HeapMemoryPool* pool, GrManager* gr, const ConfigS
 
 	BufferUsageBit buffUsage = BufferUsageBit::kAllStorage | BufferUsageBit::kTransferDestination;
 
-	m_alloc.init(gr, pool, buffUsage, classes, poolSize, "GpuScene", true);
+	m_pool.init(gr, pool, buffUsage, classes, poolSize, "GpuScene", true);
 }
 
 RebarStagingGpuMemoryPool::~RebarStagingGpuMemoryPool()
@@ -130,4 +132,101 @@ PtrSize RebarStagingGpuMemoryPool::endFrame()
 	return usedMemory;
 }
 
+/// It packs the source and destination offsets as well as the size of the patch itself.
+/// GpuSceneMicroPatcher::newCopy() appends one header per patch and GpuSceneMicroPatcher::patchGpuScene() uploads the
+/// whole array of headers. The size of the class is checked with a static_assert in the destructor of the patcher.
+class GpuSceneMicroPatcher::PatchHeader
+{
+public:
+	/// Upper 6 bits: the number of dwords of the patch minus one. Lower 26 bits: the offset, in dwords, of the
+	/// patch's payload inside the per-frame patch data array.
+	U32 m_dwordCountAndSrcDwordOffsetPack;
+	/// The destination offset of the patch, in dwords, derived from the gpuSceneDestOffset given to newCopy().
+	U32 m_dstDwordOffset;
+};
+
+/// The body contains no statements besides a compile-time check of the size of PatchHeader.
+GpuSceneMicroPatcher::~GpuSceneMicroPatcher()
+{
+	// PatchHeader is two U32s and it must not contain padding.
+	// NOTE(review): presumably the size has to match the header structure that the patching shader reads - confirm.
+	static_assert(sizeof(PatchHeader) == 8);
+}
+
+/// Load the micro patching program and cache the GR program of one of its variants.
+/// @param rsrc The resource manager that will load the program binary.
+/// @return Error::kNone when the end of the method is reached.
+Error GpuSceneMicroPatcher::init(ResourceManager* rsrc)
+{
+	// m_copyProgram holds the loaded resource
+	ANKI_CHECK(rsrc->loadResource("ShaderBinaries/GpuSceneMicroPatching.ankiprogbin", m_copyProgram));
+
+	// Cache the program that patchGpuScene() binds before dispatching
+	const ShaderProgramResourceVariant* programVariant;
+	m_copyProgram->getOrCreateVariant(programVariant);
+	m_grProgram = programVariant->getProgram();
+
+	return Error::kNone;
+}
+
+/// Queue a copy of some CPU data to the GPU scene. The data are copied into the per-frame CPU arrays and they are
+/// split into patches of at most kDwordsPerPatch dwords. Every patch is described by a PatchHeader.
+/// @param frameCpuPool The pool that is used to grow the per-frame header and data arrays.
+/// @param gpuSceneDestOffset The destination offset in bytes. It should be a multiple of 4.
+/// @param dataSize The size of the data in bytes. It should be non-zero and a multiple of 4.
+/// @param data The data to copy. It should be 4 byte aligned. The data are copied before the method returns.
+void GpuSceneMicroPatcher::newCopy(StackMemoryPool& frameCpuPool, PtrSize gpuSceneDestOffset, PtrSize dataSize,
+								   const void* data)
+{
+	ANKI_ASSERT(dataSize > 0 && (dataSize % 4) == 0);
+	ANKI_ASSERT((ptrToNumber(data) % 4) == 0);
+	ANKI_ASSERT((gpuSceneDestOffset % 4) == 0 && gpuSceneDestOffset / 4 < kMaxU32);
+
+	const U32 dataDwords = U32(dataSize / 4);
+	U32 gpuSceneDestDwordOffset = U32(gpuSceneDestOffset / 4);
+
+	const U32* patchIt = static_cast<const U32*>(data);
+	const U32* const patchEnd = patchIt + dataDwords;
+
+	// Break the data into multiple copies
+	LockGuard lock(m_mtx);
+	while(patchIt < patchEnd)
+	{
+		// A single patch can't be larger than kDwordsPerPatch. Without the clamp the dword count doesn't fit in the
+		// 6 bits of the header and the tail of the data would be copied again and again
+		const U32 remainingDwords = U32(patchEnd - patchIt);
+		const U32 patchDwords = (remainingDwords < kDwordsPerPatch) ? remainingDwords : kDwordsPerPatch;
+
+		// The payload of this patch will start at the current end of the data array
+		const U32 srcOffset = m_crntFramePatchData.getSize();
+
+		// Header layout: [dword count - 1 : 6 bits][source dword offset : 26 bits]
+		PatchHeader& header = *m_crntFramePatchHeaders.emplaceBack(frameCpuPool);
+		ANKI_ASSERT(((patchDwords - 1) & 0b111111) == (patchDwords - 1));
+		header.m_dwordCountAndSrcDwordOffsetPack = patchDwords - 1;
+		header.m_dwordCountAndSrcDwordOffsetPack <<= 26;
+		ANKI_ASSERT((srcOffset & 0x3FFFFFF) == srcOffset);
+		header.m_dwordCountAndSrcDwordOffsetPack |= srcOffset;
+		header.m_dstDwordOffset = gpuSceneDestDwordOffset;
+
+		m_crntFramePatchData.resize(frameCpuPool, srcOffset + patchDwords);
+		memcpy(&m_crntFramePatchData[srcOffset], patchIt, patchDwords * 4);
+
+		// Advance by what was actually consumed. That way the iterator never moves beyond patchEnd
+		patchIt += patchDwords;
+		gpuSceneDestDwordOffset += patchDwords;
+	}
+}
+
+/// Upload whatever newCopy() has queued and record the compute dispatch that performs the copies. It returns early
+/// if nothing is queued. At the end the per-frame arrays are reset.
+/// @param rebarPool The pool that provides the per-frame memory for the headers and the patch data.
+/// @param cmdb The command buffer that records the bindings and the dispatch.
+/// @param gpuSceneBuffer The buffer that is bound as the 3rd storage buffer of the dispatch.
+void GpuSceneMicroPatcher::patchGpuScene(RebarStagingGpuMemoryPool& rebarPool, CommandBuffer& cmdb,
+										 const BufferPtr& gpuSceneBuffer)
+{
+	const U32 patchCount = m_crntFramePatchHeaders.getSize();
+	if(patchCount == 0)
+	{
+		// Nothing was queued
+		return;
+	}
+
+	ANKI_ASSERT(m_crntFramePatchData.getSize() > 0);
+
+	ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatches, m_crntFramePatchHeaders.getSize());
+	ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatchUploadData, m_crntFramePatchData.getSizeInBytes());
+
+	// Copy the headers to memory of the rebar pool
+	RebarGpuMemoryToken headersToken;
+	void* const mappedHeaders = rebarPool.allocateFrame(m_crntFramePatchHeaders.getSizeInBytes(), headersToken);
+	memcpy(mappedHeaders, &m_crntFramePatchHeaders[0], m_crntFramePatchHeaders.getSizeInBytes());
+
+	// Do the same for the payload of the patches
+	RebarGpuMemoryToken dataToken;
+	void* const mappedData = rebarPool.allocateFrame(m_crntFramePatchData.getSizeInBytes(), dataToken);
+	memcpy(mappedData, &m_crntFramePatchData[0], m_crntFramePatchData.getSizeInBytes());
+
+	// Bindings: 0 is the headers, 1 is the patch data and 2 is the buffer to patch
+	cmdb.bindStorageBuffer(0, 0, rebarPool.getBuffer(), headersToken.m_offset, headersToken.m_range);
+	cmdb.bindStorageBuffer(0, 1, rebarPool.getBuffer(), dataToken.m_offset, dataToken.m_range);
+	cmdb.bindStorageBuffer(0, 2, gpuSceneBuffer, 0, kMaxPtrSize);
+
+	cmdb.bindShaderProgram(m_grProgram);
+
+	// One workgroup per patch header
+	cmdb.dispatchCompute(patchCount, 1, 1);
+
+	// Cleanup to prepare for the new frame. The values that moveAndReset() returns are ignored.
+	// NOTE(review): presumably the storage belongs to the frame pool given to newCopy() and it's reclaimed together
+	// with that pool - confirm
+	U32* oldPatchData;
+	U32 oldPatchDataSize, oldPatchDataStorage;
+	m_crntFramePatchData.moveAndReset(oldPatchData, oldPatchDataSize, oldPatchDataStorage);
+
+	PatchHeader* oldHeaders;
+	U32 oldHeadersSize, oldHeadersStorage;
+	m_crntFramePatchHeaders.moveAndReset(oldHeaders, oldHeadersSize, oldHeadersStorage);
+}
+
 } // end namespace anki

+ 47 - 12
AnKi/Core/GpuMemoryPools.h

@@ -9,6 +9,7 @@
 #include <AnKi/Gr/Buffer.h>
 #include <AnKi/Gr/Utils/StackGpuMemoryPool.h>
 #include <AnKi/Gr/Utils/SegregatedListsGpuMemoryPool.h>
+#include <AnKi/Resource/ShaderProgramResource.h>
 
 namespace anki {
 
@@ -32,31 +33,31 @@ public:
 
 	void allocate(PtrSize size, U32 alignment, SegregatedListsGpuMemoryPoolToken& token)
 	{
-		m_alloc.allocate(size, alignment, token);
+		m_pool.allocate(size, alignment, token);
 	}
 
 	void free(SegregatedListsGpuMemoryPoolToken& token)
 	{
-		m_alloc.free(token);
+		m_pool.free(token);
 	}
 
 	void endFrame()
 	{
-		m_alloc.endFrame();
+		m_pool.endFrame();
 	}
 
 	const BufferPtr& getBuffer() const
 	{
-		return m_alloc.getGpuBuffer();
+		return m_pool.getGpuBuffer();
 	}
 
 	void getStats(F32& externalFragmentation, PtrSize& userAllocatedSize, PtrSize& totalSize) const
 	{
-		m_alloc.getStats(externalFragmentation, userAllocatedSize, totalSize);
+		m_pool.getStats(externalFragmentation, userAllocatedSize, totalSize);
 	}
 
 private:
-	SegregatedListsGpuMemoryPool m_alloc;
+	SegregatedListsGpuMemoryPool m_pool;
 };
 
 /// Memory pool for the GPU scene.
@@ -73,31 +74,31 @@ public:
 
 	void allocate(PtrSize size, U32 alignment, SegregatedListsGpuMemoryPoolToken& token)
 	{
-		m_alloc.allocate(size, alignment, token);
+		m_pool.allocate(size, alignment, token);
 	}
 
 	void free(SegregatedListsGpuMemoryPoolToken& token)
 	{
-		m_alloc.free(token);
+		m_pool.free(token);
 	}
 
 	void endFrame()
 	{
-		m_alloc.endFrame();
+		m_pool.endFrame();
 	}
 
 	const BufferPtr& getBuffer() const
 	{
-		return m_alloc.getGpuBuffer();
+		return m_pool.getGpuBuffer();
 	}
 
 	void getStats(F32& externalFragmentation, PtrSize& userAllocatedSize, PtrSize& totalSize) const
 	{
-		m_alloc.getStats(externalFragmentation, userAllocatedSize, totalSize);
+		m_pool.getStats(externalFragmentation, userAllocatedSize, totalSize);
 	}
 
 private:
-	SegregatedListsGpuMemoryPool m_alloc;
+	SegregatedListsGpuMemoryPool m_pool;
 };
 
 /// Token that gets returned when requesting for memory to write to a resource.
@@ -166,6 +167,40 @@ private:
 
 	U8 m_frameCount = 0;
 };
+
+/// Creates the copy jobs that will patch the GPU Scene.
+/// newCopy() queues data in per-frame CPU arrays. patchGpuScene() uploads everything that was queued, records a
+/// compute dispatch that performs the copies and resets the arrays.
+class GpuSceneMicroPatcher
+{
+public:
+	GpuSceneMicroPatcher() = default;
+
+	/// Non-copyable.
+	GpuSceneMicroPatcher(const GpuSceneMicroPatcher&) = delete;
+
+	~GpuSceneMicroPatcher();
+
+	/// Non-copyable.
+	GpuSceneMicroPatcher& operator=(const GpuSceneMicroPatcher&) = delete;
+
+	/// Load the shader program that performs the patching.
+	/// @param rsrc The resource manager that loads the program.
+	Error init(ResourceManager* rsrc);
+
+	/// Copy data for the GPU scene to a staging buffer.
+	/// @param frameCpuPool The pool used to grow the per-frame arrays of this class.
+	/// @param gpuSceneDestOffset The destination offset in bytes. Asserted to be a multiple of 4.
+	/// @param dataSize The size of the data in bytes. Asserted to be non-zero and a multiple of 4.
+	/// @param data The data to copy. Asserted to be 4 byte aligned.
+	/// @note It's thread-safe.
+	void newCopy(StackMemoryPool& frameCpuPool, PtrSize gpuSceneDestOffset, PtrSize dataSize, const void* data);
+
+	/// Copy the data to the GPU scene buffer.
+	/// @param rebarPool Provides the per-frame memory where the headers and the patch data are uploaded.
+	/// @param cmdb The command buffer that records the dispatch.
+	/// @param gpuSceneBuffer The buffer to patch.
+	void patchGpuScene(RebarStagingGpuMemoryPool& rebarPool, CommandBuffer& cmdb, const BufferPtr& gpuSceneBuffer);
+
+private:
+	/// The stride, in dwords, that newCopy() uses to walk the input when it splits it into patches.
+	static constexpr U32 kDwordsPerPatch = 64;
+
+	/// Describes a single patch. It's defined in the .cpp file.
+	class PatchHeader;
+
+	DynamicArray<PatchHeader> m_crntFramePatchHeaders; ///< One header per patch queued since the last reset.
+	DynamicArray<U32> m_crntFramePatchData; ///< The payload, in dwords, of the queued patches.
+	Mutex m_mtx; ///< Locked by newCopy() while it appends to the 2 arrays above.
+
+	ShaderProgramResourcePtr m_copyProgram; ///< The resource loaded by init().
+	ShaderProgramPtr m_grProgram; ///< The program that patchGpuScene() binds. Set by init().
+};
 /// @}
 
 } // end namespace anki

+ 2 - 0
AnKi/Renderer/Common.h

@@ -82,6 +82,8 @@ public:
 	UiManager* m_uiManager = nullptr;
 	ConfigSet* m_config = nullptr;
 	Timestamp* m_globTimestamp = nullptr;
+	GpuSceneMemoryPool* m_gpuScenePool = nullptr;
+	GpuSceneMicroPatcher* m_gpuSceneMicroPatcher = nullptr;
 };
 
 /// GPU buffers and textures that the clusterer refers to.

+ 0 - 10
AnKi/Renderer/RenderQueue.h

@@ -371,15 +371,6 @@ public:
 };
 static_assert(std::is_trivially_destructible<SkyboxQueueElement>::value == true);
 
-class GpuSceneMicroPatch
-{
-public:
-	PtrSize m_gpuSceneBufferOffset;
-	void* m_dataToCopy;
-	PtrSize m_dataToCopySize;
-};
-static_assert(std::is_trivially_destructible<GpuSceneMicroPatch>::value == true);
-
 /// The render queue. This is what the renderer is fed to render.
 class RenderQueue : public RenderingMatrices
 {
@@ -398,7 +389,6 @@ public:
 	WeakArray<UiQueueElement> m_uis;
 	WeakArray<GenericGpuComputeJobQueueElement> m_genericGpuComputeJobs;
 	WeakArray<RayTracingInstanceQueueElement> m_rayTracingInstances;
-	WeakArray<GpuSceneMicroPatch> m_gpuSceneMicroPatches;
 
 	/// Contains the ray tracing elements. The rest of the members are unused. It's separate to avoid multithreading
 	/// bugs.

+ 18 - 0
AnKi/Renderer/Renderer.cpp

@@ -343,6 +343,7 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	m_vrsSriGeneration->importRenderTargets(ctx);
 
 	// Populate render graph. WARNING Watch the order
+	gpuSceneCopy(ctx);
 	m_genericCompute->populateRenderGraph(ctx);
 	m_clusterBinning->populateRenderGraph(ctx);
 	if(m_accelerationStructureBuilder)
@@ -680,4 +681,21 @@ Format Renderer::getDepthNoStencilFormat() const
 	}
 }
 
+/// Import the GPU scene buffer to the render graph and add the compute pass that runs the GPU scene micro patcher.
+/// @param ctx The rendering context that owns the render graph description.
+void Renderer::gpuSceneCopy(RenderingContext& ctx)
+{
+	const BufferPtr& gpuSceneBuffer = m_subsystems.m_gpuScenePool->getBuffer();
+	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
+
+	// Import the buffer and keep its handle in the run context
+	m_runCtx.m_gpuSceneHandle = rgraph.importBuffer(gpuSceneBuffer, gpuSceneBuffer->getBufferUsage());
+
+	// The pass writes to the buffer from a compute shader
+	ComputeRenderPassDescription& patchPass = rgraph.newComputeRenderPass("GPU scene patching");
+	patchPass.newBufferDependency(m_runCtx.m_gpuSceneHandle, BufferUsageBit::kStorageComputeWrite);
+
+	// The actual work is deferred to the callback of the pass
+	patchPass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
+		auto& cmdb = *rgraphCtx.m_commandBuffer.get();
+		m_subsystems.m_gpuSceneMicroPatcher->patchGpuScene(*m_subsystems.m_rebarStagingPool, cmdb,
+														   m_subsystems.m_gpuScenePool->getBuffer());
+	});
+}
+
 } // end namespace anki

+ 8 - 0
AnKi/Renderer/Renderer.h

@@ -221,7 +221,15 @@ private:
 	DynamicArray<DebugRtInfo> m_debugRts;
 	String m_currentDebugRtName;
 
+	/// Holds the render graph handle of the GPU scene buffer. gpuSceneCopy() writes it when it imports the buffer.
+	class
+	{
+	public:
+		BufferHandle m_gpuSceneHandle;
+	} m_runCtx;
+
 	Error initInternal(UVec2 swapchainSize);
+
+	void gpuSceneCopy(RenderingContext& ctx);
 };
 /// @}
 

+ 2 - 0
AnKi/Scene/Common.h

@@ -19,6 +19,7 @@ class ConfigSet;
 class UiManager;
 class UnifiedGeometryMemoryPool;
 class GpuSceneMemoryPool;
+class GpuSceneMicroPatcher;
 class ScriptManager;
 class GrManager;
 class PhysicsWorld;
@@ -55,6 +56,7 @@ public:
 	PhysicsWorld* m_physicsWorld = nullptr;
 	UnifiedGeometryMemoryPool* m_unifiedGeometryMemPool = nullptr;
 	GpuSceneMemoryPool* m_gpuSceneMemoryPool = nullptr;
+	GpuSceneMicroPatcher* m_gpuSceneMicroPatcher = nullptr;
 };
 /// @}
 

+ 2 - 8
AnKi/Scene/Components/MoveComponent.cpp

@@ -86,17 +86,11 @@ Error MoveComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 	// Micro patch
 	if(dirty || m_dirtyLastFrame)
 	{
-		Mat3x4* trfs = newArray<Mat3x4>(*info.m_framePool, 2);
+		Array<Mat3x4, 2> trfs;
 		trfs[0] = Mat3x4(m_wtrf);
 		trfs[1] = Mat3x4(m_prevWTrf);
 
-		GpuSceneMicroPatch* patch = newInstance<GpuSceneMicroPatch>(*info.m_framePool);
-		patch->m_gpuSceneBufferOffset = m_gpuSceneTransforms.m_offset;
-		patch->m_dataToCopySize = sizeof(Mat3x4) * 2;
-		patch->m_dataToCopy = trfs;
-
-		GpuSceneMicroPatch** patchArray = newArray<GpuSceneMicroPatch*>(*info.m_framePool, 1);
-		info.m_gpuSceneMicroPatches = {patchArray, 1};
+		info.m_gpuSceneMicroPatcher->newCopy(*info.m_framePool, m_gpuSceneTransforms.m_offset, sizeof(trfs), &trfs[0]);
 	}
 
 	m_dirtyLastFrame = dirty;

+ 3 - 4
AnKi/Scene/Components/SceneComponent.h

@@ -13,7 +13,6 @@ namespace anki {
 
 // Forward
 class SceneComponentUpdateInfo;
-class GpuSceneMicroPatch;
 
 /// @addtogroup scene
 /// @{
@@ -98,12 +97,12 @@ private:
 class SceneComponentUpdateInfo
 {
 public:
-	SceneNode* m_node;
+	SceneNode* m_node = nullptr;
 	const Second m_previousTime;
 	const Second m_currentTime;
 	const Second m_dt;
-	WeakArray<GpuSceneMicroPatch*> m_gpuSceneMicroPatches;
-	StackMemoryPool* m_framePool;
+	StackMemoryPool* m_framePool = nullptr;
+	GpuSceneMicroPatcher* m_gpuSceneMicroPatcher = nullptr;
 
 	SceneComponentUpdateInfo(Second prevTime, Second crntTime)
 		: m_previousTime(prevTime)

+ 3 - 2
AnKi/Scene/SceneGraph.cpp

@@ -240,7 +240,8 @@ Error SceneGraph::updateNode(Second prevTime, Second crntTime, SceneNode& node)
 
 	// Components update
 	SceneComponentUpdateInfo componentUpdateInfo(prevTime, crntTime);
-	componentUpdateInfo.m_framePool = &node.getFrameMemoryPool();
+	componentUpdateInfo.m_framePool = &m_framePool;
+	componentUpdateInfo.m_gpuSceneMicroPatcher = m_subsystems.m_gpuSceneMicroPatcher;
 
 	Timestamp componentTimestamp = 0;
 	Bool atLeastOneComponentUpdated = false;
@@ -297,7 +298,7 @@ Error SceneGraph::updateNode(Second prevTime, Second crntTime, SceneNode& node)
 	return err;
 }
 
-Error SceneGraph::updateNodes(UpdateSceneNodesCtx& ctx) const
+Error SceneGraph::updateNodes(UpdateSceneNodesCtx& ctx)
 {
 	ANKI_TRACE_SCOPED_EVENT(SCENE_NODES_UPDATE);
 

+ 2 - 2
AnKi/Scene/SceneGraph.h

@@ -215,8 +215,8 @@ private:
 	/// Delete the nodes that are marked for deletion
 	void deleteNodesMarkedForDeletion();
 
-	Error updateNodes(UpdateSceneNodesCtx& ctx) const;
-	[[nodiscard]] static Error updateNode(Second prevTime, Second crntTime, SceneNode& node);
+	Error updateNodes(UpdateSceneNodesCtx& ctx);
+	Error updateNode(Second prevTime, Second crntTime, SceneNode& node);
 
 	/// Do visibility tests.
 	static void doVisibilityTests(SceneNode& frustumable, SceneGraph& scene, RenderQueue& rqueue);