Browse Source

Add readback manager to the renderer

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
412badef9a

+ 1 - 0
AnKi/Core/GpuMemory/GpuReadbackMemoryPool.cpp

@@ -36,6 +36,7 @@ GpuReadbackMemoryAllocation GpuReadbackMemoryPool::allocate(PtrSize size)
 void GpuReadbackMemoryPool::deferredFree(GpuReadbackMemoryAllocation& allocation)
 void GpuReadbackMemoryPool::deferredFree(GpuReadbackMemoryAllocation& allocation)
 {
 {
 	m_pool.deferredFree(allocation.m_token);
 	m_pool.deferredFree(allocation.m_token);
+	::new(&allocation) GpuReadbackMemoryAllocation(); // Re-construct in place so the caller's allocation goes back to its default-constructed state
 }
 }
 
 
 void GpuReadbackMemoryPool::endFrame()
 void GpuReadbackMemoryPool::endFrame()

+ 6 - 0
AnKi/Core/GpuMemory/GpuReadbackMemoryPool.h

@@ -64,6 +64,12 @@ public:
 		return *m_buffer;
 		return *m_buffer;
 	}
 	}
 
 
+	const void* getMappedMemory() const // Returns the stored m_mappedMemory pointer
+	{
+		ANKI_ASSERT(isValid()); // Only a valid allocation may be queried
+		return m_mappedMemory;
+	}
+
 private:
 private:
 	SegregatedListsGpuMemoryPoolToken m_token;
 	SegregatedListsGpuMemoryPoolToken m_token;
 	Buffer* m_buffer = nullptr;
 	Buffer* m_buffer = nullptr;

+ 5 - 6
AnKi/Gr/Vulkan/GrManagerImpl.cpp

@@ -1362,7 +1362,6 @@ void GrManagerImpl::flushCommandBuffer(MicroCommandBufferPtr cmdb, Bool cmdbRend
 	timelineInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
 	timelineInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
 	timelineInfo.waitSemaphoreValueCount = userWaitSemaphores.getSize();
 	timelineInfo.waitSemaphoreValueCount = userWaitSemaphores.getSize();
 	timelineInfo.pWaitSemaphoreValues = &waitTimelineValues[0];
 	timelineInfo.pWaitSemaphoreValues = &waitTimelineValues[0];
-	timelineInfo.signalSemaphoreValueCount = (userSignalSemaphore != nullptr);
 	timelineInfo.pSignalSemaphoreValues = &signalTimelineValues[0];
 	timelineInfo.pSignalSemaphoreValues = &signalTimelineValues[0];
 	submit.pNext = &timelineInfo;
 	submit.pNext = &timelineInfo;
 
 
@@ -1387,11 +1386,8 @@ void GrManagerImpl::flushCommandBuffer(MicroCommandBufferPtr cmdb, Bool cmdbRend
 	{
 	{
 		*userSignalSemaphore = m_semaphoreFactory.newInstance(fence, true);
 		*userSignalSemaphore = m_semaphoreFactory.newInstance(fence, true);
 
 
-		signalSemaphores[submit.signalSemaphoreCount] = (*userSignalSemaphore)->getHandle();
-
-		signalTimelineValues[submit.signalSemaphoreCount] = (*userSignalSemaphore)->getNextSemaphoreValue();
-
-		++submit.signalSemaphoreCount;
+		signalSemaphores[submit.signalSemaphoreCount++] = (*userSignalSemaphore)->getHandle();
+		signalTimelineValues[timelineInfo.signalSemaphoreValueCount++] = (*userSignalSemaphore)->getNextSemaphoreValue();
 	}
 	}
 
 
 	// Submit
 	// Submit
@@ -1419,6 +1415,9 @@ void GrManagerImpl::flushCommandBuffer(MicroCommandBufferPtr cmdb, Bool cmdbRend
 
 
 			signalSemaphores[submit.signalSemaphoreCount++] = frame.m_renderSemaphore->getHandle();
 			signalSemaphores[submit.signalSemaphoreCount++] = frame.m_renderSemaphore->getHandle();
 
 
+			// Increment the timeline values as well because the spec wants a dummy value even for non-timeline semaphores
+			signalTimelineValues[timelineInfo.signalSemaphoreValueCount++] = 0;
+
 			// Update the frame fence
 			// Update the frame fence
 			frame.m_presentFence = fence;
 			frame.m_presentFence = fence;
 
 

+ 3 - 2
AnKi/Renderer/MainRenderer.cpp

@@ -149,11 +149,12 @@ Error MainRenderer::render(RenderQueue& rqueue, Texture* presentTex)
 	m_rgraph->run();
 	m_rgraph->run();
 
 
 	// Flush
 	// Flush
-	m_rgraph->flush();
+	FencePtr fence;
+	m_rgraph->flush(&fence);
 
 
 	// Reset for the next frame
 	// Reset for the next frame
 	m_rgraph->reset();
 	m_rgraph->reset();
-	m_r->finalize(ctx);
+	m_r->finalize(ctx, fence.get());
 
 
 	// Stats
 	// Stats
 	if(m_statsEnabled)
 	if(m_statsEnabled)

+ 101 - 0
AnKi/Renderer/Readback.cpp

@@ -0,0 +1,101 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Renderer/Readback.h>
+
+namespace anki {
+
+// Allocate readback memory for the current frame in the token's current slot and then advance the slot.
+// @param token The token that records the allocation and the frame it was made in.
+// @param size The size passed to GpuReadbackMemoryPool::allocate.
+// @param[out] buffer The buffer that backs the new allocation.
+// @param[out] bufferOffset The offset of the new allocation as reported by the allocation itself.
+void ReadbackManager::allocateData(MultiframeReadbackToken& token, PtrSize size, Buffer*& buffer, PtrSize& bufferOffset)
+{
+	for([[maybe_unused]] U64 frame : token.m_frameIds)
+	{
+		ANKI_ASSERT(frame != m_frameId && "Can't allocate multiple times in a frame");
+	}
+
+	const U32 slotIdx = token.m_slot;
+	GpuReadbackMemoryAllocation& slotAllocation = token.m_allocations[slotIdx];
+
+	// The slot is about to be reused. If its previous allocation was never read back then release it here
+	if(slotAllocation.isValid()) [[unlikely]]
+	{
+		ANKI_R_LOGW("Allocation hasn't been released. Haven't called getMostRecentReadDataAndRelease");
+		GpuReadbackMemoryPool::getSingleton().deferredFree(slotAllocation);
+	}
+
+	ANKI_ASSERT(!token.m_allocations[token.m_slot].isValid());
+
+	// Record the new allocation together with the frame it belongs to
+	slotAllocation = GpuReadbackMemoryPool::getSingleton().allocate(size);
+	token.m_frameIds[slotIdx] = m_frameId;
+
+	// Report where the caller should write
+	buffer = &slotAllocation.getBuffer();
+	bufferOffset = slotAllocation.getOffset();
+
+	// Move to the next slot, wrapping around
+	token.m_slot = (slotIdx + 1) % kMaxFramesInFlight;
+}
+
+// Copy the contents of the most suitable readback allocation of the token into a caller-provided buffer and release that
+// allocation.
+// @param token The token whose slots are searched.
+// @param[out] data The destination buffer. Can't be nullptr.
+// @param dataSize The capacity of @a data in bytes. Must be greater than zero.
+// @param[out] dataOut The number of bytes written to @a data. It's zero when no slot holds a valid allocation that is old
+//                     enough.
+void ReadbackManager::getMostRecentReadDataAndRelease(MultiframeReadbackToken& token, void* data, PtrSize dataSize, PtrSize& dataOut)
+{
+	ANKI_ASSERT(data && dataSize > 0);
+	dataOut = 0;
+
+	// Prefer the allocation made exactly kMaxFramesInFlight-1 frames before the current one. If there isn't one fall back to
+	// an allocation that is even older
+	const U64 earliestFrame = m_frameId - (kMaxFramesInFlight - 1);
+	U32 bestSlot = kMaxU32;
+	U32 secondBestSlot = kMaxU32;
+
+	for(U32 i = 0; i < kMaxFramesInFlight; ++i)
+	{
+		if(token.m_frameIds[i] == earliestFrame && token.m_allocations[i].isValid())
+		{
+			bestSlot = i;
+		}
+		else if(token.m_frameIds[i] < earliestFrame && token.m_allocations[i].isValid())
+		{
+			secondBestSlot = i;
+		}
+	}
+
+	const U32 slot = (bestSlot != kMaxU32) ? bestSlot : secondBestSlot;
+	if(slot == kMaxU32)
+	{
+		// Nothing to read back, dataOut stays zero
+		return;
+	}
+
+	GpuReadbackMemoryAllocation& allocation = token.m_allocations[slot];
+
+	// Clamp the copy to the caller's capacity. Overwriting dataSize with the allocation size would let memcpy write past the
+	// end of the caller's buffer and would leave dataOut unset
+	const PtrSize allocationSize = allocation.getAllocatedSize();
+	dataOut = min(dataSize, allocationSize);
+
+	memcpy(data, static_cast<const U8*>(allocation.getMappedMemory()) + allocation.getOffset(), dataOut);
+
+	GpuReadbackMemoryPool::getSingleton().deferredFree(allocation);
+}
+
+// Close the current frame: drop the fences that have signaled, store the given fence in the current frame's slot and advance
+// the frame counter.
+// @param fence The fence to store for the frame that just ended. Can't be nullptr.
+void ReadbackManager::endFrame(Fence* fence)
+{
+	ANKI_ASSERT(fence);
+
+	// Release fences
+	for(Frame& inflight : m_frames)
+	{
+		// clientWait() is called with a zero argument here, a fence is dropped only when that call reports success
+		if(inflight.m_fence.isCreated() && inflight.m_fence->clientWait(0.0))
+		{
+			inflight.m_fence.reset(nullptr);
+		}
+	}
+
+	// The slot of the current frame is about to be overwritten. If it still holds a fence then block on it
+	Frame& crntFrame = m_frames[m_frameId % m_frames.getSize()];
+	if(crntFrame.m_fence.isCreated()) [[unlikely]]
+	{
+		ANKI_R_LOGW("Readback fence is not signaled. Need to wait it");
+		if(!crntFrame.m_fence->clientWait(10.0_sec))
+		{
+			ANKI_R_LOGF("Fence won't signal. Can't recover");
+		}
+	}
+
+	crntFrame.m_fence.reset(fence);
+	++m_frameId;
+}
+
+} // end namespace anki

+ 29 - 24
AnKi/Renderer/Readback.h

@@ -13,43 +13,48 @@ namespace anki {
 /// @addtogroup renderer
 /// @addtogroup renderer
 /// @{
 /// @{
 
 
-class MultiframeReadback
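+/// Holds one readback allocation slot per frame in flight. Its state is private and only accessed by ReadbackManager.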
+class MultiframeReadbackToken
 {
 {
-public:
-	void* getMostRecentReadData(PtrSize* dataSize = nullptr) const
-	{
-	}
-
-	void freeMostRecentData()
-	{
-	}
-
-	void allocateData(PtrSize size, Buffer*& buffer, PtrSize bufferOffset)
-	{
-		if(m_fences[m_crntSlot].isCreated())
-		{
-			ANKI_R_LOGW("Allocation not freed. Will have to free it now");
-			GpuReadbackMemoryPool::getSingleton().deferredFree(m_allocations[m_crntSlot]);
-			m_fences[m_crntSlot].reset(nullptr);
-		}
-
-		m_crntSlot = (m_crntSlot + 1) % kMaxFramesInFlight;
-	}
+	friend class ReadbackManager;
 
 
 private:
 private:
 	Array<GpuReadbackMemoryAllocation, kMaxFramesInFlight> m_allocations;
 	Array<GpuReadbackMemoryAllocation, kMaxFramesInFlight> m_allocations;
-	Array<U64, kMaxFramesInFlight> m_frames;
+	Array<U64, kMaxFramesInFlight> m_frameIds = {};
+	U32 m_slot = 0;
 };
 };
 
 
-class ReadbackManager : public MakeSingleton<ReadbackManager>
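+/// Allocates readback memory through MultiframeReadbackToken objects and keeps one fence per frame in flight.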
+class ReadbackManager
 {
 {
 	template<typename>
 	template<typename>
 	friend class MakeSingleton;
 	friend class MakeSingleton;
 
 
 public:
 public:
+	/// @note Not thread-safe
+	void allocateData(MultiframeReadbackToken& token, PtrSize size, Buffer*& buffer, PtrSize& bufferOffset);
+
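+	/// Copy the readback data of the token that are available to the CPU into @a data and release the backing allocation.
+	/// Nothing is copied if the token has no suitable allocation.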
+	/// @note Not thread-safe
+	void getMostRecentReadDataAndRelease(MultiframeReadbackToken& token, void* data, PtrSize dataSize, PtrSize& dataOut);
+
+	/// @note Not thread-safe
+	template<typename TMemPool>
+	void getMostRecentReadDataAndRelease(MultiframeReadbackToken& token, DynamicArray<U8, TMemPool>& data);
+
+	/// @note Not thread-safe
+	void endFrame(Fence* fence);
+
 private:
 private:
-};
+	class Frame
+	{
+	public:
+		FencePtr m_fence;
+	};
 
 
+	Array<Frame, kMaxFramesInFlight> m_frames;
+	U64 m_frameId = kMaxFramesInFlight;
+};
 /// @}
 /// @}
 
 
 } // end namespace anki
 } // end namespace anki

+ 3 - 1
AnKi/Renderer/Renderer.cpp

@@ -390,7 +390,7 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	return Error::kNone;
 	return Error::kNone;
 }
 }
 
 
-void Renderer::finalize(const RenderingContext& ctx)
+void Renderer::finalize(const RenderingContext& ctx, Fence* fence)
 {
 {
 	++m_frameCount;
 	++m_frameCount;
 
 
@@ -405,6 +405,8 @@ void Renderer::finalize(const RenderingContext& ctx)
 		m_depthDownscale->getClientDepthMapInfo(depthValues, width, height);
 		m_depthDownscale->getClientDepthMapInfo(depthValues, width, height);
 		ctx.m_renderQueue->m_fillCoverageBufferCallback(ctx.m_renderQueue->m_fillCoverageBufferCallbackUserData, depthValues, width, height);
 		ctx.m_renderQueue->m_fillCoverageBufferCallback(ctx.m_renderQueue->m_fillCoverageBufferCallbackUserData, depthValues, width, height);
 	}
 	}
+
+	m_readbaks.endFrame(fence);
 }
 }
 
 
 TextureInitInfo Renderer::create2DRenderTargetInitInfo(U32 w, U32 h, Format format, TextureUsageBit usage, CString name)
 TextureInitInfo Renderer::create2DRenderTargetInitInfo(U32 w, U32 h, Format format, TextureUsageBit usage, CString name)

+ 8 - 1
AnKi/Renderer/Renderer.h

@@ -13,6 +13,7 @@
 #include <AnKi/Gr.h>
 #include <AnKi/Gr.h>
 #include <AnKi/Resource/Forward.h>
 #include <AnKi/Resource/Forward.h>
 #include <AnKi/Collision/Forward.h>
 #include <AnKi/Collision/Forward.h>
+#include <AnKi/Renderer/Readback.h>
 
 
 namespace anki {
 namespace anki {
 
 
@@ -73,7 +74,7 @@ public:
 	/// This function does all the rendering stages and produces a final result.
 	/// This function does all the rendering stages and produces a final result.
 	Error populateRenderGraph(RenderingContext& ctx);
 	Error populateRenderGraph(RenderingContext& ctx);
 
 
-	void finalize(const RenderingContext& ctx);
+	void finalize(const RenderingContext& ctx, Fence* fence);
 
 
 	U64 getFrameCount() const
 	U64 getFrameCount() const
 	{
 	{
@@ -100,6 +101,11 @@ public:
 		return m_hzbHelper;
 		return m_hzbHelper;
 	}
 	}
 
 
+	ReadbackManager& getReadbackManager() // Access the ReadbackManager stored in m_readbaks
+	{
+		return m_readbaks;
+	}
+
 	/// Create the init info for a 2D texture that will be used as a render target.
 	/// Create the init info for a 2D texture that will be used as a render target.
 	[[nodiscard]] TextureInitInfo create2DRenderTargetInitInfo(U32 w, U32 h, Format format, TextureUsageBit usage, CString name = {});
 	[[nodiscard]] TextureInitInfo create2DRenderTargetInitInfo(U32 w, U32 h, Format format, TextureUsageBit usage, CString name = {});
 
 
@@ -195,6 +201,7 @@ private:
 	RenderableDrawer m_sceneDrawer;
 	RenderableDrawer m_sceneDrawer;
 	GpuVisibility m_visibility;
 	GpuVisibility m_visibility;
 	HzbHelper m_hzbHelper;
 	HzbHelper m_hzbHelper;
+	ReadbackManager m_readbaks;
 
 
 	U64 m_frameCount; ///< Frame number
 	U64 m_frameCount; ///< Frame number