Browse Source

Add some code for GPU readbacks

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
5a43e8d403

+ 4 - 0
AnKi/Core/App.cpp

@@ -17,6 +17,7 @@
 #include <AnKi/Core/CoreTracer.h>
 #include <AnKi/Core/GpuMemory/RebarTransientMemoryPool.h>
 #include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
+#include <AnKi/Core/GpuMemory/GpuReadbackMemoryPool.h>
 #include <AnKi/Core/DeveloperConsole.h>
 #include <AnKi/Core/StatsUi.h>
 #include <AnKi/Window/NativeWindow.h>
@@ -133,6 +134,7 @@ void App::cleanup()
 	GpuVisibleTransientMemoryPool::freeSingleton();
 	UnifiedGeometryBuffer::freeSingleton();
 	GpuSceneBuffer::freeSingleton();
+	GpuReadbackMemoryPool::freeSingleton();
 	CoreThreadHive::freeSingleton();
 	MaliHwCounters::freeSingleton();
 	GrManager::freeSingleton();
@@ -286,6 +288,7 @@ Error App::initInternal()
 	GpuSceneBuffer::allocateSingleton().init();
 	RebarTransientMemoryPool::allocateSingleton().init();
 	GpuVisibleTransientMemoryPool::allocateSingleton();
+	GpuReadbackMemoryPool::allocateSingleton();
 
 	//
 	// Physics
@@ -469,6 +472,7 @@ Error App::mainLoop()
 			UnifiedGeometryBuffer::getSingleton().endFrame();
 			GpuSceneBuffer::getSingleton().endFrame();
 			GpuVisibleTransientMemoryPool::getSingleton().endFrame();
+			GpuReadbackMemoryPool::getSingleton().endFrame();
 
 			// Update the trace info with some async loader stats
 			U64 asyncTaskCount = ResourceManager::getSingleton().getAsyncLoader().getCompletedTaskCount();

+ 4 - 2
AnKi/Core/CMakeLists.txt

@@ -7,7 +7,8 @@ set(sources
 	StatsUi.cpp
 	GpuMemory/UnifiedGeometryBuffer.cpp
 	GpuMemory/GpuSceneBuffer.cpp
-	GpuMemory/RebarTransientMemoryPool.cpp)
+	GpuMemory/RebarTransientMemoryPool.cpp
+	GpuMemory/GpuReadbackMemoryPool.cpp)
 
 set(headers
 	AllConfigVars.defs.h
@@ -23,7 +24,8 @@ set(headers
 	StdinListener.h
 	GpuMemory/UnifiedGeometryBuffer.h
 	GpuMemory/GpuSceneBuffer.h
-	GpuMemory/RebarTransientMemoryPool.h)
+	GpuMemory/RebarTransientMemoryPool.h
+	GpuMemory/GpuReadbackMemoryPool.h)
 
 add_library(AnKiCore ${sources} ${headers})
 

+ 46 - 0
AnKi/Core/GpuMemory/GpuReadbackMemoryPool.cpp

@@ -0,0 +1,46 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Core/GpuMemory/GpuReadbackMemoryPool.h>
+#include <AnKi/Gr/GrManager.h>
+
+namespace anki {
+
+/// Initialize the underlying segregated lists pool and cache the alignment that every allocation will use.
+GpuReadbackMemoryPool::GpuReadbackMemoryPool()
+{
+	// Upper sizes of the allocation classes. The largest one doubles as the initial size of the GPU buffer.
+	const Array sizeClasses = {64_B, 256_B, 1_MB, 5_MB};
+
+	// CoWs are disabled (the "false" argument) and the buffer is created with read map access.
+	m_pool.init(BufferUsageBit::kAllStorage, sizeClasses, sizeClasses.getBack(), "GpuReadback", false, BufferMapAccessBit::kRead);
+
+	// Allocations are aligned to the storage buffer bind offset alignment of the device.
+	m_alignment = GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment;
+}
+
+/// Nothing to do explicitly, the members clean up after themselves.
+GpuReadbackMemoryPool::~GpuReadbackMemoryPool() = default;
+
+/// Allocate @a size bytes from the pool using the alignment cached at construction.
+/// @return An allocation that points to the pool's GPU buffer and to the CPU address of the allocated range.
+GpuReadbackMemoryAllocation GpuReadbackMemoryPool::allocate(PtrSize size)
+{
+	GpuReadbackMemoryAllocation alloc;
+	m_pool.allocate(size, m_alignment, alloc.m_token);
+
+	alloc.m_buffer = &m_pool.getGpuBuffer();
+
+	// The whole buffer is mapped, offset the base address to reach this allocation
+	U8* const mappedBase = static_cast<U8*>(m_pool.getGpuBufferMappedMemory());
+	alloc.m_mappedMemory = mappedBase + alloc.m_token.m_offset;
+
+	return alloc;
+}
+
+/// Hand the memory of @a allocation back to the underlying pool.
+/// @param allocation The allocation to release. Its cached buffer and mapped memory pointers are cleared.
+void GpuReadbackMemoryPool::deferredFree(GpuReadbackMemoryAllocation& allocation)
+{
+	m_pool.deferredFree(allocation.m_token);
+
+	// Don't leave pointers to memory that no longer belongs to this allocation
+	allocation.m_buffer = nullptr;
+	allocation.m_mappedMemory = nullptr;
+}
+
+/// Forward the end of frame notification to the underlying pool.
+void GpuReadbackMemoryPool::endFrame()
+{
+	m_pool.endFrame();
+}
+
+} // end namespace anki

+ 100 - 0
AnKi/Core/GpuMemory/GpuReadbackMemoryPool.h

@@ -0,0 +1,100 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Core/Common.h>
+#include <AnKi/Gr/Utils/SegregatedListsGpuMemoryPool.h>
+
+namespace anki {
+
+/// @addtogroup core
+/// @{
+
+/// A move-only allocation that comes out of the GpuReadbackMemoryPool. The destructor gives the memory back to the pool.
+/// @memberof GpuReadbackMemoryPool
+class GpuReadbackMemoryAllocation
+{
+	friend class GpuReadbackMemoryPool;
+
+public:
+	GpuReadbackMemoryAllocation() = default;
+
+	GpuReadbackMemoryAllocation(const GpuReadbackMemoryAllocation&) = delete;
+
+	/// Take over the allocation of @a b.
+	GpuReadbackMemoryAllocation(GpuReadbackMemoryAllocation&& b)
+	{
+		*this = std::move(b);
+	}
+
+	~GpuReadbackMemoryAllocation();
+
+	GpuReadbackMemoryAllocation& operator=(const GpuReadbackMemoryAllocation&) = delete;
+
+	/// Take over the allocation of @a b. This object must not hold an allocation when this is called.
+	GpuReadbackMemoryAllocation& operator=(GpuReadbackMemoryAllocation&& b)
+	{
+		ANKI_ASSERT(!isValid() && "Forgot to delete");
+		m_token = b.m_token;
+		b.m_token = {};
+
+		// The buffer and the mapped address have to follow the token, otherwise getBuffer() on the moved-to object would dereference a
+		// null pointer
+		m_buffer = b.m_buffer;
+		b.m_buffer = nullptr;
+		m_mappedMemory = b.m_mappedMemory;
+		b.m_mappedMemory = nullptr;
+		return *this;
+	}
+
+	/// @return True if the token holds an offset, in other words if this object owns some memory.
+	Bool isValid() const
+	{
+		return m_token.m_offset != kMaxPtrSize;
+	}
+
+	/// Get the offset of this allocation inside the buffer returned by getBuffer().
+	U32 getOffset() const
+	{
+		ANKI_ASSERT(isValid());
+		return U32(m_token.m_offset);
+	}
+
+	/// Get the size stored in the token.
+	U32 getAllocatedSize() const
+	{
+		ANKI_ASSERT(isValid());
+		return U32(m_token.m_size);
+	}
+
+	/// Get the GPU buffer this allocation lives in.
+	Buffer& getBuffer() const
+	{
+		ANKI_ASSERT(isValid());
+		return *m_buffer;
+	}
+
+private:
+	SegregatedListsGpuMemoryPoolToken m_token;
+	Buffer* m_buffer = nullptr; ///< Set by GpuReadbackMemoryPool::allocate().
+	void* m_mappedMemory = nullptr; ///< CPU address of the allocation. Set by GpuReadbackMemoryPool::allocate().
+};
+
+/// Singleton that hands out GpuReadbackMemoryAllocation objects. It's a thin layer on top of a SegregatedListsGpuMemoryPool.
+class GpuReadbackMemoryPool : public MakeSingleton<GpuReadbackMemoryPool>
+{
+	template<typename>
+	friend class MakeSingleton;
+
+public:
+	/// Allocate @a size bytes.
+	GpuReadbackMemoryAllocation allocate(PtrSize size);
+
+	/// Give the memory of an allocation back to the pool.
+	void deferredFree(GpuReadbackMemoryAllocation& allocation);
+
+	/// Forwards to the endFrame() of the underlying pool.
+	void endFrame();
+
+private:
+	// Private on purpose. MakeSingleton is a friend so it can still reach them.
+	GpuReadbackMemoryPool();
+	~GpuReadbackMemoryPool();
+
+	SegregatedListsGpuMemoryPool m_pool;
+	U32 m_alignment = 0; ///< The alignment given to every m_pool allocation.
+};
+
+/// Give the memory back to the pool. Allocations that are not valid own nothing, so they are skipped and never touch the pool singleton.
+inline GpuReadbackMemoryAllocation::~GpuReadbackMemoryAllocation()
+{
+	if(isValid())
+	{
+		GpuReadbackMemoryPool::getSingleton().deferredFree(*this);
+	}
+}
+/// @}
+
+} // end namespace anki

+ 2 - 2
AnKi/Gr/RenderGraph.cpp

@@ -1396,7 +1396,7 @@ void RenderGraph::run() const
 	}
 }
 
-void RenderGraph::flush()
+void RenderGraph::flush(FencePtr* optionalFence)
 {
 	ANKI_TRACE_SCOPED_EVENT(GrRenderGraphFlush);
 
@@ -1416,7 +1416,7 @@ void RenderGraph::flush()
 		}
 
 		// Flush
-		m_ctx->m_graphicsCmdbs[i]->flush();
+		m_ctx->m_graphicsCmdbs[i]->flush({}, (i == m_ctx->m_graphicsCmdbs.getSize() - 1) ? optionalFence : nullptr);
 	}
 }
 

+ 1 - 1
AnKi/Gr/RenderGraph.h

@@ -649,7 +649,7 @@ public:
 
 	/// @name 3rd step methods
 	/// @{
-	void flush();
+	void flush(FencePtr* optionalFence = nullptr);
 	/// @}
 
 	/// @name 4th step methods

+ 19 - 1
AnKi/Gr/Utils/SegregatedListsGpuMemoryPool.cpp

@@ -50,7 +50,7 @@ public:
 };
 
 void SegregatedListsGpuMemoryPool::init(BufferUsageBit gpuBufferUsage, ConstWeakArray<PtrSize> classUpperSizes, PtrSize initialGpuBufferSize,
-										CString bufferName, Bool allowCoWs)
+										CString bufferName, Bool allowCoWs, BufferMapAccessBit map)
 {
 	ANKI_ASSERT(!isInitialized());
 
@@ -75,6 +75,7 @@ void SegregatedListsGpuMemoryPool::init(BufferUsageBit gpuBufferUsage, ConstWeak
 	m_frame = 0;
 	m_allocatedSize = 0;
 	m_allowCoWs = allowCoWs;
+	m_mapAccess = map;
 }
 
 void SegregatedListsGpuMemoryPool::destroy()
@@ -86,6 +87,11 @@ void SegregatedListsGpuMemoryPool::destroy()
 
 	GrManager::getSingleton().finish();
 
+	if(m_mappedGpuBufferMemory)
+	{
+		m_gpuBuffer->unmap();
+	}
+
 	for(GrDynamicArray<SegregatedListsGpuMemoryPoolToken>& arr : m_garbage)
 	{
 		for(const SegregatedListsGpuMemoryPoolToken& token : arr)
@@ -114,8 +120,14 @@ Error SegregatedListsGpuMemoryPool::allocateChunk(Chunk*& newChunk, PtrSize& chu
 		BufferInitInfo buffInit(m_bufferName);
 		buffInit.m_size = m_initialBufferSize;
 		buffInit.m_usage = m_bufferUsage | BufferUsageBit::kAllTransfer;
+		buffInit.m_mapAccess = m_mapAccess;
 		m_gpuBuffer = GrManager::getSingleton().newBuffer(buffInit);
 
+		if(!!m_mapAccess)
+		{
+			m_mappedGpuBufferMemory = m_gpuBuffer->map(0, kMaxPtrSize, m_mapAccess);
+		}
+
 		newChunk = newInstance<Chunk>(GrMemoryPool::getSingleton());
 		newChunk->m_offsetInGpuBuffer = 0;
 	}
@@ -136,6 +148,7 @@ Error SegregatedListsGpuMemoryPool::allocateChunk(Chunk*& newChunk, PtrSize& chu
 		BufferInitInfo buffInit(m_bufferName);
 		buffInit.m_size = m_gpuBuffer->getSize() * 2;
 		buffInit.m_usage = m_bufferUsage | BufferUsageBit::kAllTransfer;
+		buffInit.m_mapAccess = m_mapAccess;
 		BufferPtr newBuffer = GrManager::getSingleton().newBuffer(buffInit);
 
 		// Do the copy
@@ -166,6 +179,11 @@ Error SegregatedListsGpuMemoryPool::allocateChunk(Chunk*& newChunk, PtrSize& chu
 
 		// Switch the buffers
 		m_gpuBuffer = newBuffer;
+
+		if(!!m_mapAccess)
+		{
+			m_mappedGpuBufferMemory = m_gpuBuffer->map(0, kMaxPtrSize, m_mapAccess);
+		}
 	}
 	else
 	{

+ 12 - 3
AnKi/Gr/Utils/SegregatedListsGpuMemoryPool.h

@@ -40,8 +40,8 @@ private:
 	PtrSize m_chunkOffset = kMaxPtrSize;
 };
 
-/// GPU memory allocator based on segregated lists. It allocates a GPU buffer with some initial size. If there is a need
-/// to grow it allocates a bigger buffer and copies contents of the old one to the new (CoW).
+/// GPU memory allocator based on segregated lists. It allocates a GPU buffer with some initial size. If there is a need to grow it allocates a bigger
+/// buffer and copies contents of the old one to the new (CoW).
 class SegregatedListsGpuMemoryPool
 {
 public:
@@ -57,7 +57,7 @@ public:
 	SegregatedListsGpuMemoryPool& operator=(const SegregatedListsGpuMemoryPool&) = delete;
 
 	void init(BufferUsageBit gpuBufferUsage, ConstWeakArray<PtrSize> classUpperSizes, PtrSize initialGpuBufferSize, CString bufferName,
-			  Bool allowCoWs);
+			  Bool allowCoWs, BufferMapAccessBit map = BufferMapAccessBit::kNone);
 
 	void destroy();
 
@@ -80,6 +80,12 @@ public:
 		return *m_gpuBuffer;
 	}
 
+	/// Get the CPU address that the GPU buffer has been mapped to. Asserts if there is no mapping.
+	void* getGpuBufferMappedMemory() const
+	{
+		ANKI_ASSERT(m_mappedGpuBufferMemory);
+		return m_mappedGpuBufferMemory;
+	}
+
 	/// @note It's thread-safe.
 	void getStats(F32& externalFragmentation, PtrSize& userAllocatedSize, PtrSize& totalSize) const;
 
@@ -97,6 +103,7 @@ private:
 
 	Builder* m_builder = nullptr;
 	BufferPtr m_gpuBuffer;
+	void* m_mappedGpuBufferMemory = nullptr;
 	PtrSize m_allocatedSize = 0;
 
 	GrDynamicArray<Chunk*> m_deletedChunks;
@@ -105,6 +112,8 @@ private:
 	U8 m_frame = 0;
 	Bool m_allowCoWs = true;
 
+	BufferMapAccessBit m_mapAccess = BufferMapAccessBit::kNone;
+
 	Error allocateChunk(Chunk*& newChunk, PtrSize& chunkSize);
 	void deleteChunk(Chunk* chunk);
 

+ 3 - 0
AnKi/Renderer/HzbHelper.h

@@ -21,6 +21,9 @@ public:
 	void populateRenderGraph(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize, RenderTargetHandle dstHzbRt, UVec2 dstHzbRtSize,
 							 RenderingContext& ctx);
 
+	void populateRenderGraphDirectionalLight(RenderTargetHandle srcDepthRt, UVec2 srcDepthRtSize, WeakArray<RenderTargetHandle> dstHzbRts,
+											 WeakArray<Mat4> viewProjectionMatrices, RenderingContext& ctx);
+
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramPtr m_grProg;

+ 55 - 0
AnKi/Renderer/Readback.h

@@ -0,0 +1,55 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Renderer/RendererObject.h>
+#include <AnKi/Core/GpuMemory/GpuReadbackMemoryPool.h>
+
+namespace anki {
+
+/// @addtogroup renderer
+/// @{
+
+/// Work in progress helper that keeps one readback allocation per in-flight frame slot.
+/// @note Most of the methods are still stubs.
+class MultiframeReadback
+{
+public:
+	/// @note Not implemented yet. It reports that there is no data: returns nullptr and, if @a dataSize is given, sets it to zero.
+	void* getMostRecentReadData(PtrSize* dataSize = nullptr) const
+	{
+		// A non-void function has to return something, flowing off its end is undefined behaviour
+		if(dataSize)
+		{
+			*dataSize = 0;
+		}
+		return nullptr;
+	}
+
+	/// @note Not implemented yet.
+	void freeMostRecentData()
+	{
+	}
+
+	/// Release whatever still occupies the current slot and move on to the next slot.
+	/// @note NOTE(review): Nothing is allocated yet and @a size, @a buffer and @a bufferOffset are not touched. To be confirmed by the author.
+	void allocateData(PtrSize size, Buffer*& buffer, PtrSize bufferOffset)
+	{
+		if(m_fences[m_crntSlot].isCreated())
+		{
+			ANKI_R_LOGW("Allocation not freed. Will have to free it now");
+			GpuReadbackMemoryPool::getSingleton().deferredFree(m_allocations[m_crntSlot]);
+			m_fences[m_crntSlot].reset(nullptr);
+		}
+
+		m_crntSlot = (m_crntSlot + 1) % kMaxFramesInFlight;
+	}
+
+private:
+	Array<GpuReadbackMemoryAllocation, kMaxFramesInFlight> m_allocations;
+	Array<U64, kMaxFramesInFlight> m_frames;
+
+	// NOTE(review): These two were used by allocateData() without being declared. FencePtr is assumed from the isCreated()/reset() calls,
+	// confirm the type
+	Array<FencePtr, kMaxFramesInFlight> m_fences;
+	U32 m_crntSlot = 0;
+};
+
+/// Singleton placeholder for the readback management. It has no members yet.
+class ReadbackManager : public MakeSingleton<ReadbackManager>
+{
+	template<typename>
+	friend class MakeSingleton;
+};
+
+/// @}
+
+} // end namespace anki

+ 7 - 2
AnKi/Scene/Visibility.cpp

@@ -404,6 +404,7 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 
 			const Plane& nearPlane = primaryFrustum.getViewPlanes()[FrustumPlaneType::kNear];
 			const F32 distanceFromCamera = max(0.0f, testPlane(nearPlane, aabb));
+			Bool updateTimestamp = false;
 
 			WeakArray<RenderableQueueElement> elements;
 			partemitc.setupRenderableQueueElements(RenderingTechnique::kGBuffer, elements);
@@ -411,6 +412,7 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 			{
 				el.m_distanceFromCamera = distanceFromCamera;
 				*result.m_renderables.newElement() = el;
+				updateTimestamp = true;
 			}
 
 			partemitc.setupRenderableQueueElements(RenderingTechnique::kForward, elements);
@@ -421,8 +423,11 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 			}
 
 			// Update timestamp
-			ANKI_ASSERT(comp.getTimestamp() > 0);
-			m_frcCtx->m_queueViews[taskId].m_timestamp = max(m_frcCtx->m_queueViews[taskId].m_timestamp, comp.getTimestamp());
+			if(updateTimestamp)
+			{
+				ANKI_ASSERT(comp.getTimestamp() > 0);
+				m_frcCtx->m_queueViews[taskId].m_timestamp = max(m_frcCtx->m_queueViews[taskId].m_timestamp, comp.getTimestamp());
+			}
 		}
 		else if(compClassId == LightComponent::getStaticClassId())
 		{