Browse Source

More work on the light/probe visibility

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
dd8b83a146

+ 32 - 0
AnKi/Renderer/Common.h

@@ -9,6 +9,7 @@
 #include <AnKi/Util/Ptr.h>
 #include <AnKi/Util/Ptr.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
+#include <AnKi/Scene/ContiguousArrayAllocator.h>
 
 
 namespace anki {
 namespace anki {
 
 
@@ -168,6 +169,37 @@ inline U32 chooseDirectionalLightShadowCascadeDetail(U32 cascade)
 {
 {
 	return (cascade <= 1) ? 0 : 1;
 	return (cascade <= 1) ? 0 : 1;
 }
 }
+
+/// Translate a GpuSceneNonRenderableObjectType to the GpuSceneContiguousArrayType that corresponds to it.
+/// @param type The non-renderable object type to translate.
+/// @return The matching contiguous array type. For a value that is not handled below it asserts and falls back to
+///         GpuSceneContiguousArrayType::kCount.
+inline GpuSceneContiguousArrayType gpuSceneNonRenderableObjectTypeToGpuSceneContiguousArrayType(GpuSceneNonRenderableObjectType type)
+{
+	GpuSceneContiguousArrayType out;
+	switch(type)
+	{
+	case GpuSceneNonRenderableObjectType::kPointLight:
+		out = GpuSceneContiguousArrayType::kPointLights;
+		break;
+	case GpuSceneNonRenderableObjectType::kSpotLight:
+		out = GpuSceneContiguousArrayType::kSpotLights;
+		break;
+	case GpuSceneNonRenderableObjectType::kDecal:
+		out = GpuSceneContiguousArrayType::kDecals;
+		break;
+	case GpuSceneNonRenderableObjectType::kFogDensityVolume:
+		out = GpuSceneContiguousArrayType::kFogDensityVolumes;
+		break;
+	case GpuSceneNonRenderableObjectType::kReflectionProbe:
+		out = GpuSceneContiguousArrayType::kReflectionProbes;
+		break;
+	case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
+		out = GpuSceneContiguousArrayType::kGlobalIlluminationProbes;
+		break;
+	default:
+		// An always-true condition would never fire; assert on a false one so an unhandled type is actually reported.
+		ANKI_ASSERT(0 && "Unhandled GpuSceneNonRenderableObjectType");
+		out = GpuSceneContiguousArrayType::kCount;
+		break;
+	}
+
+	return out;
+}
 /// @}
 /// @}
 
 
 } // end namespace anki
 } // end namespace anki

+ 56 - 1
AnKi/Renderer/NonRenderableVisibility.cpp

@@ -4,11 +4,66 @@
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
 #include <AnKi/Renderer/NonRenderableVisibility.h>
 #include <AnKi/Renderer/NonRenderableVisibility.h>
+#include <AnKi/Renderer/Renderer.h>
+#include <AnKi/Scene/ContiguousArrayAllocator.h>
+#include <AnKi/Shaders/Include/GpuSceneFunctions.h>
 
 
 namespace anki {
 namespace anki {
 
 
-void NonRenderableVisibility::populateRenderGraph(RenderingContext& rgraph)
+void NonRenderableVisibility::populateRenderGraph(RenderingContext& ctx)
 {
 {
+	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
+
+	m_runCtx = {};
+
+	for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
+	{
+		const GpuSceneContiguousArrayType arrayType = gpuSceneNonRenderableObjectTypeToGpuSceneContiguousArrayType(type);
+		const U32 objCount = GpuSceneContiguousArrays::getSingleton().getElementCount(arrayType);
+
+		if(objCount == 0)
+		{
+			continue;
+		}
+
+		GpuVisibilityNonRenderablesInput in;
+		in.m_passesName = "NonRenderableVisibility";
+		in.m_objectType = type;
+		in.m_viewProjectionMat = ctx.m_matrices.m_viewProjection;
+		in.m_hzbRt = nullptr; // TODO
+		in.m_rgraph = &rgraph;
+
+		const GpuSceneNonRenderableObjectTypeWithFeedback feedbackType = toGpuSceneNonRenderableObjectTypeWithFeedback(type);
+		if(feedbackType != GpuSceneNonRenderableObjectTypeWithFeedback::kCount)
+		{
+			// Read feedback UUIDs from the GPU
+			DynamicArray<U32, MemoryPoolPtrWrapper<StackMemoryPool>> readbackData(ctx.m_tempPool);
+			getRenderer().getReadbackManager().readMostRecentData(m_readbacks[feedbackType], readbackData);
+
+			if(readbackData.getSize())
+			{
+				ANKI_ASSERT(readbackData.getSize() > 1);
+				const U32 uuidCount = readbackData[0];
+
+				if(uuidCount)
+				{
+					m_runCtx.m_uuids[feedbackType] = WeakArray<U32>(&readbackData[1], readbackData[0]);
+
+					// Transfer ownership
+					WeakArray<U32> dummy;
+					readbackData.moveAndReset(dummy);
+				}
+			}
+
+			// Allocate feedback buffer for this frame
+			in.m_cpuFeedback.m_bufferRange = (objCount + 1) * sizeof(U32);
+			getRenderer().getReadbackManager().allocateData(m_readbacks[feedbackType], in.m_cpuFeedback.m_bufferRange, in.m_cpuFeedback.m_buffer,
+															in.m_cpuFeedback.m_bufferOffset);
+		}
+
+		GpuVisibilityNonRenderablesOutput out;
+		getRenderer().getGpuVisibilityNonRenderables().populateRenderGraph(in, out);
+	}
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 14 - 2
AnKi/Renderer/NonRenderableVisibility.h

@@ -8,19 +8,31 @@
 #include <AnKi/Gr.h>
 #include <AnKi/Gr.h>
 #include <AnKi/Renderer/RendererObject.h>
 #include <AnKi/Renderer/RendererObject.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
+#include <AnKi/Renderer/Utils/Readback.h>
 
 
 namespace anki {
 namespace anki {
 
 
 /// @addtogroup renderer
 /// @addtogroup renderer
 /// @{
 /// @{
 
 
-/// XXX
+/// Multiple passes for GPU visibility of non-renderable entities.
 class NonRenderableVisibility : public RendererObject
 class NonRenderableVisibility : public RendererObject
 {
 {
 public:
 public:
-	void populateRenderGraph(RenderingContext& rgraph);
+	void populateRenderGraph(RenderingContext& ctx);
 
 
 private:
 private:
+	Array<MultiframeReadbackToken, U32(GpuSceneNonRenderableObjectTypeWithFeedback::kCount)> m_readbacks;
+
+	class
+	{
+	public:
+		Array<Buffer*, U32(GpuSceneNonRenderableObjectType::kCount)> m_visOutBuffers = {};
+		Array<PtrSize, U32(GpuSceneNonRenderableObjectType::kCount)> m_visOutBufferOffsets = {};
+		Array<PtrSize, U32(GpuSceneNonRenderableObjectType::kCount)> m_visOutBufferRanges = {};
+
+		Array<WeakArray<U32>, U32(GpuSceneNonRenderableObjectTypeWithFeedback::kCount)> m_uuids;
+	} m_runCtx;
 };
 };
 /// @}
 /// @}
 
 

+ 1 - 1
AnKi/Renderer/Renderer.h

@@ -96,7 +96,7 @@ public:
 		return m_visibility;
 		return m_visibility;
 	}
 	}
 
 
-	const GpuVisibilityNonRenderables& getGpuVisibilityNonRenderables() const
+	GpuVisibilityNonRenderables& getGpuVisibilityNonRenderables()
 	{
 	{
 		return m_nonRenderablesVisibility;
 		return m_nonRenderablesVisibility;
 	}
 	}

+ 58 - 38
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -38,37 +38,6 @@ static GpuSceneContiguousArrayType techniqueToArrayType(RenderingTechnique techn
 	return arrayType;
 	return arrayType;
 }
 }
 
 
-static GpuSceneContiguousArrayType objectTypeToArrayType(GpuSceneNonRenderableObjectType type)
-{
-	GpuSceneContiguousArrayType out;
-	switch(type)
-	{
-	case GpuSceneNonRenderableObjectType::kPointLight:
-		out = GpuSceneContiguousArrayType::kPointLights;
-		break;
-	case GpuSceneNonRenderableObjectType::kSpotLight:
-		out = GpuSceneContiguousArrayType::kSpotLights;
-		break;
-	case GpuSceneNonRenderableObjectType::kDecal:
-		out = GpuSceneContiguousArrayType::kDecals;
-		break;
-	case GpuSceneNonRenderableObjectType::kFogDensityVolume:
-		out = GpuSceneContiguousArrayType::kFogDensityVolumes;
-		break;
-	case GpuSceneNonRenderableObjectType::kReflectionProbe:
-		out = GpuSceneContiguousArrayType::kReflectionProbes;
-		break;
-	case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
-		out = GpuSceneContiguousArrayType::kGlobalIlluminationProbes;
-		break;
-	default:
-		ANKI_ASSERT(1);
-		out = GpuSceneContiguousArrayType::kCount;
-	}
-
-	return out;
-}
-
 Error GpuVisibility::init()
 Error GpuVisibility::init()
 {
 {
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibility.ankiprogbin", m_prog));
 	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuVisibility.ankiprogbin", m_prog));
@@ -260,17 +229,44 @@ Error GpuVisibilityNonRenderables::init()
 				const ShaderProgramResourceVariant* variant;
 				const ShaderProgramResourceVariant* variant;
 				m_prog->getOrCreateVariant(variantInit, variant);
 				m_prog->getOrCreateVariant(variantInit, variant);
 
 
-				m_grProgs[hzb][type][cpuFeedback].reset(&variant->getProgram());
+				if(variant)
+				{
+					m_grProgs[hzb][type][cpuFeedback].reset(&variant->getProgram());
+				}
+				else
+				{
+					m_grProgs[hzb][type][cpuFeedback].reset(nullptr);
+				}
 			}
 			}
 		}
 		}
 	}
 	}
 
 
+	{
+		CommandBufferInitInfo cmdbInit("TmpClear");
+		cmdbInit.m_flags |= CommandBufferFlag::kSmallBatch;
+		CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
+
+		for(U32 i = 0; i < kMaxFeedbackRequestsPerFrame; ++i)
+		{
+			BufferInitInfo buffInit("GpuVisibilityNonRenderablesFeedbackCounters");
+			buffInit.m_size = 2 * sizeof(U32);
+			buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
+
+			m_counterBuffers[i] = GrManager::getSingleton().newBuffer(buffInit);
+
+			cmdb->fillBuffer(m_counterBuffers[i].get(), 0, kMaxPtrSize, 0);
+		}
+
+		cmdb->flush();
+		GrManager::getSingleton().finish();
+	}
+
 	return Error::kNone;
 	return Error::kNone;
 }
 }
 
 
-void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out) const
+void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out)
 {
 {
-	const GpuSceneContiguousArrayType arrayType = objectTypeToArrayType(in.m_objectType);
+	const GpuSceneContiguousArrayType arrayType = gpuSceneNonRenderableObjectTypeToGpuSceneContiguousArrayType(in.m_objectType);
 	const U32 objCount = GpuSceneContiguousArrays::getSingleton().getElementCount(arrayType);
 	const U32 objCount = GpuSceneContiguousArrays::getSingleton().getElementCount(arrayType);
 
 
 	if(objCount == 0)
 	if(objCount == 0)
@@ -280,7 +276,21 @@ void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderable
 
 
 	if(in.m_cpuFeedback.m_buffer)
 	if(in.m_cpuFeedback.m_buffer)
 	{
 	{
-		ANKI_ASSERT(in.m_cpuFeedback.m_bufferRange == sizeof(U32) * 2 * objCount + 1);
+		ANKI_ASSERT(in.m_cpuFeedback.m_bufferRange == sizeof(U32) * (objCount + 1));
+	}
+
+	// Find the counter buffer required for feedback
+	U32 counterBufferIdx = kMaxU32;
+	if(in.m_cpuFeedback.m_buffer)
+	{
+		if(m_lastFrameIdx != getRenderer().getFrameCount())
+		{
+			m_lastFrameIdx = getRenderer().getFrameCount();
+			m_feedbackRequestCountThisFrame = 0;
+		}
+
+		counterBufferIdx = m_feedbackRequestCountThisFrame++;
+		m_counterIdx[counterBufferIdx] = (m_counterIdx[counterBufferIdx] + 1) & 1;
 	}
 	}
 
 
 	// Allocate memory for the result
 	// Allocate memory for the result
@@ -310,13 +320,15 @@ void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderable
 
 
 	pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedback.m_buffer, feedbackBufferOffset = in.m_cpuFeedback.m_bufferOffset,
 	pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedback.m_buffer, feedbackBufferOffset = in.m_cpuFeedback.m_bufferOffset,
 				  feedbackBufferRange = in.m_cpuFeedback.m_bufferRange, viewProjectionMat = in.m_viewProjectionMat,
 				  feedbackBufferRange = in.m_cpuFeedback.m_bufferRange, viewProjectionMat = in.m_viewProjectionMat,
-				  visibleIndicesBuffHandle = out.m_bufferHandle](RenderPassWorkContext& rgraph) {
+				  visibleIndicesBuffHandle = out.m_bufferHandle, counterBufferIdx,
+				  counterIdx = m_counterIdx[counterBufferIdx]](RenderPassWorkContext& rgraph) {
 		CommandBuffer& cmdb = *rgraph.m_commandBuffer;
 		CommandBuffer& cmdb = *rgraph.m_commandBuffer;
-		const GpuSceneContiguousArrayType arrayType = objectTypeToArrayType(objType);
+		const GpuSceneContiguousArrayType arrayType = gpuSceneNonRenderableObjectTypeToGpuSceneContiguousArrayType(objType);
 		const U32 objCount = GpuSceneContiguousArrays::getSingleton().getElementCount(arrayType);
 		const U32 objCount = GpuSceneContiguousArrays::getSingleton().getElementCount(arrayType);
 		const GpuSceneContiguousArrays& cArrays = GpuSceneContiguousArrays::getSingleton();
 		const GpuSceneContiguousArrays& cArrays = GpuSceneContiguousArrays::getSingleton();
+		const Bool needsFeedback = feedbackBuffer != nullptr;
 
 
-		cmdb.bindShaderProgram(m_grProgs[0][objType][0].get());
+		cmdb.bindShaderProgram(m_grProgs[0][objType][needsFeedback].get());
 
 
 		cmdb.bindStorageBuffer(0, 0, &GpuSceneBuffer::getSingleton().getBuffer(), cArrays.getArrayBase(arrayType),
 		cmdb.bindStorageBuffer(0, 0, &GpuSceneBuffer::getSingleton().getBuffer(), cArrays.getArrayBase(arrayType),
 							   cArrays.getElementCount(GpuSceneContiguousArrayType::kRenderables),
 							   cArrays.getElementCount(GpuSceneContiguousArrayType::kRenderables),
@@ -331,8 +343,16 @@ void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderable
 			unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
 			unis->m_clipPlanes[i] = Vec4(planes[i].getNormal().xyz(), planes[i].getOffset());
 		}
 		}
 
 
+		unis->m_feedbackCounterIdx = counterIdx;
+
 		rgraph.bindStorageBuffer(0, 2, visibleIndicesBuffHandle);
 		rgraph.bindStorageBuffer(0, 2, visibleIndicesBuffHandle);
 
 
+		if(needsFeedback)
+		{
+			cmdb.bindStorageBuffer(0, 3, feedbackBuffer, feedbackBufferOffset, feedbackBufferRange);
+			cmdb.bindStorageBuffer(0, 4, m_counterBuffers[counterBufferIdx].get(), 0, kMaxPtrSize);
+		}
+
 		dispatchPPCompute(cmdb, 64, 1, objCount, 1);
 		dispatchPPCompute(cmdb, 64, 1, objCount, 1);
 	});
 	});
 }
 }

+ 19 - 12
AnKi/Renderer/Utils/GpuVisibility.h

@@ -53,17 +53,6 @@ private:
 #endif
 #endif
 };
 };
 
 
-/// @memberof GpuVisibilityNonRenderables
-class GpuVisibilityNonRenderablesOutput
-{
-public:
-	BufferHandle m_bufferHandle; ///< Some buffer handle to be used for tracking. No need to track all buffers.
-
-	Buffer* m_visibleIndicesBuffer = nullptr;
-	PtrSize m_visibleIndicesBufferOffset = 0;
-	PtrSize m_visibleIndicesBufferRange = 0;
-};
-
 /// @memberof GpuVisibilityNonRenderables
 /// @memberof GpuVisibilityNonRenderables
 class GpuVisibilityNonRenderablesInput
 class GpuVisibilityNonRenderablesInput
 {
 {
@@ -83,17 +72,35 @@ public:
 	} m_cpuFeedback;
 	} m_cpuFeedback;
 };
 };
 
 
+/// Output structure filled by GpuVisibilityNonRenderables::populateRenderGraph(). @memberof GpuVisibilityNonRenderables
+class GpuVisibilityNonRenderablesOutput
+{
+public:
+	BufferHandle m_bufferHandle; ///< Some buffer handle to be used for tracking. No need to track all buffers.
+
+	Buffer* m_visibleIndicesBuffer = nullptr; ///< NOTE(review): presumably the buffer that receives the visible indices; confirm against populateRenderGraph().
+	PtrSize m_visibleIndicesBufferOffset = 0; ///< NOTE(review): presumably the offset of the visible indices inside m_visibleIndicesBuffer; confirm.
+	PtrSize m_visibleIndicesBufferRange = 0; ///< NOTE(review): presumably the size of the visible indices range; confirm.
+};
+
 /// GPU visibility of lights, probes etc.
 /// GPU visibility of lights, probes etc.
 class GpuVisibilityNonRenderables : public RendererObject
 class GpuVisibilityNonRenderables : public RendererObject
 {
 {
 public:
 public:
 	Error init();
 	Error init();
 
 
-	void populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out) const;
+	void populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out);
 
 
 private:
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_prog;
 	Array3d<ShaderProgramPtr, 2, U32(GpuSceneNonRenderableObjectType::kCount), 2> m_grProgs;
 	Array3d<ShaderProgramPtr, 2, U32(GpuSceneNonRenderableObjectType::kCount), 2> m_grProgs;
+
+	static constexpr U32 kMaxFeedbackRequestsPerFrame = 6;
+
+	Array<BufferPtr, kMaxFeedbackRequestsPerFrame> m_counterBuffers; ///< A buffer containing multiple counters for atomic operations.
+	Array<U8, kMaxFeedbackRequestsPerFrame> m_counterIdx = {};
+	U64 m_lastFrameIdx = kMaxU64;
+	U32 m_feedbackRequestCountThisFrame = 0;
 };
 };
 /// @}
 /// @}
 
 

+ 23 - 17
AnKi/Renderer/Utils/Readback.cpp

@@ -7,35 +7,34 @@
 
 
 namespace anki {
 namespace anki {
 
 
-void ReadbackManager::allocateData(MultiframeReadbackToken& token, PtrSize size, Buffer*& buffer, PtrSize& bufferOffset)
+void ReadbackManager::allocateData(MultiframeReadbackToken& token, PtrSize size, Buffer*& buffer, PtrSize& bufferOffset) const
 {
 {
 	for([[maybe_unused]] U64 frame : token.m_frameIds)
 	for([[maybe_unused]] U64 frame : token.m_frameIds)
 	{
 	{
 		ANKI_ASSERT(frame != m_frameId && "Can't allocate multiple times in a frame");
 		ANKI_ASSERT(frame != m_frameId && "Can't allocate multiple times in a frame");
 	}
 	}
 
 
-	if(token.m_allocations[token.m_slot].isValid()) [[unlikely]]
+	GpuReadbackMemoryAllocation& allocation = token.m_allocations[token.m_slot];
+
+	if(allocation.isValid() && allocation.getAllocatedSize() != size)
 	{
 	{
-		ANKI_R_LOGW("Allocation hasn't been released. Haven't called getMostRecentReadDataAndRelease");
-		GpuReadbackMemoryPool::getSingleton().deferredFree(token.m_allocations[token.m_slot]);
+		GpuReadbackMemoryPool::getSingleton().deferredFree(allocation);
 	}
 	}
 
 
-	ANKI_ASSERT(!token.m_allocations[token.m_slot].isValid());
-
-	token.m_allocations[token.m_slot] = GpuReadbackMemoryPool::getSingleton().allocate(size);
+	if(!allocation.isValid())
+	{
+		allocation = GpuReadbackMemoryPool::getSingleton().allocate(size);
+	}
 	token.m_frameIds[token.m_slot] = m_frameId;
 	token.m_frameIds[token.m_slot] = m_frameId;
 
 
-	buffer = &token.m_allocations[token.m_slot].getBuffer();
-	bufferOffset = token.m_allocations[token.m_slot].getOffset();
+	buffer = &allocation.getBuffer();
+	bufferOffset = allocation.getOffset();
 
 
 	token.m_slot = (token.m_slot + 1) % kMaxFramesInFlight;
 	token.m_slot = (token.m_slot + 1) % kMaxFramesInFlight;
 }
 }
 
 
-void ReadbackManager::getMostRecentReadDataAndRelease(MultiframeReadbackToken& token, void* data, PtrSize dataSize, PtrSize& dataOut)
+U32 ReadbackManager::findBestSlot(const MultiframeReadbackToken& token) const
 {
 {
-	ANKI_ASSERT(data && dataSize > 0);
-	dataOut = 0;
-
 	const U64 earliestFrame = m_frameId - (kMaxFramesInFlight - 1);
 	const U64 earliestFrame = m_frameId - (kMaxFramesInFlight - 1);
 	U32 bestSlot = kMaxU32;
 	U32 bestSlot = kMaxU32;
 	U32 secondBestSlot = kMaxU32;
 	U32 secondBestSlot = kMaxU32;
@@ -54,17 +53,24 @@ void ReadbackManager::getMostRecentReadDataAndRelease(MultiframeReadbackToken& t
 	}
 	}
 
 
 	const U32 slot = (bestSlot != kMaxU32) ? bestSlot : secondBestSlot;
 	const U32 slot = (bestSlot != kMaxU32) ? bestSlot : secondBestSlot;
+	return slot;
+}
+
+void ReadbackManager::readMostRecentData(const MultiframeReadbackToken& token, void* data, PtrSize dataSize, PtrSize& dataOut) const
+{
+	ANKI_ASSERT(data && dataSize > 0);
+	dataOut = 0;
+
+	const U32 slot = findBestSlot(token);
 	if(slot == kMaxU32)
 	if(slot == kMaxU32)
 	{
 	{
 		return;
 		return;
 	}
 	}
 
 
-	GpuReadbackMemoryAllocation& allocation = token.m_allocations[slot];
+	const GpuReadbackMemoryAllocation& allocation = token.m_allocations[slot];
 	dataOut = min(dataSize, PtrSize(allocation.getAllocatedSize()));
 	dataOut = min(dataSize, PtrSize(allocation.getAllocatedSize()));
 
 
-	memcpy(data, static_cast<const U8*>(allocation.getMappedMemory()) + allocation.getOffset(), min(dataOut, dataOut));
-
-	GpuReadbackMemoryPool::getSingleton().deferredFree(allocation);
+	memcpy(data, static_cast<const U8*>(allocation.getMappedMemory()) + allocation.getOffset(), dataOut);
 }
 }
 
 
 void ReadbackManager::endFrame(Fence* fence)
 void ReadbackManager::endFrame(Fence* fence)

+ 24 - 14
AnKi/Renderer/Utils/Readback.h

@@ -13,7 +13,7 @@ namespace anki {
 /// @addtogroup renderer
 /// @addtogroup renderer
 /// @{
 /// @{
 
 
-/// TODO
+/// A persistent GPU readback token. It's essentially a group of allocations.
 class MultiframeReadbackToken
 class MultiframeReadbackToken
 {
 {
 	friend class ReadbackManager;
 	friend class ReadbackManager;
@@ -24,25 +24,33 @@ private:
 	U32 m_slot = 0;
 	U32 m_slot = 0;
 };
 };
 
 
-/// TODO
+/// A small class that is used to streamline the use of GPU readbacks.
 class ReadbackManager
 class ReadbackManager
 {
 {
-	template<typename>
-	friend class MakeSingleton;
-
 public:
 public:
-	/// @note Not thread-safe
-	void allocateData(MultiframeReadbackToken& token, PtrSize size, Buffer*& buffer, PtrSize& bufferOffset);
+	void allocateData(MultiframeReadbackToken& token, PtrSize size, Buffer*& buffer, PtrSize& bufferOffset) const;
+
+	/// Read the most up to date data from the GPU.
+	void readMostRecentData(const MultiframeReadbackToken& token, void* data, PtrSize dataSize, PtrSize& dataOut) const;
 
 
-	/// XXX
-	/// @note Not thread-safe
-	void getMostRecentReadDataAndRelease(MultiframeReadbackToken& token, void* data, PtrSize dataSize, PtrSize& dataOut);
+	/// Read the most up to date data from the GPU.
+	template<typename T, typename TMemPool>
+	void readMostRecentData(const MultiframeReadbackToken& token, DynamicArray<T, TMemPool>& data) const
+	{
+		const U32 slot = findBestSlot(token);
+		if(slot != kMaxU32 && token.m_allocations[slot].isValid())
+		{
+			const GpuReadbackMemoryAllocation& allocation = token.m_allocations[slot];
 
 
-	/// @note Not thread-safe
-	template<typename TMemPool>
-	void getMostRecentReadDataAndRelease(MultiframeReadbackToken& token, DynamicArray<U8, TMemPool>& data);
+			data.resize(allocation.getAllocatedSize() / sizeof(T));
+			memcpy(&data[0], static_cast<const U8*>(allocation.getMappedMemory()) + allocation.getOffset(), allocation.getAllocatedSize());
+		}
+		else
+		{
+			data.resize(0);
+		}
+	}
 
 
-	/// @note Not thread-safe
 	void endFrame(Fence* fence);
 	void endFrame(Fence* fence);
 
 
 private:
 private:
@@ -54,6 +62,8 @@ private:
 
 
 	Array<Frame, kMaxFramesInFlight> m_frames;
 	Array<Frame, kMaxFramesInFlight> m_frames;
 	U64 m_frameId = kMaxFramesInFlight;
 	U64 m_frameId = kMaxFramesInFlight;
+
+	U32 findBestSlot(const MultiframeReadbackToken& token) const;
 };
 };
 /// @}
 /// @}
 
 

+ 27 - 4
AnKi/Scene/Components/LightComponent.cpp

@@ -17,12 +17,10 @@
 namespace anki {
 namespace anki {
 
 
 LightComponent::LightComponent(SceneNode* node)
 LightComponent::LightComponent(SceneNode* node)
-	: SceneComponent(node, getStaticClassId())
-	, m_uuid(SceneGraph::getSingleton().getNewUuid())
+	: QueryableSceneComponent<LightComponent>(node, getStaticClassId())
 	, m_spatial(this)
 	, m_spatial(this)
 	, m_type(LightComponentType::kPoint)
 	, m_type(LightComponentType::kPoint)
 {
 {
-	ANKI_ASSERT(m_uuid > 0);
 	m_point.m_radius = 1.0f;
 	m_point.m_radius = 1.0f;
 
 
 	setLightComponentType(LightComponentType::kPoint);
 	setLightComponentType(LightComponentType::kPoint);
@@ -67,6 +65,11 @@ void LightComponent::setLightComponentType(LightComponentType type)
 	}
 	}
 
 
 	m_type = type;
 	m_type = type;
+
+	if(type != LightComponentType::kDirectional)
+	{
+		refreshUuid();
+	}
 }
 }
 
 
 Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
@@ -120,6 +123,15 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 			}
 			}
 		}
 		}
 
 
+		if(m_shadow && shapeUpdated)
+		{
+			refreshUuid();
+		}
+		else if(!m_shadow)
+		{
+			releaseUuid();
+		}
+
 		// Upload to the GPU scene
 		// Upload to the GPU scene
 		GpuScenePointLight gpuLight;
 		GpuScenePointLight gpuLight;
 		gpuLight.m_position = m_worldTransform.getOrigin().xyz();
 		gpuLight.m_position = m_worldTransform.getOrigin().xyz();
@@ -127,6 +139,7 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		gpuLight.m_diffuseColor = m_diffColor.xyz();
 		gpuLight.m_diffuseColor = m_diffColor.xyz();
 		gpuLight.m_squareRadiusOverOne = 1.0f / (m_point.m_radius * m_point.m_radius);
 		gpuLight.m_squareRadiusOverOne = 1.0f / (m_point.m_radius * m_point.m_radius);
 		gpuLight.m_shadow = m_shadow;
 		gpuLight.m_shadow = m_shadow;
+		gpuLight.m_uuid = (m_shadow) ? getUuid() : 0;
 		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneLightIndex.getOffsetInGpuScene(), gpuLight);
 		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneLightIndex.getOffsetInGpuScene(), gpuLight);
 	}
 	}
 	else if(updated && m_type == LightComponentType::kSpot)
 	else if(updated && m_type == LightComponentType::kSpot)
@@ -177,6 +190,15 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 			}
 			}
 		}
 		}
 
 
+		if(m_shadow && shapeUpdated)
+		{
+			refreshUuid();
+		}
+		else if(!m_shadow)
+		{
+			releaseUuid();
+		}
+
 		// Upload to the GPU scene
 		// Upload to the GPU scene
 		GpuSceneSpotLight gpuLight;
 		GpuSceneSpotLight gpuLight;
 		gpuLight.m_position = m_worldTransform.getOrigin().xyz();
 		gpuLight.m_position = m_worldTransform.getOrigin().xyz();
@@ -191,6 +213,7 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		gpuLight.m_shadow = m_shadow;
 		gpuLight.m_shadow = m_shadow;
 		gpuLight.m_outerCos = cos(m_spot.m_outerAngle / 2.0f);
 		gpuLight.m_outerCos = cos(m_spot.m_outerAngle / 2.0f);
 		gpuLight.m_innerCos = cos(m_spot.m_innerAngle / 2.0f);
 		gpuLight.m_innerCos = cos(m_spot.m_innerAngle / 2.0f);
+		gpuLight.m_uuid = (m_shadow) ? getUuid() : 0;
 		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneLightIndex.getOffsetInGpuScene(), gpuLight);
 		GpuSceneMicroPatcher::getSingleton().newCopy(*info.m_framePool, m_gpuSceneLightIndex.getOffsetInGpuScene(), gpuLight);
 	}
 	}
 	else if(m_type == LightComponentType::kDirectional)
 	else if(m_type == LightComponentType::kDirectional)
@@ -222,7 +245,7 @@ void LightComponent::setupDirectionalLightQueueElement(const Frustum& primaryFru
 
 
 	const U32 shadowCascadeCount = cascadeFrustums.getSize();
 	const U32 shadowCascadeCount = cascadeFrustums.getSize();
 
 
-	el.m_uuid = m_uuid;
+	el.m_uuid = hasUuid() ? getUuid() : 0;
 	el.m_diffuseColor = m_diffColor.xyz();
 	el.m_diffuseColor = m_diffColor.xyz();
 	el.m_direction = -m_worldTransform.getRotation().getZAxis().xyz();
 	el.m_direction = -m_worldTransform.getRotation().getZAxis().xyz();
 	for(U32 i = 0; i < shadowCascadeCount; ++i)
 	for(U32 i = 0; i < shadowCascadeCount; ++i)

+ 3 - 4
AnKi/Scene/Components/LightComponent.h

@@ -29,7 +29,7 @@ enum class LightComponentType : U8
 };
 };
 
 
 /// Light component. Contains all the info of lights.
 /// Light component. Contains all the info of lights.
-class LightComponent : public SceneComponent
+class LightComponent : public QueryableSceneComponent<LightComponent>
 {
 {
 	ANKI_SCENE_COMPONENT(LightComponent)
 	ANKI_SCENE_COMPONENT(LightComponent)
 
 
@@ -119,7 +119,7 @@ public:
 	void setupPointLightQueueElement(PointLightQueueElement& el) const
 	void setupPointLightQueueElement(PointLightQueueElement& el) const
 	{
 	{
 		ANKI_ASSERT(m_type == LightComponentType::kPoint);
 		ANKI_ASSERT(m_type == LightComponentType::kPoint);
-		el.m_uuid = m_uuid;
+		el.m_uuid = getUuid();
 		el.m_worldPosition = m_worldTransform.getOrigin().xyz();
 		el.m_worldPosition = m_worldTransform.getOrigin().xyz();
 		el.m_radius = m_point.m_radius;
 		el.m_radius = m_point.m_radius;
 		el.m_diffuseColor = m_diffColor.xyz();
 		el.m_diffuseColor = m_diffColor.xyz();
@@ -130,7 +130,7 @@ public:
 	void setupSpotLightQueueElement(SpotLightQueueElement& el) const
 	void setupSpotLightQueueElement(SpotLightQueueElement& el) const
 	{
 	{
 		ANKI_ASSERT(m_type == LightComponentType::kSpot);
 		ANKI_ASSERT(m_type == LightComponentType::kSpot);
-		el.m_uuid = m_uuid;
+		el.m_uuid = getUuid();
 		el.m_worldTransform = Mat4(m_worldTransform);
 		el.m_worldTransform = Mat4(m_worldTransform);
 		el.m_textureMatrix = m_spot.m_textureMat;
 		el.m_textureMatrix = m_spot.m_textureMat;
 		el.m_distance = m_spot.m_distance;
 		el.m_distance = m_spot.m_distance;
@@ -149,7 +149,6 @@ public:
 	void setupDirectionalLightQueueElement(const Frustum& cameraFrustum, DirectionalLightQueueElement& el, WeakArray<Frustum> cascadeFrustums) const;
 	void setupDirectionalLightQueueElement(const Frustum& cameraFrustum, DirectionalLightQueueElement& el, WeakArray<Frustum> cascadeFrustums) const;
 
 
 private:
 private:
-	U64 m_uuid;
 	Vec4 m_diffColor = Vec4(0.5f);
 	Vec4 m_diffColor = Vec4(0.5f);
 	Transform m_worldTransform = Transform::getIdentity();
 	Transform m_worldTransform = Transform::getIdentity();
 
 

+ 72 - 0
AnKi/Scene/Components/SceneComponent.h

@@ -189,6 +189,78 @@ private:
 	Timestamp m_timestamp = 1; ///< Indicates when an update happened
 	Timestamp m_timestamp = 1; ///< Indicates when an update happened
 	U8 m_classId; ///< Cache the type ID.
 	U8 m_classId; ///< Cache the type ID.
 };
 };
+
+/// Scene component that has a UUID and a static method that can be used to fetch the component by using the UUID. T is the derived component type (CRTP style, this is cast to T*). Each T gets its own static UUID-to-component map. A UUID of 0 means that no UUID is currently assigned.
+template<typename T>
+class QueryableSceneComponent : public SceneComponent
+{
+public:
+	QueryableSceneComponent(SceneNode* node, U8 classId)
+		: SceneComponent(node, classId)
+	{
+	}
+
+	~QueryableSceneComponent() // Removes this component from the UUID map if it still holds a UUID.
+	{
+		releaseUuid();
+	}
+
+	U32 getUuid() const // Returns the current UUID. Asserts that one is assigned; use hasUuid() to check first.
+	{
+		ANKI_ASSERT(m_uuid);
+		return m_uuid;
+	}
+
+	static T* tryFindComponent(U32 uuid) // Returns the component registered under uuid or nullptr if there is none.
+	{
+		auto it = m_uuidToSceneComponent.find(uuid); // NOTE(review): the lookup doesn't take m_uuidToSceneComponentLock; confirm callers never race with refreshUuid()/releaseUuid().
+		return (it != m_uuidToSceneComponent.getEnd()) ? *it : nullptr;
+	}
+
+protected:
+	/// Get a new UUID from the SceneGraph, drop the mapping of the old UUID (if any) and register this component under the new one. @note Not thread-safe.
+	void refreshUuid()
+	{
+		const U32 oldUuid = m_uuid;
+		m_uuid = SceneGraph::getSingleton().getNewUuid(); // Written before the lock is taken; only the map accesses below are guarded.
+
+		LockGuard lock(m_uuidToSceneComponentLock);
+		if(oldUuid != 0)
+		{
+			auto it = m_uuidToSceneComponent.find(oldUuid);
+			ANKI_ASSERT(it != m_uuidToSceneComponent.getEnd());
+			m_uuidToSceneComponent.erase(it);
+		}
+
+		ANKI_ASSERT(m_uuidToSceneComponent.find(m_uuid) == m_uuidToSceneComponent.getEnd());
+		m_uuidToSceneComponent.emplace(m_uuid, static_cast<T*>(this));
+	}
+
+	/// Remove this component from the UUID map and reset its UUID to 0. Does nothing if no UUID is assigned. @note Not thread-safe.
+	void releaseUuid()
+	{
+		if(m_uuid != 0)
+		{
+			LockGuard lock(m_uuidToSceneComponentLock);
+			auto it = m_uuidToSceneComponent.find(m_uuid);
+			ANKI_ASSERT(it != m_uuidToSceneComponent.getEnd());
+			m_uuidToSceneComponent.erase(it);
+
+			m_uuid = 0;
+		}
+	}
+
+	Bool hasUuid() const // True if a UUID is currently assigned (non-zero).
+	{
+		return m_uuid != 0;
+	}
+
+private:
+	U32 m_uuid = 0; // 0 means no UUID assigned.
+
+	inline static SceneHashMap<U32, T*> m_uuidToSceneComponent; // UUID to component lookup, one map per T.
+	inline static SpinLock m_uuidToSceneComponentLock; // Taken by refreshUuid() and releaseUuid() around the map accesses.
+};
 /// @}
 /// @}
 
 
 } // end namespace anki
 } // end namespace anki

+ 2 - 2
AnKi/Scene/SceneGraph.h

@@ -127,7 +127,7 @@ public:
 
 
 	/// Get a unique UUID.
 	/// Get a unique UUID.
 	/// @note It's thread-safe.
 	/// @note It's thread-safe.
-	U64 getNewUuid()
+	U32 getNewUuid()
 	{
 	{
 		return m_nodesUuid.fetchAdd(1);
 		return m_nodesUuid.fetchAdd(1);
 	}
 	}
@@ -169,7 +169,7 @@ private:
 
 
 	Atomic<U32> m_objectsMarkedForDeletionCount = {0};
 	Atomic<U32> m_objectsMarkedForDeletionCount = {0};
 
 
-	Atomic<U64> m_nodesUuid = {1};
+	Atomic<U32> m_nodesUuid = {1};
 
 
 	SceneGraph();
 	SceneGraph();
 
 

+ 1 - 1
AnKi/Scene/SceneNode.h

@@ -378,7 +378,7 @@ public:
 	}
 	}
 
 
 private:
 private:
-	U64 m_uuid;
+	U32 m_uuid;
 	SceneString m_name; ///< A unique name.
 	SceneString m_name; ///< A unique name.
 
 
 	GrDynamicArray<SceneComponent*> m_components;
 	GrDynamicArray<SceneComponent*> m_components;

+ 18 - 14
AnKi/Shaders/GpuVisibilityNonRenderables.ankiprog

@@ -32,12 +32,15 @@ typedef GpuSceneGlobalIlluminationProbe ObjectType;
 #endif
 #endif
 
 
 [[vk::binding(0)]] StructuredBuffer<ObjectType> g_objects;
 [[vk::binding(0)]] StructuredBuffer<ObjectType> g_objects;
-[[vk::binding(1)]] ConstantBuffer<GpuVisibilityUniforms> g_unis;
+[[vk::binding(1)]] ConstantBuffer<GpuVisibilityNonRenderableUniforms> g_unis;
 [[vk::binding(2)]] RWStructuredBuffer<U32> g_visibleIndices; // 1st element is the count. What follows is indices
 [[vk::binding(2)]] RWStructuredBuffer<U32> g_visibleIndices; // 1st element is the count. What follows is indices
 
 
 #if CPU_FEEDBACK
 #if CPU_FEEDBACK
-// 1st element is a count. What follows is a pair of object index and object UUID.
+// 1st element is a count. What follows is an array of UUIDs.
 [[vk::binding(3)]] RWStructuredBuffer<U32> g_cpuFeedbackBuffer;
 [[vk::binding(3)]] RWStructuredBuffer<U32> g_cpuFeedbackBuffer;
+
+// Contains 2 U32s. One that gets cleared and another that will be incremented.
+[[vk::binding(4)]] RWStructuredBuffer<U32> g_counterBuffer;
 #endif
 #endif
 
 
 Vec4 getSphere(GpuScenePointLight l)
 Vec4 getSphere(GpuScenePointLight l)
@@ -88,6 +91,14 @@ Vec4 getSphere(GpuSceneGlobalIlluminationProbe l)
 	U32 objectCount, unused;
 	U32 objectCount, unused;
 	g_objects.GetDimensions(objectCount, unused);
 	g_objects.GetDimensions(objectCount, unused);
 
 
+#if CPU_FEEDBACK
+	// Zero the feedback counter that the next frame will use
+	if(svDispatchThreadId.x == 0)
+	{
+		g_counterBuffer[!g_unis.m_feedbackCounterIdx] = 0u;
+	}
+#endif
+
 	if(svDispatchThreadId.x >= objectCount)
 	if(svDispatchThreadId.x >= objectCount)
 	{
 	{
 		return;
 		return;
@@ -110,21 +121,14 @@ Vec4 getSphere(GpuSceneGlobalIlluminationProbe l)
 
 
 	// Give feedback to the CPU
 	// Give feedback to the CPU
 	//
 	//
-#if CPU_FEEDBACK \
-	&& (OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_POINT_LIGHT || OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_SPOT_LIGHT \
-		|| OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE \
-		|| OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE)
-#	if(OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_POINT_LIGHT || OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_SPOT_LIGHT)
-	if(obj.m_shadow)
-#	endif
+#if CPU_FEEDBACK
+	if(obj.m_uuid != 0)
 	{
 	{
 		U32 count;
 		U32 count;
-		InterlockedAdd(g_cpuFeedbackBuffer[0], 1, count);
-
-		count *= 2;
-		++count;
+		InterlockedAdd(g_counterBuffer[g_unis.m_feedbackCounterIdx], 1, count);
 
 
-		g_cpuFeedbackBuffer[count] = svDispatchThreadId.x;
+		U32 dummy;
+		InterlockedExchange(g_cpuFeedbackBuffer[0], count, dummy);
 		g_cpuFeedbackBuffer[count + 1] = obj.m_uuid;
 		g_cpuFeedbackBuffer[count + 1] = obj.m_uuid;
 	}
 	}
 #endif
 #endif

+ 23 - 0
AnKi/Shaders/Include/GpuSceneFunctions.h

@@ -50,4 +50,27 @@ inline GpuSceneRenderableAabb initGpuSceneRenderableAabb(Vec3 aabbMin, Vec3 aabb
 	return gpuVolume;
 	return gpuVolume;
 }
 }
 
 
+inline GpuSceneNonRenderableObjectTypeWithFeedback toGpuSceneNonRenderableObjectTypeWithFeedback(GpuSceneNonRenderableObjectType type)
+{
+	GpuSceneNonRenderableObjectTypeWithFeedback ret;
+	switch(type)
+	{
+	case GpuSceneNonRenderableObjectType::kPointLight:
+		ret = GpuSceneNonRenderableObjectTypeWithFeedback::kPointLight;
+		break;
+	case GpuSceneNonRenderableObjectType::kSpotLight:
+		ret = GpuSceneNonRenderableObjectTypeWithFeedback::kSpotLight;
+		break;
+	case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
+		ret = GpuSceneNonRenderableObjectTypeWithFeedback::kGlobalIlluminationProbe;
+		break;
+	case GpuSceneNonRenderableObjectType::kReflectionProbe:
+		ret = GpuSceneNonRenderableObjectTypeWithFeedback::kReflectionProbe;
+		break;
+	default:
+		ret = GpuSceneNonRenderableObjectTypeWithFeedback::kCount;
+	}
+	return ret;
+}
+
 ANKI_END_NAMESPACE
 ANKI_END_NAMESPACE

+ 13 - 0
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -190,4 +190,17 @@ enum class GpuSceneNonRenderableObjectTypeBit : U32
 };
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(GpuSceneNonRenderableObjectTypeBit)
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(GpuSceneNonRenderableObjectTypeBit)
 
 
+/// Non-renderable types that require GPU to CPU feedback.
+enum class GpuSceneNonRenderableObjectTypeWithFeedback : U32
+{
+	kPointLight,
+	kSpotLight,
+	kReflectionProbe,
+	kGlobalIlluminationProbe,
+
+	kCount,
+	kFirst = 0
+};
+ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(GpuSceneNonRenderableObjectTypeWithFeedback)
+
 ANKI_END_NAMESPACE
 ANKI_END_NAMESPACE

+ 9 - 2
AnKi/Shaders/Include/GpuVisibilityTypes.h

@@ -13,13 +13,15 @@ struct GpuVisibilityUniforms
 {
 {
 	Vec4 m_clipPlanes[6u];
 	Vec4 m_clipPlanes[6u];
 
 
-	UVec3 m_padding1;
 	U32 m_aabbCount;
 	U32 m_aabbCount;
+	U32 m_padding0;
+	U32 m_padding1;
+	U32 m_padding2;
 
 
 	Vec4 m_maxLodDistances;
 	Vec4 m_maxLodDistances;
 
 
 	Vec3 m_lodReferencePoint;
 	Vec3 m_lodReferencePoint;
-	F32 m_padding2;
+	F32 m_padding3;
 
 
 	Mat4 m_viewProjectionMat;
 	Mat4 m_viewProjectionMat;
 };
 };
@@ -27,6 +29,11 @@ struct GpuVisibilityUniforms
 struct GpuVisibilityNonRenderableUniforms
 struct GpuVisibilityNonRenderableUniforms
 {
 {
 	Vec4 m_clipPlanes[6u];
 	Vec4 m_clipPlanes[6u];
+
+	U32 m_feedbackCounterIdx;
+	U32 m_padding0;
+	U32 m_padding1;
+	U32 m_padding2;
 };
 };
 
 
 struct PointLightRendererCacheEntry
 struct PointLightRendererCacheEntry

+ 2 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -169,3 +169,5 @@ T WaveActiveMax(T value);
 bool WaveIsFirstLane();
 bool WaveIsFirstLane();
 
 
 unsigned WaveActiveCountBits(bool bit);
 unsigned WaveActiveCountBits(bool bit);
+
+unsigned WaveGetLaneCount();