Browse Source

Minor improvements

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
03216a37e8
2 changed files with 48 additions and 34 deletions
  1. 44 31
      AnKi/Renderer/Utils/GpuVisibility.cpp
  2. 4 3
      AnKi/Renderer/Utils/GpuVisibility.h

+ 44 - 31
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -235,34 +235,13 @@ Error GpuVisibilityNonRenderables::init()
 		}
 	}
 
-	{
-		CommandBufferInitInfo cmdbInit("TmpClear");
-		cmdbInit.m_flags |= CommandBufferFlag::kSmallBatch;
-		CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
-
-		for(U32 i = 0; i < kMaxRenderGraphAccelerationStructures; ++i)
-		{
-			RendererString name;
-			name.sprintf("GpuVisibilityNonRenderablesCounters#%u", i);
-
-			BufferInitInfo buffInit(name);
-			buffInit.m_size = 3 * sizeof(U32);
-			buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kTransferDestination;
-
-			m_counterBuffers[i] = GrManager::getSingleton().newBuffer(buffInit);
-
-			cmdb->fillBuffer(m_counterBuffers[i].get(), 0, kMaxPtrSize, 0);
-		}
-
-		cmdb->flush();
-		GrManager::getSingleton().finish();
-	}
-
 	return Error::kNone;
 }
 
 void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderablesInput& in, GpuVisibilityNonRenderablesOutput& out)
 {
+	RenderGraphDescription& rgraph = *in.m_rgraph;
+
 	U32 objCount = 0;
 	switch(in.m_objectType)
 	{
@@ -295,15 +274,44 @@ void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderable
 		ANKI_ASSERT(in.m_cpuFeedbackBuffer.m_range == sizeof(U32) * (objCount + 1));
 	}
 
-	// Find the counter buffer
-	U32 counterBufferIdx = kMaxU32;
-	if(m_lastFrameIdx != getRenderer().getFrameCount())
+	const Bool firstRunInFrame = m_lastFrameIdx != getRenderer().getFrameCount();
+	if(firstRunInFrame)
 	{
+		// 1st run in this frame, do some bookkeeping
 		m_lastFrameIdx = getRenderer().getFrameCount();
-		m_runIdx = 0;
+		m_counterBufferOffset = 0;
+		m_counterBufferZeroingHandle = {};
 	}
 
-	counterBufferIdx = m_runIdx++;
+	constexpr U32 kCountersPerDispatch = 3; // 1 for the threadgroup, 1 for the visible object count and 1 for objects with feedback
+	const U32 counterBufferElementSize = getAlignedRoundUp(GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment,
+														   U32(kCountersPerDispatch * sizeof(U32)));
+	if(!m_counterBuffer.isCreated() || m_counterBufferOffset + counterBufferElementSize > m_counterBuffer->getSize()) [[unlikely]]
+	{
+		// Counter buffer not created or not big enough, create a new one
+
+		BufferInitInfo buffInit("GpuVisibilityNonRenderablesCounters");
+		buffInit.m_size = (m_counterBuffer.isCreated()) ? m_counterBuffer->getSize() * 2
+														: kCountersPerDispatch * counterBufferElementSize * kInitialCounterArraySize;
+		buffInit.m_usage = BufferUsageBit::kStorageComputeWrite | BufferUsageBit::kStorageComputeRead | BufferUsageBit::kTransferDestination;
+		m_counterBuffer = GrManager::getSingleton().newBuffer(buffInit);
+
+		m_counterBufferZeroingHandle = rgraph.importBuffer(m_counterBuffer.get(), buffInit.m_usage, 0, kMaxPtrSize);
+
+		ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("GpuVisibilityNonRenderablesClearCounterBuffer");
+
+		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kTransferDestination);
+
+		pass.setWork([counterBuffer = m_counterBuffer](RenderPassWorkContext& rgraph) {
+			rgraph.m_commandBuffer->fillBuffer(counterBuffer.get(), 0, kMaxPtrSize, 0);
+		});
+
+		m_counterBufferOffset = 0;
+	}
+	else if(!firstRunInFrame)
+	{
+		m_counterBufferOffset += counterBufferElementSize;
+	}
 
 	// Allocate memory for the result
 	GpuVisibleTransientMemoryAllocation visibleIndicesAlloc = GpuVisibleTransientMemoryPool::getSingleton().allocate((objCount + 1) * sizeof(U32));
@@ -313,7 +321,6 @@ void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderable
 	out.m_visiblesBuffer.m_range = visibleIndicesAlloc.m_size;
 
 	// Import buffers
-	RenderGraphDescription& rgraph = *in.m_rgraph;
 	out.m_bufferHandle =
 		rgraph.importBuffer(out.m_visiblesBuffer.m_buffer, BufferUsageBit::kNone, out.m_visiblesBuffer.m_offset, out.m_visiblesBuffer.m_range);
 
@@ -328,8 +335,14 @@ void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderable
 		pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSampledCompute);
 	}
 
+	if(m_counterBufferZeroingHandle.isValid()) [[unlikely]]
+	{
+		pass.newBufferDependency(m_counterBufferZeroingHandle, BufferUsageBit::kStorageComputeRead | BufferUsageBit::kStorageComputeWrite);
+	}
+
 	pass.setWork([this, objType = in.m_objectType, feedbackBuffer = in.m_cpuFeedbackBuffer, viewProjectionMat = in.m_viewProjectionMat,
-				  visibleIndicesBuffHandle = out.m_bufferHandle, counterBufferIdx, objCount](RenderPassWorkContext& rgraph) {
+				  visibleIndicesBuffHandle = out.m_bufferHandle, counterBuffer = m_counterBuffer, counterBufferOffset = m_counterBufferOffset,
+				  objCount](RenderPassWorkContext& rgraph) {
 		CommandBuffer& cmdb = *rgraph.m_commandBuffer;
 
 		const Bool needsFeedback = feedbackBuffer.m_buffer != nullptr;
@@ -378,7 +391,7 @@ void GpuVisibilityNonRenderables::populateRenderGraph(GpuVisibilityNonRenderable
 		cmdb.setPushConstants(&unis, sizeof(unis));
 
 		rgraph.bindStorageBuffer(0, 1, visibleIndicesBuffHandle);
-		cmdb.bindStorageBuffer(0, 2, m_counterBuffers[counterBufferIdx].get(), 0, kMaxPtrSize);
+		cmdb.bindStorageBuffer(0, 2, counterBuffer.get(), counterBufferOffset, sizeof(U32) * kCountersPerDispatch);
 
 		if(needsFeedback)
 		{

+ 4 - 3
AnKi/Renderer/Utils/GpuVisibility.h

@@ -90,11 +90,12 @@ private:
 	ShaderProgramResourcePtr m_prog;
 	Array3d<ShaderProgramPtr, 2, U32(GpuSceneNonRenderableObjectType::kCount), 2> m_grProgs;
 
-	static constexpr U32 kMaxPopulateRenderGraphPerFrame = 32; ///< Max times the populateRenderGraph() will be called per frame.
+	static constexpr U32 kInitialCounterArraySize = 32;
 
-	Array<BufferPtr, kMaxPopulateRenderGraphPerFrame> m_counterBuffers; ///< A buffer containing multiple counters for atomic operations.
+	BufferHandle m_counterBufferZeroingHandle;
+	BufferPtr m_counterBuffer; ///< A buffer containing multiple counters for atomic operations.
 	U64 m_lastFrameIdx = kMaxU64;
-	U32 m_runIdx = 0;
+	U32 m_counterBufferOffset = 0;
 };
 /// @}