Browse Source

Fix a bug

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
9931918829

+ 11 - 3
AnKi/Renderer/GBuffer.cpp

@@ -161,10 +161,18 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 
 	const CommonMatrices& matrices = (getRenderer().getFrameCount() <= 1) ? ctx.m_matrices : ctx.m_prevMatrices;
 	const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
+
+	GpuVisibilityInput visIn;
+	visIn.m_passesName = "GBuffer visibility";
+	visIn.m_technique = RenderingTechnique::kGBuffer;
+	visIn.m_viewProjectionMatrix = matrices.m_viewProjection;
+	visIn.m_lodReferencePoint = matrices.m_cameraTransform.getTranslationPart().xyz();
+	visIn.m_lodDistances = lodDistances;
+	visIn.m_rgraph = &rgraph;
+	visIn.m_hzbRt = &m_runCtx.m_hzbRt;
+
 	GpuVisibilityOutput visOut;
-	getRenderer().getGpuVisibility().populateRenderGraph("GBuffer visibility", RenderingTechnique::kGBuffer, matrices.m_viewProjection,
-														 matrices.m_cameraTransform.getTranslationPart().xyz(), lodDistances, &m_runCtx.m_hzbRt,
-														 rgraph, visOut);
+	getRenderer().getGpuVisibility().populateRenderGraph(visIn, visOut);
 
 	const Bool enableVrs = GrManager::getSingleton().getDeviceCapabilities().m_vrs && g_vrsCVar.get() && g_gbufferVrsCVar.get();
 	const Bool fbDescrHasVrs = m_fbDescr.m_shadingRateAttachmentTexelWidth > 0;

+ 20 - 6
AnKi/Renderer/IndirectDiffuseProbes.cpp

@@ -197,9 +197,16 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 	{
 		const RenderQueue& queue = *giCtx->m_probeToUpdateThisFrame->m_renderQueues[i];
 		Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
-		getRenderer().getGpuVisibility().populateRenderGraph("GI GBuffer visibility", RenderingTechnique::kGBuffer, queue.m_viewProjectionMatrix,
-															 queue.m_cameraTransform.getTranslationPart().xyz(), lodDistances, nullptr, rgraph,
-															 giCtx->m_gbufferVisOut[i]);
+
+		GpuVisibilityInput visIn;
+		visIn.m_passesName = "GI GBuffer visibility";
+		visIn.m_technique = RenderingTechnique::kGBuffer;
+		visIn.m_viewProjectionMatrix = queue.m_viewProjectionMatrix;
+		visIn.m_lodReferencePoint = queue.m_cameraTransform.getTranslationPart().xyz();
+		visIn.m_lodDistances = lodDistances;
+		visIn.m_rgraph = &rgraph;
+
+		getRenderer().getGpuVisibility().populateRenderGraph(visIn, giCtx->m_gbufferVisOut[i]);
 	}
 
 	// GBuffer
@@ -244,9 +251,16 @@ void IndirectDiffuseProbes::populateRenderGraph(RenderingContext& rctx)
 		{
 			const RenderQueue& queue = *giCtx->m_probeToUpdateThisFrame->m_renderQueues[i]->m_directionalLight.m_shadowRenderQueues[0];
 			Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
-			getRenderer().getGpuVisibility().populateRenderGraph("GI shadows visibility", RenderingTechnique::kDepth, queue.m_viewProjectionMatrix,
-																 queue.m_cameraTransform.getTranslationPart().xyz(), lodDistances, nullptr, rgraph,
-																 giCtx->m_shadowsVisOut[i]);
+
+			GpuVisibilityInput visIn;
+			visIn.m_passesName = "GI shadows visibility";
+			visIn.m_technique = RenderingTechnique::kDepth;
+			visIn.m_viewProjectionMatrix = queue.m_viewProjectionMatrix;
+			visIn.m_lodReferencePoint = queue.m_cameraTransform.getTranslationPart().xyz();
+			visIn.m_lodDistances = lodDistances;
+			visIn.m_rgraph = &rgraph;
+
+			getRenderer().getGpuVisibility().populateRenderGraph(visIn, giCtx->m_shadowsVisOut[i]);
 		}
 	}
 

+ 19 - 6
AnKi/Renderer/ProbeReflections.cpp

@@ -339,9 +339,16 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 	{
 		const RenderQueue& queue = *m_ctx.m_probe->m_renderQueues[i];
 		Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
-		getRenderer().getGpuVisibility().populateRenderGraph("Cube refl GBuffer visibility", RenderingTechnique::kGBuffer,
-															 queue.m_viewProjectionMatrix, queue.m_cameraTransform.getTranslationPart().xyz(),
-															 lodDistances, nullptr, rgraph, visOuts[i]);
+
+		GpuVisibilityInput visIn;
+		visIn.m_passesName = "Cube refl GBuffer visibility";
+		visIn.m_technique = RenderingTechnique::kGBuffer;
+		visIn.m_viewProjectionMatrix = queue.m_viewProjectionMatrix;
+		visIn.m_lodReferencePoint = queue.m_cameraTransform.getTranslationPart().xyz();
+		visIn.m_lodDistances = lodDistances;
+		visIn.m_rgraph = &rgraph;
+
+		getRenderer().getGpuVisibility().populateRenderGraph(visIn, visOuts[i]);
 	}
 
 	// GBuffer pass
@@ -390,9 +397,15 @@ void ProbeReflections::populateRenderGraph(RenderingContext& rctx)
 			const RenderQueue& queue = *m_ctx.m_probe->m_renderQueues[i]->m_directionalLight.m_shadowRenderQueues[0];
 			Array<F32, kMaxLodCount - 1> lodDistances = {1000.0f, 1001.0f}; // Something far to force detailed LODs
 
-			getRenderer().getGpuVisibility().populateRenderGraph("Cube refl shadows visibility", RenderingTechnique::kDepth,
-																 queue.m_viewProjectionMatrix, queue.m_cameraTransform.getTranslationPart().xyz(),
-																 lodDistances, nullptr, rgraph, shadowVisOuts[i]);
+			GpuVisibilityInput visIn;
+			visIn.m_passesName = "Cube refl shadows visibility";
+			visIn.m_technique = RenderingTechnique::kDepth;
+			visIn.m_viewProjectionMatrix = queue.m_viewProjectionMatrix;
+			visIn.m_lodReferencePoint = queue.m_cameraTransform.getTranslationPart().xyz();
+			visIn.m_lodDistances = lodDistances;
+			visIn.m_rgraph = &rgraph;
+
+			getRenderer().getGpuVisibility().populateRenderGraph(visIn, shadowVisOuts[i]);
 		}
 	}
 

+ 11 - 3
AnKi/Renderer/ShadowMapping.cpp

@@ -256,9 +256,17 @@ void ShadowMapping::newWorkItem(const UVec4& atlasViewport, const RenderQueue& q
 	ViewportWorkItem& work = *workItems.emplaceBack();
 
 	const Array<F32, kMaxLodCount - 1> lodDistances = {g_lod0MaxDistanceCVar.get(), g_lod1MaxDistanceCVar.get()};
-	getRenderer().getGpuVisibility().populateRenderGraph("Shadowmapping visibility", RenderingTechnique::kDepth, queue.m_viewProjectionMatrix,
-														 queue.m_cameraTransform.getTranslationPart().xyz(), lodDistances, hzbRt, rgraph,
-														 work.m_visOut);
+
+	GpuVisibilityInput visIn;
+	visIn.m_passesName = "Shadows visibility";
+	visIn.m_technique = RenderingTechnique::kDepth;
+	visIn.m_viewProjectionMatrix = queue.m_viewProjectionMatrix;
+	visIn.m_lodReferencePoint = queue.m_cameraTransform.getTranslationPart().xyz();
+	visIn.m_lodDistances = lodDistances;
+	visIn.m_hzbRt = hzbRt;
+	visIn.m_rgraph = &rgraph;
+
+	getRenderer().getGpuVisibility().populateRenderGraph(visIn, work.m_visOut);
 
 	work.m_viewport = atlasViewport;
 	work.m_mvp = queue.m_viewProjectionMatrix;

+ 6 - 5
AnKi/Renderer/Utils/Drawer.cpp

@@ -151,8 +151,8 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 {
 	setState(args, cmdb);
 
-	cmdb.bindVertexBuffer(0, args.m_instanceRateRenderablesBuffer, args.m_instanceRateRenderablesBufferOffset, sizeof(GpuSceneRenderablePacked),
-						  VertexStepRate::kInstance);
+	cmdb.bindVertexBuffer(0, args.m_instanceRateRenderablesBuffer.m_buffer, args.m_instanceRateRenderablesBuffer.m_offset,
+						  sizeof(GpuSceneRenderablePacked), VertexStepRate::kInstance);
 
 	U32 allUserCount = 0;
 	U32 bucketCount = 0;
@@ -170,9 +170,10 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 
 		const U32 maxDrawCount = userCount;
 
-		cmdb.drawIndexedIndirectCount(state.m_primitiveTopology, args.m_drawIndexedIndirectArgsBuffer,
-									  args.m_drawIndexedIndirectArgsBufferOffset + sizeof(DrawIndexedIndirectArgs) * allUserCount,
-									  args.m_mdiDrawCountsBuffer, args.m_mdiDrawCountsBufferOffset + sizeof(U32) * bucketCount, maxDrawCount);
+		cmdb.drawIndexedIndirectCount(state.m_primitiveTopology, args.m_drawIndexedIndirectArgsBuffer.m_buffer,
+									  args.m_drawIndexedIndirectArgsBuffer.m_offset + sizeof(DrawIndexedIndirectArgs) * allUserCount,
+									  args.m_mdiDrawCountsBuffer.m_buffer, args.m_mdiDrawCountsBuffer.m_offset + sizeof(U32) * bucketCount,
+									  maxDrawCount);
 
 		++bucketCount;
 		allUserCount += userCount;

+ 3 - 17
AnKi/Renderer/Utils/Drawer.h

@@ -34,29 +34,15 @@ public:
 	// For MDI
 	RenderingTechnique m_renderingTechinuqe = RenderingTechnique::kCount;
 
-	Buffer* m_mdiDrawCountsBuffer = nullptr;
-	PtrSize m_mdiDrawCountsBufferOffset = 0;
-	PtrSize m_mdiDrawCountsBufferRange = 0;
-
-	Buffer* m_drawIndexedIndirectArgsBuffer = nullptr;
-	PtrSize m_drawIndexedIndirectArgsBufferOffset = 0;
-	PtrSize m_drawIndexedIndirectArgsBufferRange = 0;
-
-	Buffer* m_instanceRateRenderablesBuffer = nullptr;
-	PtrSize m_instanceRateRenderablesBufferOffset = 0;
-	PtrSize m_instanceRateRenderablesBufferRange = 0;
+	BufferOffsetRange m_mdiDrawCountsBuffer;
+	BufferOffsetRange m_drawIndexedIndirectArgsBuffer;
+	BufferOffsetRange m_instanceRateRenderablesBuffer;
 
 	void fillMdi(const GpuVisibilityOutput& visOut)
 	{
 		m_mdiDrawCountsBuffer = visOut.m_mdiDrawCountsBuffer;
-		m_mdiDrawCountsBufferOffset = visOut.m_mdiDrawCountsBufferOffset;
-		m_mdiDrawCountsBufferRange = visOut.m_mdiDrawCountsBufferRange;
 		m_drawIndexedIndirectArgsBuffer = visOut.m_drawIndexedIndirectArgsBuffer;
-		m_drawIndexedIndirectArgsBufferOffset = visOut.m_drawIndexedIndirectArgsBufferOffset;
-		m_drawIndexedIndirectArgsBufferRange = visOut.m_drawIndexedIndirectArgsBufferRange;
 		m_instanceRateRenderablesBuffer = visOut.m_instanceRateRenderablesBuffer;
-		m_instanceRateRenderablesBufferOffset = visOut.m_instanceRateRenderablesBufferOffset;
-		m_instanceRateRenderablesBufferRange = visOut.m_instanceRateRenderablesBufferRange;
 	}
 };
 

+ 15 - 22
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -45,12 +45,10 @@ Error GpuVisibility::init()
 	return Error::kNone;
 }
 
-void GpuVisibility::populateRenderGraph(CString passesName, RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 lodReferencePoint,
-										const Array<F32, kMaxLodCount - 1> lodDistances, const RenderTargetHandle* hzbRt,
-										RenderGraphDescription& rgraph, GpuVisibilityOutput& out)
+void GpuVisibility::populateRenderGraph(GpuVisibilityInput& in, GpuVisibilityOutput& out)
 {
 	U32 aabbCount = 0;
-	switch(technique)
+	switch(in.m_technique)
 	{
 	case RenderingTechnique::kGBuffer:
 		aabbCount = GpuSceneArrays::RenderableAabbGBuffer::getSingleton().getElementCount();
@@ -61,7 +59,7 @@ void GpuVisibility::populateRenderGraph(CString passesName, RenderingTechnique t
 		ANKI_ASSERT(0);
 	}
 
-	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(technique);
+	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique);
 
 #if ANKI_STATS_ENABLED
 	Bool firstCallInTheFrame = false;
@@ -93,44 +91,39 @@ void GpuVisibility::populateRenderGraph(CString passesName, RenderingTechnique t
 	// Allocate memory for the indirect commands
 	const GpuVisibleTransientMemoryAllocation indirectArgs =
 		GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(DrawIndexedIndirectArgs));
-	out.m_drawIndexedIndirectArgsBuffer = indirectArgs.m_buffer;
-	out.m_drawIndexedIndirectArgsBufferOffset = indirectArgs.m_offset;
-	out.m_drawIndexedIndirectArgsBufferRange = indirectArgs.m_size;
+	out.m_drawIndexedIndirectArgsBuffer = {indirectArgs.m_buffer, indirectArgs.m_offset, indirectArgs.m_size};
 
 	const GpuVisibleTransientMemoryAllocation instanceRateRenderables =
 		GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(GpuSceneRenderable));
-	out.m_instanceRateRenderablesBuffer = instanceRateRenderables.m_buffer;
-	out.m_instanceRateRenderablesBufferOffset = instanceRateRenderables.m_offset;
-	out.m_instanceRateRenderablesBufferRange = instanceRateRenderables.m_size;
+	out.m_instanceRateRenderablesBuffer = {instanceRateRenderables.m_buffer, instanceRateRenderables.m_offset, instanceRateRenderables.m_size};
 
 	// Allocate and zero the MDI counts
 	RebarAllocation mdiDrawCounts;
 	U32* atomics = RebarTransientMemoryPool::getSingleton().allocateFrame<U32>(bucketCount, mdiDrawCounts);
 	memset(atomics, 0, mdiDrawCounts.m_range);
-	out.m_mdiDrawCountsBuffer = &RebarTransientMemoryPool::getSingleton().getBuffer();
-	out.m_mdiDrawCountsBufferOffset = mdiDrawCounts.m_offset;
-	out.m_mdiDrawCountsBufferRange = mdiDrawCounts.m_range;
+	out.m_mdiDrawCountsBuffer = {&RebarTransientMemoryPool::getSingleton().getBuffer(), mdiDrawCounts.m_offset, mdiDrawCounts.m_range};
 
 	// Import buffers
-	out.m_mdiDrawCountsHandle = rgraph.importBuffer(&RebarTransientMemoryPool::getSingleton().getBuffer(), BufferUsageBit::kNone,
-													mdiDrawCounts.m_offset, mdiDrawCounts.m_range);
+	out.m_mdiDrawCountsHandle = in.m_rgraph->importBuffer(&RebarTransientMemoryPool::getSingleton().getBuffer(), BufferUsageBit::kNone,
+														  mdiDrawCounts.m_offset, mdiDrawCounts.m_range);
 
 	// Create the renderpass
-	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passesName);
+	ComputeRenderPassDescription& pass = in.m_rgraph->newComputeRenderPass(in.m_passesName);
 
 	pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kStorageComputeRead);
 	pass.newBufferDependency(out.m_mdiDrawCountsHandle, BufferUsageBit::kStorageComputeWrite);
 
-	if(hzbRt)
+	if(in.m_hzbRt)
 	{
-		pass.newTextureDependency(*hzbRt, TextureUsageBit::kSampledCompute);
+		pass.newTextureDependency(*in.m_hzbRt, TextureUsageBit::kSampledCompute);
 	}
 
 	const RenderTargetHandle hzbRtCopy =
-		(hzbRt) ? *hzbRt : RenderTargetHandle(); // Can't pass to the lambda the hzbRt which is a pointer to who knows what
+		(in.m_hzbRt) ? *in.m_hzbRt : RenderTargetHandle(); // Can't pass to the lambda the hzbRt which is a pointer to who knows what
 
-	pass.setWork([this, viewProjectionMat, lodReferencePoint, lodDistances, technique, hzbRtCopy, mdiDrawCountsHandle = out.m_mdiDrawCountsHandle,
-				  instanceRateRenderables, indirectArgs, aabbCount
+	pass.setWork([this, viewProjectionMat = in.m_viewProjectionMatrix, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
+				  technique = in.m_technique, hzbRtCopy, mdiDrawCountsHandle = out.m_mdiDrawCountsHandle, instanceRateRenderables, indirectArgs,
+				  aabbCount
 #if ANKI_STATS_ENABLED
 				  ,
 				  clearStatsBuffer, clearStatsBufferOffset, writeStatsBuffer, writeStatsBufferOffset

+ 18 - 14
AnKi/Renderer/Utils/GpuVisibility.h

@@ -14,22 +14,28 @@ namespace anki {
 /// @addtogroup renderer
 /// @{
 
+/// @memberof GpuVisibility
+class GpuVisibilityInput
+{
+public:
+	CString m_passesName;
+	RenderingTechnique m_technique = RenderingTechnique::kCount;
+	Mat4 m_viewProjectionMatrix = Mat4::getIdentity();
+	Vec3 m_lodReferencePoint = Vec3(0.0f);
+	Array<F32, kMaxLodCount - 1> m_lodDistances = {};
+	const RenderTargetHandle* m_hzbRt = nullptr; ///< Optional.
+	RenderGraphDescription* m_rgraph = nullptr;
+};
+
+/// @memberof GpuVisibility
 class GpuVisibilityOutput
 {
 public:
 	BufferHandle m_mdiDrawCountsHandle; ///< Just expose one handle for dependencies. No need to track all other buffers.
 
-	Buffer* m_instanceRateRenderablesBuffer;
-	Buffer* m_drawIndexedIndirectArgsBuffer;
-	Buffer* m_mdiDrawCountsBuffer;
-
-	PtrSize m_instanceRateRenderablesBufferOffset;
-	PtrSize m_drawIndexedIndirectArgsBufferOffset;
-	PtrSize m_mdiDrawCountsBufferOffset;
-
-	PtrSize m_instanceRateRenderablesBufferRange;
-	PtrSize m_drawIndexedIndirectArgsBufferRange;
-	PtrSize m_mdiDrawCountsBufferRange;
+	BufferOffsetRange m_instanceRateRenderablesBuffer;
+	BufferOffsetRange m_drawIndexedIndirectArgsBuffer;
+	BufferOffsetRange m_mdiDrawCountsBuffer;
 };
 
 /// Performs GPU visibility for some pass.
@@ -39,9 +45,7 @@ public:
 	Error init();
 
 	/// Populate the rendergraph.
-	void populateRenderGraph(CString passesName, RenderingTechnique technique, const Mat4& viewProjectionMat, Vec3 lodReferencePoint,
-							 const Array<F32, kMaxLodCount - 1> lodDistances, const RenderTargetHandle* hzbRt, RenderGraphDescription& rgraph,
-							 GpuVisibilityOutput& out);
+	void populateRenderGraph(GpuVisibilityInput& in, GpuVisibilityOutput& out);
 
 private:
 	ShaderProgramResourcePtr m_prog;

+ 3 - 3
AnKi/Shaders/GpuVisibilityNonRenderables.ankiprog

@@ -36,7 +36,7 @@ typedef GpuSceneGlobalIlluminationProbe ObjectType;
 constexpr U32 kVisibleObjCounterIdx = 1;
 constexpr U32 kThreadgroupCounterIdx = 0;
 constexpr U32 kFeedbackCounterIdx = 2;
-[[vk::binding(2)]] RWStructuredBuffer<U32> g_counterBuffer; // 2 counters per dispatch with an optional 3rd for feedback
+[[vk::binding(2)]] globallycoherent RWStructuredBuffer<U32> g_counterBuffer; // 2 counters per dispatch with an optional 3rd for feedback
 
 #if CPU_FEEDBACK
 // 1st element is a count. What follows is an array of UUIDs.
@@ -128,11 +128,11 @@ Vec4 getSphere(GpuSceneGlobalIlluminationProbe l)
 		U32 threadgroupIdx;
 		InterlockedAdd(g_counterBuffer[kThreadgroupCounterIdx], 1, threadgroupIdx);
 		const U32 threadgroupCount = (objectCount + NUMTHREADS - 1) / NUMTHREADS;
-		lastThreadgroupExecuting = (threadgroupIdx == threadgroupCount);
+		lastThreadgroupExecuting = (threadgroupIdx + 1 == threadgroupCount);
 	}
 
 	// Sync to make sure all the atomic ops have finished before the following code reads them
-	GroupMemoryBarrier();
+	AllMemoryBarrierWithGroupSync();
 
 	if(lastThreadgroupExecuting)
 	{