瀏覽代碼

Remove the insane amount of GPU jobs required to render point lights

Panagiotis Christopoulos Charitos 1 年之前
父節點
當前提交
a47d177c4d

+ 2 - 1
AnKi/Gr/Vulkan/DescriptorSet.cpp

@@ -434,10 +434,11 @@ Bool DSStateTracker::flush(DSAllocator& allocator, VkDescriptorSet& dsHandle)
 	const Bool reallyBindless = m_bindlessDSBound && m_layout->m_hash == 0;
 	const Bool reallyBindless = m_bindlessDSBound && m_layout->m_hash == 0;
 	if(reallyBindless)
 	if(reallyBindless)
 	{
 	{
-		if(m_bindlessDSDirty)
+		if(m_bindlessDSDirty || m_layoutDirty)
 		{
 		{
 			dsHandle = DSBindless::getSingleton().m_dset;
 			dsHandle = DSBindless::getSingleton().m_dset;
 			m_bindlessDSDirty = false;
 			m_bindlessDSDirty = false;
+			m_layoutDirty = false;
 		}
 		}
 		return dsHandle != VK_NULL_HANDLE;
 		return dsHandle != VK_NULL_HANDLE;
 	}
 	}

+ 4 - 1
AnKi/Gr/Vulkan/DescriptorSet.h

@@ -150,14 +150,17 @@ public:
 		m_writeInfos = {pool};
 		m_writeInfos = {pool};
 	}
 	}
 
 
-	void setLayout(const DSLayout* layout)
+	Bool setLayout(const DSLayout* layout)
 	{
 	{
 		ANKI_ASSERT(layout);
 		ANKI_ASSERT(layout);
 		if(layout != m_layout)
 		if(layout != m_layout)
 		{
 		{
 			m_layoutDirty = true;
 			m_layoutDirty = true;
 			m_layout = layout;
 			m_layout = layout;
+			return true;
 		}
 		}
+
+		return false;
 	}
 	}
 
 
 	void setLayoutDirty()
 	void setLayoutDirty()

+ 106 - 53
AnKi/Renderer/ShadowMapping.cpp

@@ -380,7 +380,21 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 				clearTileIndirectArgs = createVetVisibilityPass(generateTempPassName("Shadows: Vet point light", lightIdx), *lightc, visOut, rgraph);
 				clearTileIndirectArgs = createVetVisibilityPass(generateTempPassName("Shadows: Vet point light", lightIdx), *lightc, visOut, rgraph);
 			}
 			}
 
 
-			// Add additional visibility and draw passes
+			// Additional visibility
+			GpuMeshletVisibilityOutput meshletVisOut;
+			if(getRenderer().runSoftwareMeshletRendering())
+			{
+				PassthroughGpuMeshletVisibilityInput meshIn;
+				meshIn.m_passesName = generateTempPassName("Shadows point light", lightIdx);
+				meshIn.m_technique = RenderingTechnique::kDepth;
+				meshIn.m_rgraph = &rgraph;
+				meshIn.fillBuffers(visOut);
+
+				getRenderer().getGpuVisibility().populateRenderGraph(meshIn, meshletVisOut);
+			}
+
+			// Draw
+			Array<ShadowSubpassInfo, 6> subpasses;
 			for(U32 face = 0; face < 6; ++face)
 			for(U32 face = 0; face < 6; ++face)
 			{
 			{
 				Frustum frustum;
 				Frustum frustum;
@@ -390,24 +404,13 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 					Transform(lightc->getWorldPosition().xyz0(), Frustum::getOmnidirectionalFrustumRotations()[face], Vec4(1.0f, 1.0f, 1.0f, 0.0f)));
 					Transform(lightc->getWorldPosition().xyz0(), Frustum::getOmnidirectionalFrustumRotations()[face], Vec4(1.0f, 1.0f, 1.0f, 0.0f)));
 				frustum.update();
 				frustum.update();
 
 
-				GpuMeshletVisibilityOutput meshletVisOut;
-				if(getRenderer().runSoftwareMeshletRendering())
-				{
-					GpuMeshletVisibilityInput meshIn;
-					meshIn.m_passesName = generateTempPassName("Shadows point light", lightIdx, "face", face);
-					meshIn.m_technique = RenderingTechnique::kDepth;
-					meshIn.m_viewProjectionMatrix = frustum.getViewProjectionMatrix();
-					meshIn.m_cameraTransform = frustum.getViewMatrix().getInverseTransformation();
-					meshIn.m_viewportSize = atlasViewports[face].zw();
-					meshIn.m_rgraph = &rgraph;
-					meshIn.fillBuffers(visOut);
-
-					getRenderer().getGpuVisibility().populateRenderGraph(meshIn, meshletVisOut);
-				}
-
-				createDrawShadowsPass(atlasViewports[face], frustum.getViewProjectionMatrix(), frustum.getViewMatrix(), visOut, meshletVisOut,
-									  clearTileIndirectArgs, {}, generateTempPassName("Shadows: Point light", lightIdx, "face", face), rgraph);
+				subpasses[face].m_clearTileIndirectArgs = clearTileIndirectArgs;
+				subpasses[face].m_viewMat = frustum.getViewMatrix();
+				subpasses[face].m_viewport = atlasViewports[face];
+				subpasses[face].m_viewProjMat = frustum.getViewProjectionMatrix();
 			}
 			}
+
+			createDrawShadowsPass(subpasses, visOut, meshletVisOut, generateTempPassName("Shadows: Point light", lightIdx), rgraph);
 		}
 		}
 		else
 		else
 		{
 		{
@@ -618,60 +621,110 @@ void ShadowMapping::createDrawShadowsPass(const UVec4& viewport, const Mat4& vie
 										  const GpuMeshletVisibilityOutput& meshletVisOut, const BufferOffsetRange& clearTileIndirectArgs,
 										  const GpuMeshletVisibilityOutput& meshletVisOut, const BufferOffsetRange& clearTileIndirectArgs,
 										  const RenderTargetHandle hzbRt, CString passName, RenderGraphDescription& rgraph)
 										  const RenderTargetHandle hzbRt, CString passName, RenderGraphDescription& rgraph)
 {
 {
-	const Bool loadFb = (clearTileIndirectArgs.m_buffer != nullptr);
+	ShadowSubpassInfo spass;
+	spass.m_clearTileIndirectArgs = clearTileIndirectArgs;
+	spass.m_hzbRt = hzbRt;
+	spass.m_viewMat = viewMat;
+	spass.m_viewport = viewport;
+	spass.m_viewProjMat = viewProjMat;
+
+	createDrawShadowsPass({&spass, 1}, visOut, meshletVisOut, passName, rgraph);
+}
+
+void ShadowMapping::createDrawShadowsPass(ConstWeakArray<ShadowSubpassInfo> subpasses_, const GpuVisibilityOutput& visOut,
+										  const GpuMeshletVisibilityOutput& meshletVisOut, CString passName, RenderGraphDescription& rgraph)
+{
+	WeakArray<ShadowSubpassInfo> subpasses;
+	newArray<ShadowSubpassInfo>(getRenderer().getFrameMemoryPool(), subpasses_.getSize(), subpasses);
+	memcpy(subpasses.getBegin(), subpasses_.getBegin(), subpasses.getSizeInBytes());
+
+	// Compute the whole viewport
+	UVec4 viewport;
+	if(subpasses.getSize() == 1)
+	{
+		viewport = subpasses[0].m_viewport;
+	}
+	else
+	{
+		viewport = UVec4(kMaxU32, kMaxU32, 0, 0);
+		for(const ShadowSubpassInfo& s : subpasses)
+		{
+			viewport.x() = min(viewport.x(), s.m_viewport.x());
+			viewport.y() = min(viewport.y(), s.m_viewport.y());
+			viewport.z() = max(viewport.z(), s.m_viewport.x() + s.m_viewport.z());
+			viewport.w() = max(viewport.w(), s.m_viewport.y() + s.m_viewport.w());
+		}
+		viewport.z() -= viewport.x();
+		viewport.w() -= viewport.y();
+	}
 
 
 	// Create the pass
 	// Create the pass
 	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passName);
 	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passName);
+
+	const Bool loadFb = !(subpasses.getSize() == 1 && subpasses[0].m_clearTileIndirectArgs.m_buffer == nullptr);
+
 	pass.setFramebufferInfo((loadFb) ? m_loadFbDescr : m_clearFbDescr, {}, m_runCtx.m_rt, {}, viewport[0], viewport[1], viewport[2], viewport[3]);
 	pass.setFramebufferInfo((loadFb) ? m_loadFbDescr : m_clearFbDescr, {}, m_runCtx.m_rt, {}, viewport[0], viewport[1], viewport[2], viewport[3]);
 
 
 	pass.newBufferDependency((meshletVisOut.isFilled()) ? meshletVisOut.m_dependency : visOut.m_dependency, BufferUsageBit::kIndirectDraw);
 	pass.newBufferDependency((meshletVisOut.isFilled()) ? meshletVisOut.m_dependency : visOut.m_dependency, BufferUsageBit::kIndirectDraw);
 	pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kFramebufferWrite, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
 	pass.newTextureDependency(m_runCtx.m_rt, TextureUsageBit::kFramebufferWrite, TextureSubresourceInfo(DepthStencilAspectBit::kDepth));
 
 
-	pass.setWork(1, [this, visOut, meshletVisOut, viewport, clearTileIndirectArgs, viewMat, viewProjMat, hzbRt](RenderPassWorkContext& rgraphCtx) {
+	pass.setWork(1 /*TODO*/, [this, visOut, meshletVisOut, subpasses, loadFb](RenderPassWorkContext& rgraphCtx) {
 		ANKI_TRACE_SCOPED_EVENT(ShadowMapping);
 		ANKI_TRACE_SCOPED_EVENT(ShadowMapping);
 
 
 		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
-		cmdb.setViewport(viewport[0], viewport[1], viewport[2], viewport[3]);
-
-		if(clearTileIndirectArgs.m_buffer)
+		for(U32 i = 0; i < subpasses.getSize(); ++i)
 		{
 		{
-			// Clear the depth buffer using a quad because it needs to be conditional
+			const ShadowSubpassInfo& spass = subpasses[i];
 
 
-			cmdb.bindShaderProgram(m_clearDepthGrProg.get());
-			cmdb.setDepthCompareOperation(CompareOperation::kAlways);
+			cmdb.setViewport(spass.m_viewport[0], spass.m_viewport[1], spass.m_viewport[2], spass.m_viewport[3]);
 
 
-			cmdb.drawIndirect(PrimitiveTopology::kTriangles, 1, clearTileIndirectArgs.m_offset, clearTileIndirectArgs.m_buffer);
+			if(loadFb)
+			{
+				cmdb.bindShaderProgram(m_clearDepthGrProg.get());
+				cmdb.setDepthCompareOperation(CompareOperation::kAlways);
 
 
-			cmdb.setDepthCompareOperation(CompareOperation::kLess);
-		}
+				if(spass.m_clearTileIndirectArgs.m_buffer)
+				{
 
 
-		// Set state
-		cmdb.setPolygonOffset(kShadowsPolygonOffsetFactor, kShadowsPolygonOffsetUnits);
-
-		RenderableDrawerArguments args;
-		args.m_renderingTechinuqe = RenderingTechnique::kDepth;
-		args.m_viewMatrix = viewMat;
-		args.m_cameraTransform = viewMat.getInverseTransformation();
-		args.m_viewProjectionMatrix = viewProjMat;
-		args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
-		args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();
-		args.m_viewport = UVec4(viewport[0], viewport[1], viewport[2], viewport[3]);
-		args.fill(visOut);
-
-		TextureViewPtr hzbView;
-		if(hzbRt.isValid())
-		{
-			hzbView = rgraphCtx.createTextureView(hzbRt);
-			args.m_hzbTexture = hzbView.get();
-		}
+					cmdb.drawIndirect(PrimitiveTopology::kTriangles, 1, spass.m_clearTileIndirectArgs.m_offset,
+									  spass.m_clearTileIndirectArgs.m_buffer);
+				}
+				else
+				{
+					cmdb.draw(PrimitiveTopology::kTriangles, 3);
+				}
 
 
-		if(meshletVisOut.isFilled())
-		{
-			args.fill(meshletVisOut);
-		}
+				cmdb.setDepthCompareOperation(CompareOperation::kLess);
+			}
+
+			// Set state
+			cmdb.setPolygonOffset(kShadowsPolygonOffsetFactor, kShadowsPolygonOffsetUnits);
+
+			RenderableDrawerArguments args;
+			args.m_renderingTechinuqe = RenderingTechnique::kDepth;
+			args.m_viewMatrix = spass.m_viewMat;
+			args.m_cameraTransform = spass.m_viewMat.getInverseTransformation();
+			args.m_viewProjectionMatrix = spass.m_viewProjMat;
+			args.m_previousViewProjectionMatrix = Mat4::getIdentity(); // Don't care
+			args.m_sampler = getRenderer().getSamplers().m_trilinearRepeat.get();
+			args.m_viewport = UVec4(spass.m_viewport[0], spass.m_viewport[1], spass.m_viewport[2], spass.m_viewport[3]);
+			args.fill(visOut);
+
+			TextureViewPtr hzbView;
+			if(spass.m_hzbRt.isValid())
+			{
+				hzbView = rgraphCtx.createTextureView(spass.m_hzbRt);
+				args.m_hzbTexture = hzbView.get();
+			}
+
+			if(meshletVisOut.isFilled())
+			{
+				args.fill(meshletVisOut);
+			}
 
 
-		getRenderer().getSceneDrawer().drawMdi(args, cmdb);
+			getRenderer().getSceneDrawer().drawMdi(args, cmdb);
+		}
 	});
 	});
 }
 }
 
 

+ 13 - 0
AnKi/Renderer/ShadowMapping.h

@@ -35,6 +35,16 @@ public:
 	}
 	}
 
 
 private:
 private:
+	class ShadowSubpassInfo
+	{
+	public:
+		UVec4 m_viewport;
+		Mat4 m_viewProjMat;
+		Mat3x4 m_viewMat;
+		BufferOffsetRange m_clearTileIndirectArgs;
+		RenderTargetHandle m_hzbRt;
+	};
+
 	TileAllocator m_tileAlloc;
 	TileAllocator m_tileAlloc;
 	static constexpr U32 kTileAllocHierarchyCount = 4;
 	static constexpr U32 kTileAllocHierarchyCount = 4;
 	static constexpr U32 kPointLightMaxTileAllocHierarchy = 1;
 	static constexpr U32 kPointLightMaxTileAllocHierarchy = 1;
@@ -79,6 +89,9 @@ private:
 	void createDrawShadowsPass(const UVec4& viewport, const Mat4& viewProjMat, const Mat3x4& viewMat, const GpuVisibilityOutput& visOut,
 	void createDrawShadowsPass(const UVec4& viewport, const Mat4& viewProjMat, const Mat3x4& viewMat, const GpuVisibilityOutput& visOut,
 							   const GpuMeshletVisibilityOutput& meshletVisOut, const BufferOffsetRange& clearTileIndirectArgs,
 							   const GpuMeshletVisibilityOutput& meshletVisOut, const BufferOffsetRange& clearTileIndirectArgs,
 							   const RenderTargetHandle hzbRt, CString passName, RenderGraphDescription& rgraph);
 							   const RenderTargetHandle hzbRt, CString passName, RenderGraphDescription& rgraph);
+
+	void createDrawShadowsPass(ConstWeakArray<ShadowSubpassInfo> subPasses, const GpuVisibilityOutput& visOut,
+							   const GpuMeshletVisibilityOutput& meshletVisOut, CString passName, RenderGraphDescription& rgraph);
 };
 };
 /// @}
 /// @}
 
 

+ 37 - 11
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -90,8 +90,11 @@ Error GpuVisibility::init()
 
 
 	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
 	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
 	{
 	{
-		ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityMeshlet.ankiprogbin", {{"HZB_TEST", hzb}}, m_meshletCullingProg,
-									 m_meshletCullingGrProgs[hzb]));
+		for(MutatorValue passthrough = 0; passthrough < 2; ++passthrough)
+		{
+			ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibilityMeshlet.ankiprogbin", {{"HZB_TEST", hzb}, {"PASSTHROUGH", passthrough}},
+										 m_meshletCullingProg, m_meshletCullingGrProgs[hzb][passthrough]));
+		}
 	}
 	}
 
 
 	return Error::kNone;
 	return Error::kNone;
@@ -506,7 +509,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	});
 	});
 }
 }
 
 
-void GpuVisibility::populateRenderGraph(GpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out)
+void GpuVisibility::populateRenderGraphMeshletInternal(Bool passthrough, BaseGpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out)
 {
 {
 	RenderGraphDescription& rgraph = *in.m_rgraph;
 	RenderGraphDescription& rgraph = *in.m_rgraph;
 
 
@@ -516,6 +519,28 @@ void GpuVisibility::populateRenderGraph(GpuMeshletVisibilityInput& in, GpuMeshle
 		return;
 		return;
 	}
 	}
 
 
+	class NonPassthrough
+	{
+	public:
+		Mat4 m_viewProjectionMatrix;
+		Mat3x4 m_cameraTransform;
+
+		UVec2 m_viewportSize;
+
+		RenderTargetHandle m_hzbRt;
+	}* nonPassthroughData = nullptr;
+
+	if(!passthrough)
+	{
+		GpuMeshletVisibilityInput& nonPassthroughIn = static_cast<GpuMeshletVisibilityInput&>(in);
+
+		nonPassthroughData = newInstance<NonPassthrough>(getRenderer().getFrameMemoryPool());
+		nonPassthroughData->m_viewProjectionMatrix = nonPassthroughIn.m_viewProjectionMatrix;
+		nonPassthroughData->m_cameraTransform = nonPassthroughIn.m_cameraTransform;
+		nonPassthroughData->m_viewportSize = nonPassthroughIn.m_viewportSize;
+		nonPassthroughData->m_hzbRt = nonPassthroughIn.m_hzbRt;
+	}
+
 	// Allocate memory
 	// Allocate memory
 	const U32 bucketCount = m_runCtx.m_renderableInstanceRanges[in.m_technique].getSize();
 	const U32 bucketCount = m_runCtx.m_renderableInstanceRanges[in.m_technique].getSize();
 	ANKI_ASSERT(RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique) == bucketCount);
 	ANKI_ASSERT(RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique) == bucketCount);
@@ -560,8 +585,8 @@ void GpuVisibility::populateRenderGraph(GpuMeshletVisibilityInput& in, GpuMeshle
 	pass.newBufferDependency(mem.m_bufferDepedency, BufferUsageBit::kUavComputeWrite);
 	pass.newBufferDependency(mem.m_bufferDepedency, BufferUsageBit::kUavComputeWrite);
 	pass.newBufferDependency(in.m_dependency, BufferUsageBit::kIndirectCompute);
 	pass.newBufferDependency(in.m_dependency, BufferUsageBit::kIndirectCompute);
 
 
-	pass.setWork([this, hzbRt = in.m_hzbRt, viewProjMat = in.m_viewProjectionMatrix, camTrf = in.m_cameraTransform, viewportSize = in.m_viewportSize,
-				  computeIndirectArgs = in.m_taskShaderIndirectArgsBuffer, out, meshletGroupInstancesBuffer = in.m_meshletGroupInstancesBuffer,
+	pass.setWork([this, nonPassthroughData, computeIndirectArgs = in.m_taskShaderIndirectArgsBuffer, out,
+				  meshletGroupInstancesBuffer = in.m_meshletGroupInstancesBuffer,
 				  bucketMeshletGroupInstanceRanges = in.m_bucketMeshletGroupInstanceRanges](RenderPassWorkContext& rpass) {
 				  bucketMeshletGroupInstanceRanges = in.m_bucketMeshletGroupInstanceRanges](RenderPassWorkContext& rpass) {
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 
 
@@ -574,9 +599,10 @@ void GpuVisibility::populateRenderGraph(GpuMeshletVisibilityInput& in, GpuMeshle
 				continue;
 				continue;
 			}
 			}
 
 
-			const Bool hasHzb = hzbRt.isValid();
+			const Bool hasHzb = (nonPassthroughData) ? nonPassthroughData->m_hzbRt.isValid() : false;
+			const Bool isPassthrough = (nonPassthroughData == nullptr);
 
 
-			cmdb.bindShaderProgram(m_meshletCullingGrProgs[hasHzb].get());
+			cmdb.bindShaderProgram(m_meshletCullingGrProgs[hasHzb][isPassthrough].get());
 
 
 			cmdb.bindUavBuffer(0, 0, meshletGroupInstancesBuffer);
 			cmdb.bindUavBuffer(0, 0, meshletGroupInstancesBuffer);
 			cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
 			cmdb.bindUavBuffer(0, 1, GpuSceneArrays::Renderable::getSingleton().getBufferOffsetRange());
@@ -587,7 +613,7 @@ void GpuVisibility::populateRenderGraph(GpuMeshletVisibilityInput& in, GpuMeshle
 			cmdb.bindUavBuffer(0, 6, out.m_meshletInstancesBuffer);
 			cmdb.bindUavBuffer(0, 6, out.m_meshletInstancesBuffer);
 			if(hasHzb)
 			if(hasHzb)
 			{
 			{
-				rpass.bindColorTexture(0, 7, hzbRt);
+				rpass.bindColorTexture(0, 7, nonPassthroughData->m_hzbRt);
 				cmdb.bindSampler(0, 8, getRenderer().getSamplers().m_nearestNearestClamp.get());
 				cmdb.bindSampler(0, 8, getRenderer().getSamplers().m_nearestNearestClamp.get());
 			}
 			}
 
 
@@ -608,10 +634,10 @@ void GpuVisibility::populateRenderGraph(GpuMeshletVisibilityInput& in, GpuMeshle
 				U32 m_padding2;
 				U32 m_padding2;
 				U32 m_padding3;
 				U32 m_padding3;
 			} consts;
 			} consts;
-			consts.m_viewProjectionMatrix = viewProjMat;
-			consts.m_cameraPos = camTrf.getTranslationPart().xyz();
+			consts.m_viewProjectionMatrix = (!isPassthrough) ? nonPassthroughData->m_viewProjectionMatrix : Mat4::getIdentity();
+			consts.m_cameraPos = (!isPassthrough) ? nonPassthroughData->m_cameraTransform.getTranslationPart().xyz() : Vec3(0.0f);
 			consts.m_firstDrawArg = i;
 			consts.m_firstDrawArg = i;
-			consts.m_viewportSizef = Vec2(viewportSize);
+			consts.m_viewportSizef = (!isPassthrough) ? Vec2(nonPassthroughData->m_viewportSize) : Vec2(0.0f);
 			consts.m_firstMeshletGroup = bucketMeshletGroupInstanceRanges[i].getFirstInstance();
 			consts.m_firstMeshletGroup = bucketMeshletGroupInstanceRanges[i].getFirstInstance();
 			consts.m_firstMeshlet = out.m_bucketMeshletInstanceRanges[i].getFirstInstance();
 			consts.m_firstMeshlet = out.m_bucketMeshletInstanceRanges[i].getFirstInstance();
 			consts.m_meshletCount = out.m_bucketMeshletInstanceRanges[i].getInstanceCount();
 			consts.m_meshletCount = out.m_bucketMeshletInstanceRanges[i].getInstanceCount();

+ 33 - 11
AnKi/Renderer/Utils/GpuVisibility.h

@@ -116,19 +116,13 @@ public:
 };
 };
 
 
 /// @memberof GpuVisibility
 /// @memberof GpuVisibility
-class GpuMeshletVisibilityInput
+class BaseGpuMeshletVisibilityInput
 {
 {
 public:
 public:
 	CString m_passesName;
 	CString m_passesName;
 
 
 	RenderingTechnique m_technique = RenderingTechnique::kCount;
 	RenderingTechnique m_technique = RenderingTechnique::kCount;
 
 
-	Mat4 m_viewProjectionMatrix;
-	Mat3x4 m_cameraTransform;
-
-	/// The size of the viewport the visibility results will be used on. Used to kill objects that don't touch the sampling positions.
-	UVec2 m_viewportSize;
-
 	BufferOffsetRange m_taskShaderIndirectArgsBuffer; ///< Taken from GpuVisibilityOutput.
 	BufferOffsetRange m_taskShaderIndirectArgsBuffer; ///< Taken from GpuVisibilityOutput.
 	BufferOffsetRange m_meshletGroupInstancesBuffer; ///< Taken from GpuVisibilityOutput.
 	BufferOffsetRange m_meshletGroupInstancesBuffer; ///< Taken from GpuVisibilityOutput.
 	ConstWeakArray<InstanceRange> m_bucketMeshletGroupInstanceRanges; ///< Taken from GpuVisibilityOutput.
 	ConstWeakArray<InstanceRange> m_bucketMeshletGroupInstanceRanges; ///< Taken from GpuVisibilityOutput.
@@ -137,8 +131,6 @@ public:
 
 
 	RenderGraphDescription* m_rgraph = nullptr;
 	RenderGraphDescription* m_rgraph = nullptr;
 
 
-	RenderTargetHandle m_hzbRt; ///< Optional.
-
 	void fillBuffers(const GpuVisibilityOutput& perObjVisOut)
 	void fillBuffers(const GpuVisibilityOutput& perObjVisOut)
 	{
 	{
 		m_taskShaderIndirectArgsBuffer = perObjVisOut.m_mesh.m_taskShaderIndirectArgsBuffer;
 		m_taskShaderIndirectArgsBuffer = perObjVisOut.m_mesh.m_taskShaderIndirectArgsBuffer;
@@ -148,6 +140,24 @@ public:
 	}
 	}
 };
 };
 
 
+/// @memberof GpuVisibility
+class GpuMeshletVisibilityInput : public BaseGpuMeshletVisibilityInput
+{
+public:
+	Mat4 m_viewProjectionMatrix;
+	Mat3x4 m_cameraTransform;
+
+	/// The size of the viewport the visibility results will be used on. Used to kill objects that don't touch the sampling positions.
+	UVec2 m_viewportSize;
+
+	RenderTargetHandle m_hzbRt; ///< Optional.
+};
+
+/// @memberof GpuVisibility
+class PassthroughGpuMeshletVisibilityInput : public BaseGpuMeshletVisibilityInput
+{
+};
+
 /// @memberof GpuVisibility
 /// @memberof GpuVisibility
 class GpuMeshletVisibilityOutput
 class GpuMeshletVisibilityOutput
 {
 {
@@ -190,7 +200,17 @@ public:
 
 
 	/// Perform meshlet GPU visibility.
 	/// Perform meshlet GPU visibility.
 	/// @note Not thread-safe.
 	/// @note Not thread-safe.
-	void populateRenderGraph(GpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out);
+	void populateRenderGraph(GpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out)
+	{
+		populateRenderGraphMeshletInternal(false, in, out);
+	}
+
+	/// Perform meshlet GPU visibility.
+	/// @note Not thread-safe.
+	void populateRenderGraph(PassthroughGpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out)
+	{
+		populateRenderGraphMeshletInternal(true, in, out);
+	}
 
 
 private:
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_prog;
@@ -198,7 +218,7 @@ private:
 	Array3d<ShaderProgramPtr, 2, 2, 3> m_distGrProgs;
 	Array3d<ShaderProgramPtr, 2, 2, 3> m_distGrProgs;
 
 
 	ShaderProgramResourcePtr m_meshletCullingProg;
 	ShaderProgramResourcePtr m_meshletCullingProg;
-	Array<ShaderProgramPtr, 2> m_meshletCullingGrProgs;
+	Array2d<ShaderProgramPtr, 2, 2> m_meshletCullingGrProgs;
 
 
 	// Contains a quite large buffer that we want to reuse multiple times in a single frame.
 	// Contains a quite large buffer that we want to reuse multiple times in a single frame.
 	class PersistentMemory
 	class PersistentMemory
@@ -265,6 +285,8 @@ private:
 
 
 	void populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out);
 	void populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out);
 
 
+	void populateRenderGraphMeshletInternal(Bool passthrough, BaseGpuMeshletVisibilityInput& in, GpuMeshletVisibilityOutput& out);
+
 	static void computeGpuVisibilityMemoryRequirements(RenderingTechnique t, MemoryRequirements& total, WeakArray<MemoryRequirements> perBucket);
 	static void computeGpuVisibilityMemoryRequirements(RenderingTechnique t, MemoryRequirements& total, WeakArray<MemoryRequirements> perBucket);
 };
 };
 
 

+ 21 - 1
AnKi/ShaderCompiler/MaliOfflineCompiler.cpp

@@ -86,7 +86,7 @@ String MaliOfflineCompilerOut::toString() const
 	return str;
 	return str;
 }
 }
 
 
-Error runMaliOfflineCompiler(CString maliocExecutable, ConstWeakArray<U8> spirv, ShaderType shaderType, MaliOfflineCompilerOut& out)
+Error runMaliOfflineCompiler(ConstWeakArray<U8> spirv, ShaderType shaderType, MaliOfflineCompilerOut& out)
 {
 {
 	out = {};
 	out = {};
 	const U32 rand = g_nextFileId.fetchAdd(1) + getCurrentProcessId();
 	const U32 rand = g_nextFileId.fetchAdd(1) + getCurrentProcessId();
@@ -122,6 +122,18 @@ Error runMaliOfflineCompiler(CString maliocExecutable, ConstWeakArray<U8> spirv,
 	case ShaderType::kCompute:
 	case ShaderType::kCompute:
 		args[0] = "-C";
 		args[0] = "-C";
 		break;
 		break;
+	case ShaderType::kRayGen:
+		args[0] = "--ray_generation";
+		break;
+	case ShaderType::kAnyHit:
+		args[0] = "--ray_any_hit";
+		break;
+	case ShaderType::kClosestHit:
+		args[0] = "--ray_closest_hit";
+		break;
+	case ShaderType::kMiss:
+		args[0] = "--ray_miss";
+		break;
 	default:
 	default:
 		ANKI_ASSERT(0 && "Unhandled case");
 		ANKI_ASSERT(0 && "Unhandled case");
 	}
 	}
@@ -135,6 +147,14 @@ Error runMaliOfflineCompiler(CString maliocExecutable, ConstWeakArray<U8> spirv,
 
 
 	// Execute
 	// Execute
 	I32 exitCode;
 	I32 exitCode;
+#if ANKI_OS_LINUX
+	CString maliocExecutable = ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Linux64/MaliOfflineCompiler/malioc";
+#elif ANKI_OS_WINDOWS
+	CString maliocExecutable = ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Windows64/MaliOfflineCompiler/malioc.exe";
+#else
+	CString maliocExecutable = "nothing";
+	ANKI_ASSERT(0);
+#endif
 	ANKI_CHECK(Process::callProcess(maliocExecutable, args, nullptr, nullptr, exitCode));
 	ANKI_CHECK(Process::callProcess(maliocExecutable, args, nullptr, nullptr, exitCode));
 	if(exitCode != 0)
 	if(exitCode != 0)
 	{
 	{

+ 1 - 1
AnKi/ShaderCompiler/MaliOfflineCompiler.h

@@ -45,7 +45,7 @@ public:
 };
 };
 
 
 /// Run the mali offline compiler and get some info back.
 /// Run the mali offline compiler and get some info back.
-Error runMaliOfflineCompiler(CString maliocExecutable, ConstWeakArray<U8> spirv, ShaderType shaderType, MaliOfflineCompilerOut& out);
+Error runMaliOfflineCompiler(ConstWeakArray<U8> spirv, ShaderType shaderType, MaliOfflineCompilerOut& out);
 /// @}
 /// @}
 
 
 } // end namespace anki
 } // end namespace anki

+ 9 - 1
AnKi/ShaderCompiler/RadeonGpuAnalyzer.cpp

@@ -34,7 +34,7 @@ static CString getPipelineStageString(ShaderType shaderType)
 	return out;
 	return out;
 }
 }
 
 
-Error runRadeonGpuAnalyzer(CString rgaExecutable, ConstWeakArray<U8> spirv, ShaderType shaderType, RgaOutput& out)
+Error runRadeonGpuAnalyzer(ConstWeakArray<U8> spirv, ShaderType shaderType, RgaOutput& out)
 {
 {
 	ANKI_ASSERT(spirv.getSize() > 0);
 	ANKI_ASSERT(spirv.getSize() > 0);
 	const U32 rand = g_nextFileId.fetchAdd(1) + getCurrentProcessId();
 	const U32 rand = g_nextFileId.fetchAdd(1) + getCurrentProcessId();
@@ -64,6 +64,14 @@ Error runRadeonGpuAnalyzer(CString rgaExecutable, ConstWeakArray<U8> spirv, Shad
 	args[6] = spvFilename;
 	args[6] = spvFilename;
 
 
 	I32 exitCode;
 	I32 exitCode;
+#if ANKI_OS_LINUX
+	CString rgaExecutable = ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Linux64/RadeonGpuAnalyzer/rga";
+#elif ANKI_OS_WINDOWS
+	CString rgaExecutable = ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Windows64/RadeonGpuAnalyzer/rga.exe";
+#else
+	CString rgaExecutable = "nothing";
+	ANKI_ASSERT(0);
+#endif
 	ANKI_CHECK(Process::callProcess(rgaExecutable, args, nullptr, nullptr, exitCode));
 	ANKI_CHECK(Process::callProcess(rgaExecutable, args, nullptr, nullptr, exitCode));
 
 
 	if(exitCode != 0)
 	if(exitCode != 0)

+ 1 - 1
AnKi/ShaderCompiler/RadeonGpuAnalyzer.h

@@ -25,7 +25,7 @@ public:
 };
 };
 
 
 /// Run the Radeon GPU Analyzer and get some info back.
 /// Run the Radeon GPU Analyzer and get some info back.
-Error runRadeonGpuAnalyzer(CString rgaExecutable, ConstWeakArray<U8> spirv, ShaderType shaderType, RgaOutput& out);
+Error runRadeonGpuAnalyzer(ConstWeakArray<U8> spirv, ShaderType shaderType, RgaOutput& out);
 /// @}
 /// @}
 
 
 } // end namespace anki
 } // end namespace anki

+ 14 - 8
AnKi/Shaders/GpuVisibilityMeshlet.ankiprog

@@ -4,6 +4,7 @@
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
 #pragma anki mutator HZB_TEST 0 1
 #pragma anki mutator HZB_TEST 0 1
+#pragma anki mutator PASSTHROUGH 0 1
 
 
 #pragma anki technique_start comp
 #pragma anki technique_start comp
 
 
@@ -69,12 +70,15 @@ struct Consts
 		Bool cull = false;
 		Bool cull = false;
 
 
 		const MeshletBoundingVolume meshletBoundingVol = g_meshletBoundingVolumes[firstMeshletBoundingVolume + svGroupIndex];
 		const MeshletBoundingVolume meshletBoundingVol = g_meshletBoundingVolumes[firstMeshletBoundingVolume + svGroupIndex];
+
+#if !PASSTHROUGH
+
 		const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 		const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 
 
-#if MESHLET_BACKFACE_CULLING
+#	if MESHLET_BACKFACE_CULLING
 		const Vec4 coneDirAndAng = unpackSnorm4x8(meshletBoundingVol.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
 		const Vec4 coneDirAndAng = unpackSnorm4x8(meshletBoundingVol.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
 		cull = cullBackfaceMeshlet(coneDirAndAng.xyz, coneDirAndAng.w, meshletBoundingVol.m_coneApex, worldTransform, g_consts.m_cameraPos);
 		cull = cullBackfaceMeshlet(coneDirAndAng.xyz, coneDirAndAng.w, meshletBoundingVol.m_coneApex, worldTransform, g_consts.m_cameraPos);
-#endif
+#	endif
 
 
 		const Mat4 wordTransform4 = {worldTransform.m_row0, worldTransform.m_row1, worldTransform.m_row2, Vec4(0.0f, 0.0f, 0.0f, 1.0f)};
 		const Mat4 wordTransform4 = {worldTransform.m_row0, worldTransform.m_row1, worldTransform.m_row2, Vec4(0.0f, 0.0f, 0.0f, 1.0f)};
 		const Mat4 mvp = mul(g_consts.m_viewProjectionMatrix, wordTransform4);
 		const Mat4 mvp = mul(g_consts.m_viewProjectionMatrix, wordTransform4);
@@ -83,21 +87,23 @@ struct Consts
 		F32 aabbMinDepth;
 		F32 aabbMinDepth;
 		projectAabb(meshletBoundingVol.m_aabbMin, meshletBoundingVol.m_aabbMax, mvp, minNdc, maxNdc, aabbMinDepth);
 		projectAabb(meshletBoundingVol.m_aabbMin, meshletBoundingVol.m_aabbMax, mvp, minNdc, maxNdc, aabbMinDepth);
 
 
-#if MESHLET_OUTSIDE_OF_SCREEN_CULLING
+#	if MESHLET_OUTSIDE_OF_SCREEN_CULLING
 		// Outside of the screen
 		// Outside of the screen
 		cull = cull || (any(minNdc > 1.0f) || any(maxNdc < -1.0f));
 		cull = cull || (any(minNdc > 1.0f) || any(maxNdc < -1.0f));
-#endif
+#	endif
 
 
-#if MESHLET_NO_SAMPLING_POINT_CULLING
+#	if MESHLET_NO_SAMPLING_POINT_CULLING
 		// Sampling points test
 		// Sampling points test
 		const Vec2 windowCoordsMin = ndcToUv(minNdc) * g_consts.m_viewportSizef;
 		const Vec2 windowCoordsMin = ndcToUv(minNdc) * g_consts.m_viewportSizef;
 		const Vec2 windowCoordsMax = ndcToUv(maxNdc) * g_consts.m_viewportSizef;
 		const Vec2 windowCoordsMax = ndcToUv(maxNdc) * g_consts.m_viewportSizef;
 		cull = cull || any(round(windowCoordsMin) == round(windowCoordsMax));
 		cull = cull || any(round(windowCoordsMin) == round(windowCoordsMax));
-#endif
+#	endif
 
 
-#if MESHLET_HZB_CULLING
+#	if MESHLET_HZB_CULLING
 		cull = cull || (renderable.m_boneTransformsOffset == 0u && cullHzb(minNdc, maxNdc, aabbMinDepth, g_hzbTexture, g_nearestClampSampler));
 		cull = cull || (renderable.m_boneTransformsOffset == 0u && cullHzb(minNdc, maxNdc, aabbMinDepth, g_hzbTexture, g_nearestClampSampler));
-#endif
+#	endif
+
+#endif // !PASSTHROUGH
 
 
 		if(!cull)
 		if(!cull)
 		{
 		{

二進制
ThirdParty/Bin/Windows64/MaliOfflineCompiler/graphics/Mali-Gxx_r41p0-00rel0.dll → ThirdParty/Bin/Windows64/MaliOfflineCompiler/graphics/Mali-Gxx_r45p0-00rel0.dll


二進制
ThirdParty/Bin/Windows64/MaliOfflineCompiler/graphics/Mali-T600_r23p0-00rel0.dll


二進制
ThirdParty/Bin/Windows64/MaliOfflineCompiler/malioc.exe


+ 11 - 23
Tools/Shader/ShaderProgramBinaryDumpMain.cpp

@@ -172,36 +172,24 @@ Error dumpStats(const ShaderProgramBinary& bin)
 
 
 					// Arm stats
 					// Arm stats
 					MaliOfflineCompilerOut maliocOut;
 					MaliOfflineCompilerOut maliocOut;
-					Error err = runMaliOfflineCompiler(
-#if ANKI_OS_LINUX
-						ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Linux64/MaliOfflineCompiler/malioc",
-#elif ANKI_OS_WINDOWS
-						ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Windows64/MaliOfflineCompiler/malioc.exe",
-#else
-#	error "Not supported"
-#endif
-						codeBlock.m_binary, shaderType, maliocOut);
+					Error err = Error::kNone;
 
 
-					if(err)
+					if(shaderType == ShaderType::kVertex || shaderType == ShaderType::kFragment || shaderType == ShaderType::kCompute)
 					{
 					{
-						ANKI_LOGE("Mali offline compiler failed");
-						ctx.m_error.store(1);
-						break;
+						err = runMaliOfflineCompiler(codeBlock.m_binary, shaderType, maliocOut);
+
+						if(err)
+						{
+							ANKI_LOGE("Mali offline compiler failed");
+							ctx.m_error.store(1);
+							break;
+						}
 					}
 					}
 
 
 					// AMD
 					// AMD
 					RgaOutput rgaOut = {};
 					RgaOutput rgaOut = {};
 #if 0
 #if 0
-					err = runRadeonGpuAnalyzer(
-#	if ANKI_OS_LINUX
-						ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Linux64/RadeonGpuAnalyzer/rga",
-#	elif ANKI_OS_WINDOWS
-						ANKI_SOURCE_DIRECTORY "/ThirdParty/Bin/Windows64/RadeonGpuAnalyzer/rga.exe",
-#	else
-#		error "Not supported"
-#	endif
-						codeBlock.m_binary, shaderType, rgaOut);
-
+					err = runRadeonGpuAnalyzer(codeBlock.m_binary, shaderType, rgaOut);
 					if(err)
 					if(err)
 					{
 					{
 						ANKI_LOGE("Radeon GPU Analyzer compiler failed");
 						ANKI_LOGE("Radeon GPU Analyzer compiler failed");