Parcourir la source

Optimize rendering of static lights

Panagiotis Christopoulos Charitos il y a 2 ans
Parent
commit
8ecc3cd0fe

+ 5 - 0
AnKi/Core/GpuMemory/GpuSceneBuffer.h

@@ -101,6 +101,11 @@ public:
 		return m_pool.getGpuBuffer();
 	}
 
+	BufferOffsetRange getBufferOffsetRange() const // Describes the pool's GPU buffer as a single bindable range.
+	{
+		return {&m_pool.getGpuBuffer(), 0, kMaxPtrSize}; // Offset 0 and kMaxPtrSize as the size; presumably means "the whole buffer" - TODO confirm.
+	}
+
 private:
 	SegregatedListsGpuMemoryPool m_pool;
 

+ 68 - 5
AnKi/Renderer/ShadowMapping.cpp

@@ -9,10 +9,12 @@
 #include <AnKi/Renderer/PrimaryNonRenderableVisibility.h>
 #include <AnKi/Core/App.h>
 #include <AnKi/Core/StatsSet.h>
+#include <AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h>
 #include <AnKi/Util/ThreadHive.h>
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Scene/Components/LightComponent.h>
 #include <AnKi/Scene/Components/CameraComponent.h>
+#include <AnKi/Scene/RenderStateBucket.h>
 
 namespace anki {
 
@@ -70,6 +72,8 @@ public:
 	Mat3x4 m_viewMat;
 
 	GpuVisibilityOutput m_visOut;
+
+	BufferOffsetRange m_clearTileIndirectArgs;
 };
 
 Error ShadowMapping::init()
@@ -109,10 +113,8 @@ Error ShadowMapping::initInternal()
 	m_fbDescr.m_depthStencilAttachment.m_loadOperation = AttachmentLoadOperation::kLoad;
 	m_fbDescr.bake();
 
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/ShadowmappingClearDepth.ankiprogbin", m_clearDepthProg));
-	const ShaderProgramResourceVariant* variant;
-	m_clearDepthProg->getOrCreateVariant(variant);
-	m_clearDepthGrProg.reset(&variant->getProgram());
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/ShadowMappingClearDepth.ankiprogbin", m_clearDepthProg, m_clearDepthGrProg));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/ShadowMappingVetVisibility.ankiprogbin", m_vetVisibilityProg, m_vetVisibilityGrProg));
 
 	for(U32 i = 0; i < kMaxShadowCascades; ++i)
 	{
@@ -457,10 +459,19 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			visIn.m_rgraph = &rgraph;
 			visIn.m_pointOfTest = lightc->getWorldPosition();
 			visIn.m_testRadius = lightc->getRadius();
+			visIn.m_hashVisibles = true;
 
 			GpuVisibilityOutput visOut;
 			getRenderer().getGpuVisibility().populateRenderGraph(visIn, visOut);
 
+			// Vet visibility
+			const Bool renderAllways = !(result & TileAllocatorResult2::kTileCached);
+			BufferOffsetRange clearTileIndirectArgs;
+			if(!renderAllways)
+			{
+				clearTileIndirectArgs = vetVisibilityPass("Shadows visibility: Vet point light", *lightc, visOut, rgraph);
+			}
+
 			// Add work
 			for(U32 face = 0; face < 6; ++face)
 			{
@@ -475,6 +486,7 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 				work.m_viewMat = frustum.getViewMatrix();
 				work.m_viewport = atlasViewports[face];
 				work.m_visOut = visOut;
+				work.m_clearTileIndirectArgs = clearTileIndirectArgs;
 			}
 		}
 		else
@@ -518,10 +530,19 @@ void ShadowMapping::processLights(RenderingContext& ctx)
 			visIn.m_lodDistances = lodDistances;
 			visIn.m_rgraph = &rgraph;
 			visIn.m_viewProjectionMatrix = lightc->getSpotLightViewProjectionMatrix();
+			visIn.m_hashVisibles = true;
 
 			GpuVisibilityOutput visOut;
 			getRenderer().getGpuVisibility().populateRenderGraph(visIn, visOut);
 
+			// Vet visibility
+			const Bool renderAllways = !(result & TileAllocatorResult2::kTileCached);
+			BufferOffsetRange clearTileIndirectArgs;
+			if(!renderAllways)
+			{
+				clearTileIndirectArgs = vetVisibilityPass("Shadows visibility: Vet spot light", *lightc, visOut, rgraph);
+			}
+
 			// Add work
 			ViewportWorkItem& work = *workItems.emplaceBack();
 			work.m_viewProjMat = lightc->getSpotLightViewProjectionMatrix();
@@ -568,7 +589,15 @@ void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 			cmdb.bindShaderProgram(m_clearDepthGrProg.get());
 			cmdb.setDepthCompareOperation(CompareOperation::kAlways);
 			cmdb.setPolygonOffset(0.0f, 0.0f);
-			cmdb.draw(PrimitiveTopology::kTriangles, 3, 1);
+
+			if(work.m_clearTileIndirectArgs.m_buffer)
+			{
+				cmdb.drawIndirect(PrimitiveTopology::kTriangles, 1, work.m_clearTileIndirectArgs.m_offset, work.m_clearTileIndirectArgs.m_buffer);
+			}
+			else
+			{
+				cmdb.draw(PrimitiveTopology::kTriangles, 3, 1);
+			}
 
 			// Restore state
 			cmdb.setDepthCompareOperation(CompareOperation::kLess);
@@ -588,4 +617,38 @@ void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 	}
 }
 
+BufferOffsetRange ShadowMapping::vetVisibilityPass(CString passName, const LightComponent& lightc, const GpuVisibilityOutput& visOut,
+												   RenderGraphDescription& rgraph) const // Adds the compute pass that decides if the light's shadows need re-rendering.
+{
+	BufferOffsetRange clearTileIndirectArgs;
+
+	clearTileIndirectArgs = GpuVisibleTransientMemoryPool::getSingleton().allocate(sizeof(DrawIndirectArgs)); // Room for one DrawIndirectArgs that the shader fills in.
+
+	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passName);
+
+	// The shader doesn't actually write to the handle, but it is declared as a write dependency so that the drawer correctly waits for this pass
+	pass.newBufferDependency(visOut.m_someBufferHandle, BufferUsageBit::kStorageComputeWrite);
+
+	pass.setWork([this, &lightc, hashBuff = visOut.m_visiblesHashBuffer, mdiBuff = visOut.m_mdiDrawCountsBuffer,
+				  clearTileIndirectArgs](RenderPassWorkContext& rpass) {
+		CommandBuffer& cmdb = *rpass.m_commandBuffer;
+
+		cmdb.bindShaderProgram(m_vetVisibilityGrProg.get());
+
+		const UVec4 lightIndex(lightc.getArrayIndex()); // 4 U32s to match the shader's Uniforms (m_lightIndex plus 3 padding words).
+		cmdb.setPushConstants(&lightIndex, sizeof(lightIndex));
+
+		cmdb.bindStorageBuffer(0, 0, hashBuff); // g_hash in the shader
+		cmdb.bindStorageBuffer(0, 1, mdiBuff); // g_mdiDrawCounts in the shader
+		cmdb.bindStorageBuffer(0, 2, GpuSceneArrays::Light::getSingleton().getBufferOffsetRange()); // g_lights in the shader
+		cmdb.bindStorageBuffer(0, 3, GpuSceneArrays::LightVisibleRenderablesHash::getSingleton().getBufferOffsetRange()); // g_lightHashes in the shader
+		cmdb.bindStorageBuffer(0, 4, clearTileIndirectArgs); // g_clearTileIndirectArgs in the shader
+
+		ANKI_ASSERT(RenderStateBucketContainer::getSingleton().getBucketCount(RenderingTechnique::kDepth) <= 64 && "TODO"); // The single 64-thread group zeroes at most one MDI count per thread.
+		cmdb.dispatchCompute(1, 1, 1);
+	});
+
+	return clearTileIndirectArgs; // The caller stores it and uses it as the indirect args of the tile clear draw.
+}
+
 } // end namespace anki

+ 7 - 0
AnKi/Renderer/ShadowMapping.h

@@ -13,6 +13,7 @@
 namespace anki {
 
 // Forward
+class GpuVisibilityOutput;
 extern NumericCVar<U32> g_shadowMappingPcfCVar;
 
 /// @addtogroup renderer
@@ -51,6 +52,9 @@ private:
 	ShaderProgramResourcePtr m_clearDepthProg;
 	ShaderProgramPtr m_clearDepthGrProg;
 
+	ShaderProgramResourcePtr m_vetVisibilityProg;
+	ShaderProgramPtr m_vetVisibilityGrProg;
+
 	Array<RenderTargetDescription, kMaxShadowCascades> m_cascadeHzbRtDescrs;
 
 	class
@@ -74,6 +78,9 @@ private:
 	void chooseDetail(const Vec3& cameraOrigin, const LightComponent& lightc, Vec2 lodDistances, U32& tileAllocatorHierarchy) const;
 
 	void runShadowMapping(RenderPassWorkContext& rgraphCtx);
+
+	BufferOffsetRange vetVisibilityPass(CString passName, const LightComponent& lightc, const GpuVisibilityOutput& visOut,
+										RenderGraphDescription& rgraph) const;
 };
 /// @}
 

+ 2 - 3
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -148,10 +148,9 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	const BufferHandle counterBufferHandle = in.m_rgraph->importBuffer(BufferUsageBit::kNone, counterBuffer);
 	out.m_someBufferHandle = counterBufferHandle;
 
-	BufferOffsetRange hashBuffer;
 	if(in.m_hashVisibles)
 	{
-		hashBuffer = {counterBuffer.m_buffer, 0, sizeof(GpuVisibilityHash)};
+		out.m_visiblesHashBuffer = {counterBuffer.m_buffer, counterBuffer.m_offset, sizeof(GpuVisibilityHash)};
 	}
 
 	// Zero some stuff
@@ -187,7 +186,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
 				  technique = in.m_technique, mdiDrawCountsBuffer = out.m_mdiDrawCountsBuffer,
 				  instanceRateRenderables = out.m_instanceRateRenderablesBuffer, indirectArgs = out.m_drawIndexedIndirectArgsBuffer, aabbCount,
-				  visibleAabbsBuffer = out.m_visibleAaabbIndicesBuffer, hashBuffer](RenderPassWorkContext& rpass) {
+				  visibleAabbsBuffer = out.m_visibleAaabbIndicesBuffer, hashBuffer = out.m_visiblesHashBuffer](RenderPassWorkContext& rpass) {
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 
 		const Bool gatherAabbIndices = visibleAabbsBuffer.m_buffer != nullptr;

+ 39 - 6
AnKi/Scene/Components/LightComponent.cpp

@@ -43,7 +43,8 @@ void LightComponent::setLightComponentType(LightComponentType newType)
 	{
 		m_type = newType;
 		m_shadowAtlasUvViewportCount = 0;
-		m_dirty = true;
+		m_shapeDirty = true;
+		m_otherDirty = true;
 		m_uuid = 0;
 
 		if(newType == LightComponentType::kDirectional)
@@ -62,8 +63,7 @@ void LightComponent::setLightComponentType(LightComponentType newType)
 Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 {
 	const Bool moveUpdated = info.m_node->movedThisFrame();
-	updated = moveUpdated || m_dirty;
-	m_dirty = false;
+	updated = moveUpdated || m_shapeDirty || m_otherDirty;
 
 	if(moveUpdated)
 	{
@@ -83,12 +83,27 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 		const Bool reallyShadow = m_shadow && m_shadowAtlasUvViewportCount == 6;
 
+		// Upload the hash
+		if(reallyShadow)
+		{
+			if(!m_hash.isValid())
+			{
+				m_hash.allocate();
+			}
+
+			if(m_shapeDirty || moveUpdated)
+			{
+				GpuSceneLightVisibleRenderablesHash hash = {};
+				m_hash.uploadToGpuScene(hash);
+			}
+		}
+
 		// Upload to the GPU scene
 		GpuSceneLight gpuLight = {};
 		gpuLight.m_position = m_worldTransform.getOrigin().xyz();
 		gpuLight.m_radius = m_point.m_radius;
 		gpuLight.m_diffuseColor = m_diffColor.xyz();
-		gpuLight.m_squareRadiusOverOne = 1.0f / (m_point.m_radius * m_point.m_radius);
+		gpuLight.m_visibleRenderablesHashIndex = (reallyShadow) ? m_hash.getIndex() : 0;
 		gpuLight.m_flags = GpuSceneLightFlag::kPointLight;
 		gpuLight.m_flags |= (reallyShadow) ? GpuSceneLightFlag::kShadow : GpuSceneLightFlag::kNone;
 		gpuLight.m_arrayIndex = getArrayIndex();
@@ -117,12 +132,27 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 		const Bool reallyShadow = m_shadow && m_shadowAtlasUvViewportCount == 1;
 
+		// Upload the hash
+		if(reallyShadow)
+		{
+			if(!m_hash.isValid())
+			{
+				m_hash.allocate();
+			}
+
+			if(m_shapeDirty || moveUpdated)
+			{
+				GpuSceneLightVisibleRenderablesHash hash = {};
+				m_hash.uploadToGpuScene(hash);
+			}
+		}
+
 		// Upload to the GPU scene
 		GpuSceneLight gpuLight = {};
 		gpuLight.m_position = m_worldTransform.getOrigin().xyz();
 		gpuLight.m_radius = m_spot.m_distance;
 		gpuLight.m_diffuseColor = m_diffColor.xyz();
-		gpuLight.m_squareRadiusOverOne = 1.0f / (m_spot.m_distance * m_spot.m_distance);
+		gpuLight.m_visibleRenderablesHashIndex = (reallyShadow) ? m_hash.getIndex() : 0;
 		gpuLight.m_flags = GpuSceneLightFlag::kSpotLight;
 		gpuLight.m_flags |= (reallyShadow) ? GpuSceneLightFlag::kShadow : GpuSceneLightFlag::kNone;
 		gpuLight.m_arrayIndex = getArrayIndex();
@@ -168,6 +198,9 @@ Error LightComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 		m_gpuSceneLight.free();
 	}
 
+	m_shapeDirty = false;
+	m_otherDirty = false;
+
 	return Error::kNone;
 }
 
@@ -324,7 +357,7 @@ void LightComponent::setShadowAtlasUvViewports(ConstWeakArray<Vec4> viewports)
 			m_shadowAtlasUvViewports[i] = viewports[i];
 		}
 
-		m_dirty = true;
+		m_shapeDirty = true;
 	}
 }
 

+ 13 - 8
AnKi/Scene/Components/LightComponent.h

@@ -52,13 +52,13 @@ public:
 	void setDiffuseColor(const Vec4& x)
 	{
 		m_diffColor = x;
-		m_dirty = true;
+		m_otherDirty = true;
 	}
 
 	void setRadius(F32 x)
 	{
 		m_point.m_radius = x;
-		m_dirty = true;
+		m_shapeDirty = true;
 	}
 
 	F32 getRadius() const
@@ -69,7 +69,7 @@ public:
 	void setDistance(F32 x)
 	{
 		m_spot.m_distance = x;
-		m_dirty = true;
+		m_shapeDirty = true;
 	}
 
 	F32 getDistance() const
@@ -80,7 +80,7 @@ public:
 	void setInnerAngle(F32 ang)
 	{
 		m_spot.m_innerAngle = ang;
-		m_dirty = true;
+		m_shapeDirty = true;
 	}
 
 	F32 getInnerAngle() const
@@ -91,7 +91,7 @@ public:
 	void setOuterAngle(F32 ang)
 	{
 		m_spot.m_outerAngle = ang;
-		m_dirty = true;
+		m_shapeDirty = true;
 	}
 
 	F32 getOuterAngle() const
@@ -106,8 +106,11 @@ public:
 
 	void setShadowEnabled(const Bool x)
 	{
-		m_shadow = x;
-		m_dirty = true;
+		if(x != m_shadow)
+		{
+			m_shadow = x;
+			m_shapeDirty = m_otherDirty = true;
+		}
 	}
 
 	Vec3 getDirection() const
@@ -172,6 +175,7 @@ private:
 	Spot m_spot;
 
 	GpuSceneArrays::Light::Allocation m_gpuSceneLight;
+	GpuSceneArrays::LightVisibleRenderablesHash::Allocation m_hash;
 
 	Array<Vec4, 6> m_shadowAtlasUvViewports;
 
@@ -180,7 +184,8 @@ private:
 	LightComponentType m_type;
 
 	U8 m_shadow : 1 = false;
-	U8 m_dirty : 1 = true;
+	U8 m_shapeDirty : 1 = true;
+	U8 m_otherDirty : 1 = true;
 	U8 m_shadowAtlasUvViewportCount : 3 = 0;
 
 	Error update(SceneComponentUpdateInfo& info, Bool& updated) override;

+ 2 - 0
AnKi/Scene/GpuSceneArrays.def.h

@@ -19,6 +19,8 @@ ANKI_CAT_TYPE(MeshLod, ANKI_MESH_ARR, 0, g_minGpuSceneMeshesCVar)
 ANKI_CAT_SEPARATOR
 ANKI_CAT_TYPE(ParticleEmitter, GpuSceneParticleEmitter, 0, g_minGpuSceneParticleEmittersCVar)
 ANKI_CAT_SEPARATOR
+ANKI_CAT_TYPE(LightVisibleRenderablesHash, GpuSceneLightVisibleRenderablesHash, 0, g_minGpuSceneLightsCVar)
+ANKI_CAT_SEPARATOR
 ANKI_CAT_TYPE(Light, GpuSceneLight, 0, g_minGpuSceneLightsCVar)
 ANKI_CAT_SEPARATOR
 ANKI_CAT_TYPE(ReflectionProbe, GpuSceneReflectionProbe, 0, g_minGpuSceneReflectionProbesCVar)

+ 1 - 1
AnKi/Shaders/GpuVisibility.ankiprog

@@ -150,7 +150,7 @@ struct DrawIndirectArgsWithPadding
 	}
 #	endif // HZB_TEST
 #else // DISTANCE_TEST == 1
-	if(!testSphereSphereCollision(aabb.m_sphereCenter, aabb.m_sphereRadius, g_unis.m_pointOfTest, aabb.m_sphereRadius))
+	if(!testSphereSphereCollision(aabb.m_sphereCenter, aabb.m_sphereRadius, g_unis.m_pointOfTest, g_unis.m_testRadius))
 	{
 		return;
 	}

+ 7 - 1
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -81,6 +81,12 @@ enum class GpuSceneLightFlag : U32
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(GpuSceneLightFlag)
 
+/// A hash of all visible renderables. If it matches between visibility tests then the drawcalls are skipped. Written by the GPU; the CPU side only uploads a zeroed instance (see LightComponent::update).
+struct GpuSceneLightVisibleRenderablesHash
+{
+	U32 m_hash; ///< Last GpuVisibilityHash::m_renderablesHash stored by the vet visibility shader.
+};
+
 /// Point or spot light.
 struct GpuSceneLight
 {
@@ -88,7 +94,7 @@ struct GpuSceneLight
 	RF32 m_radius ANKI_CPP_CODE(= 0.0f); ///< Radius.
 
 	RVec3 m_diffuseColor;
-	RF32 m_squareRadiusOverOne; ///< 1/(radius^2).
+	U32 m_visibleRenderablesHashIndex; ///< Points to a GpuSceneLightVisibleRenderablesHash
 
 	GpuSceneLightFlag m_flags;
 	U32 m_arrayIndex; ///< Array index of the LightComponent in the CPU scene.

+ 0 - 0
AnKi/Shaders/ShadowmappingClearDepth.ankiprog → AnKi/Shaders/ShadowMappingClearDepth.ankiprog


+ 70 - 0
AnKi/Shaders/ShadowMappingVetVisibility.ankiprog

@@ -0,0 +1,70 @@
+// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// A very simple compute shader that checks whether the light's shadows need rendering or not.
+
+#pragma anki start comp
+
+#include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
+#include <AnKi/Shaders/Include/GpuSceneTypes.h>
+
+[[vk::binding(0)]] StructuredBuffer<GpuVisibilityHash> g_hash;
+[[vk::binding(1)]] RWStructuredBuffer<U32> g_mdiDrawCounts;
+[[vk::binding(2)]] RWStructuredBuffer<GpuSceneLight> g_lights;
+[[vk::binding(3)]] RWStructuredBuffer<GpuSceneLightVisibleRenderablesHash> g_lightHashes;
+[[vk::binding(4)]] RWStructuredBuffer<DrawIndirectArgs> g_clearTileIndirectArgs;
+
+struct Uniforms
+{
+	U32 m_lightIndex; // Index into g_lights.
+	U32 m_padding0; // Padding: the CPU side pushes a whole UVec4.
+	U32 m_padding1;
+	U32 m_padding2;
+};
+
+[[vk::push_constant]] ConstantBuffer<Uniforms> g_unis;
+
+groupshared U32 s_renderLight;
+
+[numthreads(64, 1, 1)] void main(U32 svGroupIndex : SV_GROUPINDEX)
+{
+	if(svGroupIndex == 0) // Only the first thread decides; the others read the result from s_renderLight.
+	{
+		const GpuSceneLight light = g_lights[g_unis.m_lightIndex];
+		const U32 crntHash = g_lightHashes[light.m_visibleRenderablesHashIndex].m_hash; // Hash stored for this light so far.
+		s_renderLight = crntHash != g_hash[0].m_renderablesHash || g_hash[0].m_containsDeformable == 1; // Render if the hash changed or the deformable flag is set.
+
+		if(s_renderLight)
+		{
+			g_lightHashes[light.m_visibleRenderablesHashIndex].m_hash = g_hash[0].m_renderablesHash; // Remember the new hash for the next comparison.
+
+			g_clearTileIndirectArgs[0] = (DrawIndirectArgs)0;
+			g_clearTileIndirectArgs[0].m_vertexCount = 3; // 3 vertices and 1 instance, same as the non-indirect clear draw.
+			g_clearTileIndirectArgs[0].m_instanceCount = 1;
+		}
+		else
+		{
+			g_clearTileIndirectArgs[0] = (DrawIndirectArgs)0; // All-zero args, so the indirect clear draws nothing.
+		}
+	}
+
+	GroupMemoryBarrierWithGroupSync(); // All threads wait here so that they see the s_renderLight written by thread 0.
+
+	if(s_renderLight == 0)
+	{
+		// Won't render, nullify MDI counts
+
+		U32 renderStateBucketCount, unused;
+		g_mdiDrawCounts.GetDimensions(renderStateBucketCount, unused); // Element count of the bound buffer range; the stride is not needed.
+
+		if(svGroupIndex < renderStateBucketCount) // One thread per MDI draw count.
+		{
+			g_mdiDrawCounts[svGroupIndex] = 0;
+		}
+	}
+}
+
+#pragma anki end