Browse Source

Fix light binning

Panagiotis Christopoulos Charitos 4 months ago
parent
commit
e1f4d6d91e

+ 7 - 4
AnKi/Renderer/AccelerationStructureBuilder.cpp

@@ -65,16 +65,19 @@ void AccelerationStructureBuilder::populateRenderGraph(RenderingContext& ctx)
 	// Light visibility
 	// Light visibility
 	{
 	{
 		GpuVisibilityLocalLightsInput in;
 		GpuVisibilityLocalLightsInput in;
-		in.m_cellCounts = UVec3(g_lightGridSizeXYCVar, g_lightGridSizeXYCVar, g_lightGridSizeZCVar);
-		in.m_cellSize = Vec3(g_lightGridCellSizeXYCVar, g_lightGridCellSizeXYCVar, g_lightGridCellSizeZCVar);
+		in.m_cellCounts = UVec3(g_lightGridCellCountXZCVar, g_lightGridCellCountYCVar, g_lightGridCellCountXZCVar);
+		in.m_cellSize = Vec3(g_lightGridSizeXZCVar, g_lightGridSizeYCVar, g_lightGridSizeXZCVar) / Vec3(in.m_cellCounts);
 		in.m_cameraPosition = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
 		in.m_cameraPosition = ctx.m_matrices.m_cameraTransform.getTranslationPart().xyz();
 		in.m_lookDirection = -ctx.m_matrices.m_cameraTransform.getRotationPart().getZAxis();
 		in.m_lookDirection = -ctx.m_matrices.m_cameraTransform.getRotationPart().getZAxis();
 		in.m_lightIndexListSize = g_lightIndexListSizeCVar;
 		in.m_lightIndexListSize = g_lightIndexListSizeCVar;
 		in.m_rgraph = &ctx.m_renderGraphDescr;
 		in.m_rgraph = &ctx.m_renderGraphDescr;
 
 
-		GpuVisibilityLocalLightsOutput out;
+		getGpuVisibilityLocalLights().populateRenderGraph(in, m_runCtx.m_lightVisInfo);
 
 
-		getGpuVisibilityLocalLights().populateRenderGraph(in, out);
+		m_runCtx.m_lightGridConsts.m_volumeMin = m_runCtx.m_lightVisInfo.m_lightGridMin;
+		m_runCtx.m_lightGridConsts.m_volumeMax = m_runCtx.m_lightVisInfo.m_lightGridMax;
+		m_runCtx.m_lightGridConsts.m_cellCounts = in.m_cellCounts;
+		m_runCtx.m_lightGridConsts.m_cellSize = in.m_cellSize;
 	}
 	}
 }
 }
 
 

+ 30 - 10
AnKi/Renderer/AccelerationStructureBuilder.h

@@ -6,23 +6,34 @@
 #pragma once
 #pragma once
 
 
 #include <AnKi/Renderer/RendererObject.h>
 #include <AnKi/Renderer/RendererObject.h>
+#include <AnKi/Renderer/Utils/GpuVisibility.h>
 
 
 namespace anki {
 namespace anki {
 
 
 /// @addtogroup renderer
 /// @addtogroup renderer
 /// @{
 /// @{
-
 inline NumericCVar<F32>
 inline NumericCVar<F32>
 	g_rayTracingExtendedFrustumDistanceCVar("R", "RayTracingExtendedFrustumDistance", 200.0f, 10.0f, 10000.0f,
 	g_rayTracingExtendedFrustumDistanceCVar("R", "RayTracingExtendedFrustumDistance", 200.0f, 10.0f, 10000.0f,
 											"Every object that its distance from the camera is bellow that value will take part in ray tracing");
 											"Every object that its distance from the camera is bellow that value will take part in ray tracing");
 
 
-inline NumericCVar<U32> g_lightGridSizeXYCVar("R", "LightGridSizeXY", 128, 1, 1024, "The number of cells in the X and Y axis");
-inline NumericCVar<U32> g_lightGridSizeZCVar("R", "LightGridSizeZ", 4, 1, 1024, "The number of cells in the Z axis");
-inline NumericCVar<F32> g_lightGridCellSizeXYCVar("R", "LightGridCellSizeXY", 2.0f, 0.5f, 1000.0f, "The cell size in the X and Y dimensions");
-inline NumericCVar<F32> g_lightGridCellSizeZCVar("R", "LightGridCellSizeZ", 25.0f, 0.5f, 1000.0f, "The cell size in the Z dimension");
+inline NumericCVar<U32> g_lightGridCellCountXZCVar("R", "LightGridCellCountXZ", 64, 1, 1024, "The number of cells in the X and Z axis");
+inline NumericCVar<U32> g_lightGridCellCountYCVar("R", "LightGridCellCountY", 4, 1, 1024, "The number of cells in the Y axis");
+inline NumericCVar<F32> g_lightGridSizeXZCVar("R", "LightGridSizeXZ", 128.0f, 10.0f, 10000.0f,
+											  "The size of the grid volume in the X and Z dimensions");
+inline NumericCVar<F32> g_lightGridSizeYCVar("R", "LightGridSizeY", 64.0f, 10.0f, 10000.0f, "The size of the grid in the Y dimension");
 inline NumericCVar<U32> g_lightIndexListSizeCVar("R", "LightIndexListSize", 64 * 1024, 128, 256 * 1024, "The light index list size");
 inline NumericCVar<U32> g_lightIndexListSizeCVar("R", "LightIndexListSize", 64 * 1024, 128, 256 * 1024, "The light index list size");
 
 
-/// Build acceleration structures.
+/// @memberof AccelerationStructureBuilder
+class AccelerationStructureVisibilityInfo
+{
+public:
+	BufferHandle m_depedency; ///< Dependency for the buffer views bellow.
+
+	BufferView m_visibleRenderablesBuffer;
+	BufferView m_buildSbtIndirectArgsBuffer;
+};
+
+/// Builds acceleration structures and also bins lights to some sort of grid.
 class AccelerationStructureBuilder : public RendererObject
 class AccelerationStructureBuilder : public RendererObject
 {
 {
 public:
 public:
@@ -38,11 +49,17 @@ public:
 		return m_runCtx.m_tlasHandle;
 		return m_runCtx.m_tlasHandle;
 	}
 	}
 
 
-	void getVisibilityInfo(BufferHandle& depedency, BufferView& visibleRenderables, BufferView& buildSbtIndirectArgs) const
+	void getVisibilityInfo(AccelerationStructureVisibilityInfo& asVisInfo, GpuVisibilityLocalLightsOutput& lightVisInfo) const
 	{
 	{
-		depedency = m_runCtx.m_dependency;
-		visibleRenderables = m_runCtx.m_visibleRenderablesBuff;
-		buildSbtIndirectArgs = m_runCtx.m_buildSbtIndirectArgsBuff;
+		asVisInfo.m_depedency = m_runCtx.m_dependency;
+		asVisInfo.m_visibleRenderablesBuffer = m_runCtx.m_visibleRenderablesBuff;
+		asVisInfo.m_buildSbtIndirectArgsBuffer = m_runCtx.m_buildSbtIndirectArgsBuff;
+		lightVisInfo = m_runCtx.m_lightVisInfo;
+	}
+
+	const LocalLightsGridConstants& getLocalLightsGridConstants() const
+	{
+		return m_runCtx.m_lightGridConsts;
 	}
 	}
 
 
 public:
 public:
@@ -55,6 +72,9 @@ public:
 		BufferHandle m_dependency;
 		BufferHandle m_dependency;
 		BufferView m_visibleRenderablesBuff;
 		BufferView m_visibleRenderablesBuff;
 		BufferView m_buildSbtIndirectArgsBuff;
 		BufferView m_buildSbtIndirectArgsBuff;
+
+		GpuVisibilityLocalLightsOutput m_lightVisInfo;
+		LocalLightsGridConstants m_lightGridConsts = {};
 	} m_runCtx;
 	} m_runCtx;
 };
 };
 /// @}
 /// @}

+ 6 - 5
AnKi/Renderer/IndirectDiffuse.cpp

@@ -68,9 +68,9 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 	BufferHandle sbtHandle;
 	BufferHandle sbtHandle;
 	BufferView sbtBuffer;
 	BufferView sbtBuffer;
 	{
 	{
-		BufferHandle visibilityDep;
-		BufferView visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff;
-		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityDep, visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff);
+		AccelerationStructureVisibilityInfo asVis;
+		GpuVisibilityLocalLightsOutput localLightsVis;
+		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(asVis, localLightsVis);
 
 
 		// Allocate SBT
 		// Allocate SBT
 		U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
 		U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
@@ -91,10 +91,11 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 		// Create the pass
 		// Create the pass
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtIndirectDiffuse build SBT");
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtIndirectDiffuse build SBT");
 
 
-		rpass.newBufferDependency(visibilityDep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kSrvCompute);
+		rpass.newBufferDependency(asVis.m_depedency, BufferUsageBit::kIndirectCompute | BufferUsageBit::kSrvCompute);
 		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
 		rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
 
 
-		rpass.setWork([this, buildSbtIndirectArgsBuff, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
+		rpass.setWork([this, buildSbtIndirectArgsBuff = asVis.m_buildSbtIndirectArgsBuffer, sbtBuffer,
+					   visibleRenderableIndicesBuff = asVis.m_visibleRenderablesBuffer](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(IndirectDiffuseSbtBuild);
 			ANKI_TRACE_SCOPED_EVENT(IndirectDiffuseSbtBuild);
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 

+ 2 - 0
AnKi/Renderer/Reflections.cpp

@@ -241,6 +241,8 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSrv(7, 0, getClusterBinning().getClustersBuffer());
 			cmdb.bindSrv(7, 0, getClusterBinning().getClustersBuffer());
 			rgraphCtx.bindSrv(8, 0, getShadowMapping().getShadowmapRt());
 			rgraphCtx.bindSrv(8, 0, getShadowMapping().getShadowmapRt());
 			rgraphCtx.bindSrv(9, 0, classTileMapRt);
 			rgraphCtx.bindSrv(9, 0, classTileMapRt);
+			cmdb.bindSrv(10, 0, getClusterBinning().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kLight));
+			cmdb.bindSrv(11, 0, getClusterBinning().getPackedObjectsBuffer(GpuSceneNonRenderableObjectType::kLight));
 
 
 			rgraphCtx.bindUav(0, 0, transientRt1);
 			rgraphCtx.bindUav(0, 0, transientRt1);
 			rgraphCtx.bindUav(1, 0, hitPosAndDepthRt);
 			rgraphCtx.bindUav(1, 0, hitPosAndDepthRt);

+ 8 - 3
AnKi/Renderer/Renderer.cpp

@@ -400,8 +400,8 @@ void Renderer::writeGlobalRendererConstants(RenderingContext& ctx, GlobalRendere
 
 
 		out.m_diffuseColor = dirLight->getDiffuseColor().xyz();
 		out.m_diffuseColor = dirLight->getDiffuseColor().xyz();
 		out.m_power = dirLight->getLightPower();
 		out.m_power = dirLight->getLightPower();
-		out.m_shadowCascadeCount_31bit_active_1bit = shadowCascadeCount << 1u;
-		out.m_shadowCascadeCount_31bit_active_1bit |= 1;
+		out.m_shadowCascadeCount = shadowCascadeCount;
+		out.m_active = 1;
 		out.m_direction = dirLight->getDirection();
 		out.m_direction = dirLight->getDirection();
 		out.m_shadowCascadeDistances =
 		out.m_shadowCascadeDistances =
 			Vec4(g_shadowCascade0DistanceCVar, g_shadowCascade1DistanceCVar, g_shadowCascade2DistanceCVar, g_shadowCascade3DistanceCVar);
 			Vec4(g_shadowCascade0DistanceCVar, g_shadowCascade1DistanceCVar, g_shadowCascade2DistanceCVar, g_shadowCascade3DistanceCVar);
@@ -416,7 +416,7 @@ void Renderer::writeGlobalRendererConstants(RenderingContext& ctx, GlobalRendere
 	}
 	}
 	else
 	else
 	{
 	{
-		consts.m_directionalLight.m_shadowCascadeCount_31bit_active_1bit = 0;
+		zeroMemory(consts.m_directionalLight);
 	}
 	}
 
 
 	// Sky
 	// Sky
@@ -443,6 +443,11 @@ void Renderer::writeGlobalRendererConstants(RenderingContext& ctx, GlobalRendere
 		memcpy(&consts.m_indirectDiffuseClipmaps, &m_indirectDiffuseClipmaps->getClipmapConsts(), sizeof(consts.m_indirectDiffuseClipmaps));
 		memcpy(&consts.m_indirectDiffuseClipmaps, &m_indirectDiffuseClipmaps->getClipmapConsts(), sizeof(consts.m_indirectDiffuseClipmaps));
 	}
 	}
 
 
+	if(m_accelerationStructureBuilder)
+	{
+		memcpy(&consts.m_localLightsGrid, &m_accelerationStructureBuilder->getLocalLightsGridConstants(), sizeof(consts.m_localLightsGrid));
+	}
+
 	outConsts = consts;
 	outConsts = consts;
 }
 }
 
 

+ 29 - 6
AnKi/Renderer/RendererObject.cpp

@@ -176,9 +176,9 @@ void RtMaterialFetchRendererObject::buildShaderBindingTablePass(CString passName
 																U32 sbtRecordSize, RenderGraphBuilder& rgraph, BufferHandle& sbtHandle,
 																U32 sbtRecordSize, RenderGraphBuilder& rgraph, BufferHandle& sbtHandle,
 																BufferView& sbtBuffer)
 																BufferView& sbtBuffer)
 {
 {
-	BufferHandle visibilityDep;
-	BufferView visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff;
-	getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityDep, visibleRenderableIndicesBuff, buildSbtIndirectArgsBuff);
+	AccelerationStructureVisibilityInfo asVis;
+	GpuVisibilityLocalLightsOutput lightVis;
+	getAccelerationStructureBuilder().getVisibilityInfo(asVis, lightVis);
 
 
 	// Allocate SBT
 	// Allocate SBT
 	U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
 	U32 sbtAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
@@ -193,11 +193,12 @@ void RtMaterialFetchRendererObject::buildShaderBindingTablePass(CString passName
 	// Create the pass
 	// Create the pass
 	NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass(passName);
 	NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass(passName);
 
 
-	rpass.newBufferDependency(visibilityDep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kSrvCompute);
+	rpass.newBufferDependency(asVis.m_depedency, BufferUsageBit::kIndirectCompute | BufferUsageBit::kSrvCompute);
 	rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
 	rpass.newBufferDependency(sbtHandle, BufferUsageBit::kUavCompute);
 
 
-	rpass.setWork([this, buildSbtIndirectArgsBuff, sbtBuffer, visibleRenderableIndicesBuff, lib = ShaderProgramPtr(library), sbtRecordSize,
-				   raygenHandleIdx, missHandleIdx](RenderPassWorkContext& rgraphCtx) {
+	rpass.setWork([this, buildSbtIndirectArgsBuff = asVis.m_buildSbtIndirectArgsBuffer, sbtBuffer,
+				   visibleRenderableIndicesBuff = asVis.m_visibleRenderablesBuffer, lib = ShaderProgramPtr(library), sbtRecordSize, raygenHandleIdx,
+				   missHandleIdx](RenderPassWorkContext& rgraphCtx) {
 		ANKI_TRACE_SCOPED_EVENT(btBuild);
 		ANKI_TRACE_SCOPED_EVENT(btBuild);
 		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 		CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 
@@ -271,6 +272,14 @@ void RtMaterialFetchRendererObject::setRgenSpace2Dependencies(RenderPassBase& pa
 	pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvDispatchRays);
 	pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvDispatchRays);
 	pass.newTextureDependency(getGBuffer().getColorRt(1), TextureUsageBit::kSrvDispatchRays);
 	pass.newTextureDependency(getGBuffer().getColorRt(1), TextureUsageBit::kSrvDispatchRays);
 	pass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvDispatchRays);
 	pass.newTextureDependency(getGBuffer().getColorRt(2), TextureUsageBit::kSrvDispatchRays);
+
+	{
+		AccelerationStructureVisibilityInfo asVis;
+		GpuVisibilityLocalLightsOutput lightVis;
+		getAccelerationStructureBuilder().getVisibilityInfo(asVis, lightVis);
+
+		pass.newBufferDependency(lightVis.m_dependency, BufferUsageBit::kSrvDispatchRays);
+	}
 }
 }
 
 
 void RtMaterialFetchRendererObject::bindRgenSpace2Resources(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
 void RtMaterialFetchRendererObject::bindRgenSpace2Resources(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
@@ -311,6 +320,20 @@ void RtMaterialFetchRendererObject::bindRgenSpace2Resources(RenderingContext& ct
 	// Someone else will have to bind comething if they use it
 	// Someone else will have to bind comething if they use it
 	cmdb.bindSrv(srv++, space, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(PixelFailedSsr)));
 	cmdb.bindSrv(srv++, space, BufferView(getDummyGpuResources().m_buffer.get(), 0, sizeof(PixelFailedSsr)));
 
 
+	{
+		AccelerationStructureVisibilityInfo asVis;
+		GpuVisibilityLocalLightsOutput lightVis;
+		getAccelerationStructureBuilder().getVisibilityInfo(asVis, lightVis);
+
+		const auto& arr = GpuSceneArrays::Light::getSingleton();
+		cmdb.bindSrv(srv++, space,
+					 (arr.getElementCount()) ? arr.getBufferView() : BufferView(getDummyGpuResources().m_buffer.get(), 0, arr.getElementSize()));
+
+		cmdb.bindSrv(srv++, space, lightVis.m_lightIndexCountsPerCellBuffer);
+		cmdb.bindSrv(srv++, space, lightVis.m_lightIndexOffsetsPerCellBuffer);
+		cmdb.bindSrv(srv++, space, lightVis.m_lightIndexListBuffer);
+	}
+
 	cmdb.bindSampler(0, space, getRenderer().getSamplers().m_trilinearClamp.get());
 	cmdb.bindSampler(0, space, getRenderer().getSamplers().m_trilinearClamp.get());
 	cmdb.bindSampler(1, space, getRenderer().getSamplers().m_trilinearClampShadow.get());
 	cmdb.bindSampler(1, space, getRenderer().getSamplers().m_trilinearClampShadow.get());
 	cmdb.bindSampler(2, space, getRenderer().getSamplers().m_trilinearRepeat.get());
 	cmdb.bindSampler(2, space, getRenderer().getSamplers().m_trilinearRepeat.get());

+ 0 - 4
AnKi/Renderer/RtMaterialFetchDbg.cpp

@@ -54,10 +54,6 @@ void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx)
 {
 {
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 
 
-	BufferHandle visibilityDep;
-	BufferView visibleRenderableIndicesBuff, sbtBuildIndirectArgsBuff;
-	getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityDep, visibleRenderableIndicesBuff, sbtBuildIndirectArgsBuff);
-
 	// SBT build
 	// SBT build
 	BufferHandle sbtHandle;
 	BufferHandle sbtHandle;
 	BufferView sbtBuffer;
 	BufferView sbtBuffer;

+ 6 - 5
AnKi/Renderer/RtShadows.cpp

@@ -228,14 +228,15 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		// Create the pass
 		// Create the pass
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtShadows build SBT");
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("RtShadows build SBT");
 
 
-		BufferHandle visibilityHandle;
-		BufferView visibleRenderableIndicesBuff, sbtBuildIndirectArgsBuff;
-		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(visibilityHandle, visibleRenderableIndicesBuff, sbtBuildIndirectArgsBuff);
+		AccelerationStructureVisibilityInfo asVis;
+		GpuVisibilityLocalLightsOutput localLightsVis;
+		getRenderer().getAccelerationStructureBuilder().getVisibilityInfo(asVis, localLightsVis);
 
 
-		rpass.newBufferDependency(visibilityHandle, BufferUsageBit::kSrvCompute);
+		rpass.newBufferDependency(asVis.m_depedency, BufferUsageBit::kSrvCompute);
 		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kIndirectCompute);
 		rpass.newBufferDependency(sbtBuildIndirectArgsHandle, BufferUsageBit::kIndirectCompute);
 
 
-		rpass.setWork([this, sbtBuildIndirectArgsBuffer, sbtBuffer, visibleRenderableIndicesBuff](RenderPassWorkContext& rgraphCtx) {
+		rpass.setWork([this, sbtBuildIndirectArgsBuffer, sbtBuffer,
+					   visibleRenderableIndicesBuff = asVis.m_visibleRenderablesBuffer](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(RtShadows);
 			ANKI_TRACE_SCOPED_EVENT(RtShadows);
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 

+ 5 - 7
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -1210,13 +1210,11 @@ void GpuVisibilityLocalLights::populateRenderGraph(GpuVisibilityLocalLightsInput
 	RenderGraphBuilder& rgraph = *in.m_rgraph;
 	RenderGraphBuilder& rgraph = *in.m_rgraph;
 
 
 	// Compute the bounds
 	// Compute the bounds
-	{
-		const Vec3 newCamPos = in.m_cameraPosition + in.m_lookDirection * kForwardBias;
-		const Vec3 gridSize = Vec3(in.m_cellCounts) * in.m_cellSize;
+	const Vec3 newCamPos = in.m_cameraPosition + in.m_lookDirection * kForwardBias;
+	const Vec3 gridSize = Vec3(in.m_cellCounts) * in.m_cellSize;
 
 
-		out.m_lightGridMin = newCamPos - gridSize / 2.0f;
-		out.m_lightGridMax = out.m_lightGridMin + gridSize;
-	}
+	out.m_lightGridMin = newCamPos - gridSize / 2.0f;
+	out.m_lightGridMax = out.m_lightGridMin + gridSize;
 
 
 	const U32 cellCount = in.m_cellCounts.x() * in.m_cellCounts.y() * in.m_cellCounts.z();
 	const U32 cellCount = in.m_cellCounts.x() * in.m_cellCounts.y() * in.m_cellCounts.z();
 
 
@@ -1236,7 +1234,7 @@ void GpuVisibilityLocalLights::populateRenderGraph(GpuVisibilityLocalLightsInput
 	consts.m_cellSize = in.m_cellSize;
 	consts.m_cellSize = in.m_cellSize;
 	consts.m_maxLightIndices = in.m_lightIndexListSize;
 	consts.m_maxLightIndices = in.m_lightIndexListSize;
 	consts.m_gridVolumeMin = out.m_lightGridMin;
 	consts.m_gridVolumeMin = out.m_lightGridMin;
-	consts.m_gridVolumeMax = out.m_lightGridMax;
+	consts.m_gridVolumeSize = gridSize;
 	consts.m_cellCounts = Vec3(in.m_cellCounts);
 	consts.m_cellCounts = Vec3(in.m_cellCounts);
 
 
 	// Setup
 	// Setup

+ 49 - 5
AnKi/Shaders/GpuVisibilityLocalLights.ankiprog

@@ -18,9 +18,22 @@
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
 #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 
 
+Bool insideFrustum(Vec4 planes[5], Vec3 aabbMin, Vec3 aabbMax)
+{
+	[unroll] for(U32 i = 0; i < 5; ++i)
+	{
+		if(testPlaneAabb(planes[i].xyz, planes[i].w, aabbMin, aabbMax) < 0.0)
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
+
 template<typename TFunc>
 template<typename TFunc>
 void lightVsCellVisibility(StructuredBuffer<GpuSceneLight> lights, U32 lightIdx, GpuVisibilityLocalLightsConsts consts,
 void lightVsCellVisibility(StructuredBuffer<GpuSceneLight> lights, U32 lightIdx, GpuVisibilityLocalLightsConsts consts,
-						   RWStructuredBuffer<U32> lightIndexCount, TFunc binLightToCellFunc)
+						   RWStructuredBuffer<U32> lightIndexCount, Bool detailedTests, TFunc binLightToCellFunc)
 {
 {
 	const U32 lightCount = getStructuredBufferElementCount(lights);
 	const U32 lightCount = getStructuredBufferElementCount(lights);
 	if(lightIdx >= lightCount)
 	if(lightIdx >= lightCount)
@@ -51,10 +64,10 @@ void lightVsCellVisibility(StructuredBuffer<GpuSceneLight> lights, U32 lightIdx,
 	}
 	}
 
 
 	Vec3 localLightAabbMin = worldLightAabbMin - consts.m_gridVolumeMin;
 	Vec3 localLightAabbMin = worldLightAabbMin - consts.m_gridVolumeMin;
-	localLightAabbMin = clamp(localLightAabbMin, 0.0, consts.m_gridVolumeMax - kEpsilonF32);
+	localLightAabbMin = clamp(localLightAabbMin, 0.0, consts.m_gridVolumeSize - kEpsilonF32);
 
 
 	Vec3 localLightAabbMax = worldLightAabbMax - consts.m_gridVolumeMin;
 	Vec3 localLightAabbMax = worldLightAabbMax - consts.m_gridVolumeMin;
-	localLightAabbMax = clamp(localLightAabbMax, 0.0, consts.m_gridVolumeMax - kEpsilonF32);
+	localLightAabbMax = clamp(localLightAabbMax, 0.0, consts.m_gridVolumeSize - kEpsilonF32);
 
 
 	if(any(localLightAabbMin == localLightAabbMax))
 	if(any(localLightAabbMin == localLightAabbMax))
 	{
 	{
@@ -62,6 +75,21 @@ void lightVsCellVisibility(StructuredBuffer<GpuSceneLight> lights, U32 lightIdx,
 		return;
 		return;
 	}
 	}
 
 
+	Vec4 spotLightPlanes[5];
+	if((U32)light.m_flags & (U32)GpuSceneLightFlag::kSpotLight)
+	{
+		const Vec3 pe = light.m_position;
+		const Vec3 p0 = light.m_edgePoints[0];
+		const Vec3 p1 = light.m_edgePoints[1];
+		const Vec3 p2 = light.m_edgePoints[2];
+		const Vec3 p3 = light.m_edgePoints[3];
+		spotLightPlanes[0] = computePlane(pe, p0, p3);
+		spotLightPlanes[1] = computePlane(pe, p1, p0);
+		spotLightPlanes[2] = computePlane(pe, p2, p1);
+		spotLightPlanes[3] = computePlane(pe, p3, p2);
+		spotLightPlanes[4] = computePlane(p3, p0, p1);
+	}
+
 	const Vec3 localLightFirstCell = floor(localLightAabbMin / consts.m_cellSize);
 	const Vec3 localLightFirstCell = floor(localLightAabbMin / consts.m_cellSize);
 	const Vec3 localLightEndCell = ceil(localLightAabbMax / consts.m_cellSize);
 	const Vec3 localLightEndCell = ceil(localLightAabbMax / consts.m_cellSize);
 
 
@@ -71,6 +99,22 @@ void lightVsCellVisibility(StructuredBuffer<GpuSceneLight> lights, U32 lightIdx,
 		{
 		{
 			for(F32 z = localLightFirstCell.z; z < localLightEndCell.z; z += 1.0)
 			for(F32 z = localLightFirstCell.z; z < localLightEndCell.z; z += 1.0)
 			{
 			{
+				const Vec3 cellMin = Vec3(x, y, z) * consts.m_cellSize + consts.m_gridVolumeMin;
+				const Vec3 cellMax = cellMin + consts.m_cellSize;
+
+				if(detailedTests)
+				{
+					if((U32)light.m_flags & (U32)GpuSceneLightFlag::kPointLight
+					   && !aabbSphereOverlap(cellMin, cellMax, light.m_position, light.m_radius))
+					{
+						continue;
+					}
+					else if((U32)light.m_flags & (U32)GpuSceneLightFlag::kSpotLight && !insideFrustum(spotLightPlanes, cellMin, cellMax))
+					{
+						continue;
+					}
+				}
+
 				U32 count;
 				U32 count;
 				InterlockedAdd(SBUFF(lightIndexCount, 0), 1, count);
 				InterlockedAdd(SBUFF(lightIndexCount, 0), 1, count);
 				++count;
 				++count;
@@ -140,7 +184,7 @@ struct Func
 [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
 [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
 {
 {
 	Func func;
 	Func func;
-	lightVsCellVisibility(g_lights, svDispatchThreadId.x, g_consts, g_lightIndexCount, func);
+	lightVsCellVisibility(g_lights, svDispatchThreadId.x, g_consts, g_lightIndexCount, false, func);
 }
 }
 #endif
 #endif
 
 
@@ -311,7 +355,7 @@ struct Func
 [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
 [numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
 {
 {
 	Func func;
 	Func func;
-	lightVsCellVisibility(g_lights, svDispatchThreadId.x, g_consts, g_lightIndexCount, func);
+	lightVsCellVisibility(g_lights, svDispatchThreadId.x, g_consts, g_lightIndexCount, true, func);
 }
 }
 
 
 #endif
 #endif

+ 1 - 1
AnKi/Shaders/Include/GpuVisibilityTypes.h

@@ -112,7 +112,7 @@ struct GpuVisibilityLocalLightsConsts
 	Vec3 m_gridVolumeMin;
 	Vec3 m_gridVolumeMin;
 	F32 m_padding2;
 	F32 m_padding2;
 
 
-	Vec3 m_gridVolumeMax;
+	Vec3 m_gridVolumeSize;
 	F32 m_padding3;
 	F32 m_padding3;
 
 
 	Vec3 m_cellCounts;
 	Vec3 m_cellCounts;

+ 19 - 1
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -16,7 +16,8 @@ struct DirectionalLight
 	F32 m_power;
 	F32 m_power;
 
 
 	Vec3 m_direction;
 	Vec3 m_direction;
-	U32 m_shadowCascadeCount_31bit_active_1bit; ///< If shadowCascadeCount is zero then it doesn't cast shadow.
+	U32 m_shadowCascadeCount : 31; ///< If shadowCascadeCount is zero then it doesn't cast shadow.
+	U32 m_active : 1;
 
 
 	Vec4 m_shadowCascadeDistances;
 	Vec4 m_shadowCascadeDistances;
 
 
@@ -100,6 +101,21 @@ struct IndirectDiffuseClipmapConstants
 	IndirectDiffuseClipmapTextures m_textures[kIndirectDiffuseClipmapCount];
 	IndirectDiffuseClipmapTextures m_textures[kIndirectDiffuseClipmapCount];
 };
 };
 
 
+struct LocalLightsGridConstants
+{
+	Vec3 m_volumeMin;
+	F32 m_padding1;
+
+	Vec3 m_volumeMax;
+	F32 m_padding2;
+
+	Vec3 m_cellSize;
+	F32 m_padding3;
+
+	UVec3 m_cellCounts;
+	F32 m_padding4;
+};
+
 /// Common constants for all passes.
 /// Common constants for all passes.
 struct GlobalRendererConstants
 struct GlobalRendererConstants
 {
 {
@@ -128,6 +144,8 @@ struct GlobalRendererConstants
 	Sky m_sky;
 	Sky m_sky;
 
 
 	IndirectDiffuseClipmapConstants m_indirectDiffuseClipmaps;
 	IndirectDiffuseClipmapConstants m_indirectDiffuseClipmaps;
+
+	LocalLightsGridConstants m_localLightsGrid;
 };
 };
 
 
 // RT shadows
 // RT shadows

+ 1 - 1
AnKi/Shaders/IndirectDiffuse.ankiprog

@@ -77,7 +77,7 @@ Vec3 lightShading(Vec3 rayOrigin, Vec3 rayDir, Vec3 hitPos, Vec3 hitNormal, Vec3
 		if(bInsideFrustum && kTryShadowmapFirst)
 		if(bInsideFrustum && kTryShadowmapFirst)
 		{
 		{
 			const F32 negativeZViewSpace = -mul(g_globalRendererConstants.m_matrices.m_view, Vec4(hitPos, 1.0)).z;
 			const F32 negativeZViewSpace = -mul(g_globalRendererConstants.m_matrices.m_view, Vec4(hitPos, 1.0)).z;
-			const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount_31bit_active_1bit >> 1u;
+			const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount;
 
 
 			const U32 cascadeIdx = computeShadowCascadeIndex(negativeZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount);
 			const U32 cascadeIdx = computeShadowCascadeIndex(negativeZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount);
 
 

+ 2 - 2
AnKi/Shaders/LightFunctions.hlsl

@@ -116,9 +116,9 @@ vector<T, 3> specularDFG(vector<T, 3> F0, T roughness, Texture2D<Vec4> integrati
 }
 }
 
 
 template<typename T>
 template<typename T>
-T computeSpotFactor(vector<T, 3> frag2Light, T outerCos, T innerCos, vector<T, 3> spotDir)
+T computeSpotFactor(vector<T, 3> normalizedFrag2Light, T outerCos, T innerCos, vector<T, 3> spotDir)
 {
 {
-	const T costheta = -dot(frag2Light, spotDir);
+	const T costheta = -dot(normalizedFrag2Light, spotDir);
 	const T spotFactor = smoothstep(outerCos, innerCos, costheta);
 	const T spotFactor = smoothstep(outerCos, innerCos, costheta);
 	return spotFactor;
 	return spotFactor;
 }
 }

+ 2 - 2
AnKi/Shaders/LightShading.ankiprog

@@ -112,10 +112,10 @@ Vec4 main(VertOut input) : SV_Target0
 
 
 	// Dir light
 	// Dir light
 	const DirectionalLight dirLight = g_globalConstants.m_directionalLight;
 	const DirectionalLight dirLight = g_globalConstants.m_directionalLight;
-	if(dirLight.m_shadowCascadeCount_31bit_active_1bit & 1u)
+	if(dirLight.m_active)
 	{
 	{
 		F16 shadowFactor;
 		F16 shadowFactor;
-		if(dirLight.m_shadowCascadeCount_31bit_active_1bit >> 1u)
+		if(dirLight.m_shadowCascadeCount)
 		{
 		{
 			shadowFactor = resolvedSm[0];
 			shadowFactor = resolvedSm[0];
 			++resolvedSmIdx;
 			++resolvedSmIdx;

+ 35 - 5
AnKi/Shaders/Reflections.ankiprog

@@ -194,6 +194,8 @@ StructuredBuffer<GlobalIlluminationProbe> g_giProbes : register(t6);
 StructuredBuffer<Cluster> g_clusters : register(t7);
 StructuredBuffer<Cluster> g_clusters : register(t7);
 Texture2D<Vec4> g_shadowAtlasTex : register(t8);
 Texture2D<Vec4> g_shadowAtlasTex : register(t8);
 Texture2D<UVec4> g_classTileMap : register(t9);
 Texture2D<UVec4> g_classTileMap : register(t9);
+StructuredBuffer<PointLight> g_pointLights : register(t10);
+StructuredBuffer<SpotLight> g_spotLights : register(t11);
 
 
 RWTexture2D<Vec4> g_colorAndPdfTex : register(u0);
 RWTexture2D<Vec4> g_colorAndPdfTex : register(u0);
 RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u1);
 RWTexture2D<Vec4> g_hitPosAndDepthTex : register(u1);
@@ -220,22 +222,25 @@ Vec3 doLightShading(Vec3 worldPos, Vec3 viewPos, UVec2 coord, F32 depth)
 
 
 	Vec3 outColor = gbuffer.m_emission;
 	Vec3 outColor = gbuffer.m_emission;
 
 
+	Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants, Vec3(coord.xy + 0.5, depth));
+
+	// GI
 #	if INDIRECT_DIFFUSE_CLIPMAPS
 #	if INDIRECT_DIFFUSE_CLIPMAPS
 	const Vec3 irradiance =
 	const Vec3 irradiance =
 		sampleClipmapIrradiance(worldPos, gbuffer.m_normal, g_globalRendererConstants.m_cameraPosition,
 		sampleClipmapIrradiance(worldPos, gbuffer.m_normal, g_globalRendererConstants.m_cameraPosition,
 								g_globalRendererConstants.m_indirectDiffuseClipmaps, g_linearAnyRepeatSampler, kSampleClipmapFlags);
 								g_globalRendererConstants.m_indirectDiffuseClipmaps, g_linearAnyRepeatSampler, kSampleClipmapFlags);
 	outColor += irradiance * gbuffer.m_diffuse;
 	outColor += irradiance * gbuffer.m_diffuse;
 #	else
 #	else
-	// GI
-	const Cluster cluster = getClusterFragCoord(g_clusters, g_globalRendererConstants, Vec3(coord.xy + 0.5, depth));
 	outColor += sampleGiProbes<F32>(cluster, g_giProbes, gbuffer.m_normal, worldPos.xyz, g_trilinearClampSampler) * gbuffer.m_diffuse;
 	outColor += sampleGiProbes<F32>(cluster, g_giProbes, gbuffer.m_normal, worldPos.xyz, g_trilinearClampSampler) * gbuffer.m_diffuse;
 #	endif
 #	endif
 
 
+	const Vec3 diffC = diffuseLobe(gbuffer.m_diffuse);
+
 	// Dir light
 	// Dir light
 	const DirectionalLight dirLight = g_globalRendererConstants.m_directionalLight;
 	const DirectionalLight dirLight = g_globalRendererConstants.m_directionalLight;
-	if(dirLight.m_shadowCascadeCount_31bit_active_1bit & 1u)
+	if(dirLight.m_active)
 	{
 	{
-		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount_31bit_active_1bit >> 1u;
+		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount;
 		F32 shadowFactor;
 		F32 shadowFactor;
 		if(shadowCascadeCount >> 1u)
 		if(shadowCascadeCount >> 1u)
 		{
 		{
@@ -252,10 +257,35 @@ Vec3 doLightShading(Vec3 worldPos, Vec3 viewPos, UVec2 coord, F32 depth)
 
 
 		const Vec3 l = -dirLight.m_direction;
 		const Vec3 l = -dirLight.m_direction;
 		const F32 lambert = max(0.0, dot(l, gbuffer.m_normal));
 		const F32 lambert = max(0.0, dot(l, gbuffer.m_normal));
-		const Vec3 diffC = diffuseLobe(gbuffer.m_diffuse);
 		outColor += diffC * dirLight.m_diffuseColor * lambert * shadowFactor;
 		outColor += diffC * dirLight.m_diffuseColor * lambert * shadowFactor;
 	}
 	}
 
 
+	// Point lights
+	U32 idx;
+	[loop] while((idx = iteratePointLights(cluster)) != kMaxU32)
+	{
+		const PointLight light = g_pointLights[idx];
+
+		const Vec3 frag2Light = light.m_position - worldPos;
+		const F32 attenuation = computeAttenuationFactor(light.m_radius, frag2Light);
+		const F32 lambert = max(0.0, dot(normalize(frag2Light), gbuffer.m_normal));
+
+		outColor += diffC * light.m_diffuseColor * (attenuation * lambert);
+	}
+
+	// Spot lights
+	[loop] while((idx = iterateSpotLights(cluster)) != kMaxU32)
+	{
+		const SpotLight light = g_spotLights[idx];
+
+		const Vec3 frag2Light = light.m_position - worldPos;
+		F32 attenuation = computeAttenuationFactor(light.m_radius, frag2Light);
+		attenuation *= computeSpotFactor(normalize(frag2Light), light.m_outerCos, light.m_innerCos, light.m_direction);
+		const F32 lambert = max(0.0, dot(normalize(frag2Light), gbuffer.m_normal));
+
+		outColor += diffC * light.m_diffuseColor * (attenuation * lambert);
+	}
+
 	return outColor;
 	return outColor;
 }
 }
 
 

+ 79 - 28
AnKi/Shaders/RtMaterialFetch.hlsl

@@ -48,6 +48,12 @@ Texture2D<Vec4> g_gbufferRt2 : register(t6, SPACE);
 StructuredBuffer<PixelFailedSsr> g_pixelsFailedSsr : register(t7, SPACE);
 StructuredBuffer<PixelFailedSsr> g_pixelsFailedSsr : register(t7, SPACE);
 #	endif
 #	endif
 
 
+// Output of GpuVisibilityLocalLights:
+StructuredBuffer<GpuSceneLight> g_lights : register(t8, SPACE);
+StructuredBuffer<U32> g_lightIndexCountsPerCell : register(t9, SPACE);
+StructuredBuffer<U32> g_lightIndexOffsetsPerCell : register(t10, SPACE);
+StructuredBuffer<U32> g_lightIndexList : register(t11, SPACE);
+
 // UAVs
 // UAVs
 #	if defined(CLIPMAP_VOLUME)
 #	if defined(CLIPMAP_VOLUME)
 RWTexture2D<Vec4> g_lightResultTex : register(u0, SPACE);
 RWTexture2D<Vec4> g_lightResultTex : register(u0, SPACE);
@@ -121,37 +127,45 @@ vector<T, 3> directLighting(GBufferLight<T> gbuffer, Vec3 hitPos, Bool isSky, Bo
 {
 {
 	vector<T, 3> color = gbuffer.m_emission;
 	vector<T, 3> color = gbuffer.m_emission;
 
 
-	if(!isSky)
+	if(isSky)
 	{
 	{
-		const DirectionalLight dirLight = g_globalRendererConstants.m_directionalLight;
-
-		// Trace shadow
-		Vec4 vv4 = mul(g_globalRendererConstants.m_matrices.m_viewProjection, Vec4(hitPos, 1.0));
-		vv4.xy /= vv4.w;
-		const Bool bInsideFrustum = all(vv4.xy > -1.0) && all(vv4.xy < 1.0) && vv4.w > 0.0;
-
-		F32 shadow;
-		if(bInsideFrustum && tryShadowmapFirst)
-		{
-			const F32 negativeZViewSpace = -mul(g_globalRendererConstants.m_matrices.m_view, Vec4(hitPos, 1.0)).z;
-			const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount_31bit_active_1bit >> 1u;
-
-			const U32 cascadeIdx = computeShadowCascadeIndex(negativeZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount);
+		return color;
+	}
 
 
-			shadow = computeShadowFactorDirLight<F32>(dirLight, cascadeIdx, hitPos, g_shadowAtlasTex, g_shadowSampler);
-		}
-		else
+	// Sun
+	const DirectionalLight dirLight = g_globalRendererConstants.m_directionalLight;
+	if(dirLight.m_active)
+	{
+		F32 shadow = 1.0;
+		if(dirLight.m_shadowCascadeCount)
 		{
 		{
-			RayQuery<RAY_FLAG_NONE> q;
-			const U32 cullMask = 0xFFu;
-			RayDesc ray;
-			ray.Origin = hitPos;
-			ray.TMin = 0.01;
-			ray.Direction = -dirLight.m_direction;
-			ray.TMax = shadowTMax;
-			q.TraceRayInline(g_tlas, traceFlags, cullMask, ray);
-			q.Proceed();
-			shadow = (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT) ? 0.0 : 1.0;
+			// Trace shadow
+			Vec4 vv4 = mul(g_globalRendererConstants.m_matrices.m_viewProjection, Vec4(hitPos, 1.0));
+			vv4.xy /= vv4.w;
+			const Bool bInsideFrustum = all(vv4.xy > -1.0) && all(vv4.xy < 1.0) && vv4.w > 0.0;
+
+			if(bInsideFrustum && tryShadowmapFirst)
+			{
+				const F32 negativeZViewSpace = -mul(g_globalRendererConstants.m_matrices.m_view, Vec4(hitPos, 1.0)).z;
+				const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount;
+
+				const U32 cascadeIdx = computeShadowCascadeIndex(negativeZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount);
+
+				shadow = computeShadowFactorDirLight<F32>(dirLight, cascadeIdx, hitPos, g_shadowAtlasTex, g_shadowSampler);
+			}
+			else
+			{
+				RayQuery<RAY_FLAG_NONE> q;
+				const U32 cullMask = 0xFFu;
+				RayDesc ray;
+				ray.Origin = hitPos;
+				ray.TMin = 0.01;
+				ray.Direction = -dirLight.m_direction;
+				ray.TMax = shadowTMax;
+				q.TraceRayInline(g_tlas, traceFlags, cullMask, ray);
+				q.Proceed();
+				shadow = (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT) ? 0.0 : 1.0;
+			}
 		}
 		}
 
 
 		// Do simple light shading
 		// Do simple light shading
@@ -162,6 +176,43 @@ vector<T, 3> directLighting(GBufferLight<T> gbuffer, Vec3 hitPos, Bool isSky, Bo
 		color += diffC * dirLight.m_diffuseColor * lambert * shadow;
 		color += diffC * dirLight.m_diffuseColor * lambert * shadow;
 	}
 	}
 
 
+	// Local lights
+	const LocalLightsGridConstants lightGrid = g_globalRendererConstants.m_localLightsGrid;
+	if(all(hitPos < lightGrid.m_volumeMax) && all(hitPos > lightGrid.m_volumeMin))
+	{
+		const UVec3 cellId = (hitPos - lightGrid.m_volumeMin) / lightGrid.m_cellSize;
+		const U32 cellIdx = cellId.z * lightGrid.m_cellCounts.x * lightGrid.m_cellCounts.y + cellId.y * lightGrid.m_cellCounts.x + cellId.x;
+
+		// Compute an attenuation factor that will fade out the resulting color at the edges of the grid
+		Vec3 a = (hitPos - lightGrid.m_volumeMin) / (lightGrid.m_volumeMax - lightGrid.m_volumeMin);
+		a = abs(a * 2.0 - 1.0);
+		a = 1.0 - a;
+		const F32 gridEdgesAttenuation = sqrt(a.x * a.y * a.z);
+
+		const U32 lightCount = SBUFF(g_lightIndexCountsPerCell, cellIdx);
+		const U32 listOffset = SBUFF(g_lightIndexOffsetsPerCell, cellIdx);
+
+		for(U32 i = 0; i < lightCount; ++i)
+		{
+			const U32 lightIdx = SBUFF(g_lightIndexList, listOffset + i);
+			const GpuSceneLight light = SBUFF(g_lights, lightIdx);
+
+			const Vec3 frag2Light = light.m_position - hitPos;
+			const Vec3 nFrag2Light = normalize(frag2Light);
+
+			F32 attenuation = computeAttenuationFactor(light.m_radius, frag2Light);
+			if((U32)light.m_flags & (U32)GpuSceneLightFlag::kSpotLight)
+			{
+				attenuation *= computeSpotFactor(nFrag2Light, light.m_outerCos, light.m_innerCos, light.m_direction);
+			}
+
+			const T lambert = max(T(0), dot(nFrag2Light, gbuffer.m_worldNormal));
+			const vector<T, 3> diffC = diffuseLobe(gbuffer.m_diffuse);
+			color += diffC * light.m_diffuseColor * lambert * attenuation * gridEdgesAttenuation;
+			// color += Vec3(0.5, 0, 0);
+		}
+	}
+
 	return color;
 	return color;
 }
 }
 #endif // ANKI_RAY_GEN_SHADER
 #endif // ANKI_RAY_GEN_SHADER

+ 6 - 2
AnKi/Shaders/RtMaterialFetchDbg.ankiprog

@@ -7,6 +7,8 @@
 
 
 #pragma anki technique RtMaterialFetch rgen
 #pragma anki technique RtMaterialFetch rgen
 
 
+#define ANKI_ASSERTIONS_ENABLED 1
+
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 
 
@@ -23,7 +25,7 @@
 	const Vec3 rayDir = normalize(worldPos - rayOrigin);
 	const Vec3 rayDir = normalize(worldPos - rayOrigin);
 	const F32 tMax = 1000.0;
 	const F32 tMax = 1000.0;
 	const F32 tMin = 0.1;
 	const F32 tMin = 0.1;
-	const F32 texLod = 1000.0;
+	const F32 texLod = 0.0;
 	constexpr U32 traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
 	constexpr U32 traceFlags = RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES;
 	GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
 	GBufferLight<F16> gbuffer = (GBufferLight<F16>)0;
 	F32 rayT = 0.0;
 	F32 rayT = 0.0;
@@ -41,7 +43,9 @@
 	}
 	}
 	else
 	else
 	{
 	{
-		col = gbuffer.m_diffuse * 1.0 + (gbuffer.m_worldNormal / 2.0 + 0.5) * 0.0 + rayT * 0.0 + gbuffer.m_emission * 0.0;
+		// col = gbuffer.m_diffuse * 1.0 + (gbuffer.m_worldNormal / 2.0 + 0.5) * 0.0 + rayT * 0.0 + gbuffer.m_emission * 0.0;
+		col = directLighting(gbuffer, rayOrigin + rayDir * rayT, false, true, 100.0);
+		col += gbuffer.m_diffuse * 0.3;
 	}
 	}
 
 
 	g_colorAndPdfTex[DispatchRaysIndex().xy] = Vec4(col, 0.0);
 	g_colorAndPdfTex[DispatchRaysIndex().xy] = Vec4(col, 0.0);

+ 2 - 2
AnKi/Shaders/ShadowmapsResolve.ankiprog

@@ -116,9 +116,9 @@ RVec4 main(VertOut input) : SV_TARGET0
 	++shadowCasterCountPerFragment;
 	++shadowCasterCountPerFragment;
 #	else
 #	else
 	const DirectionalLight dirLight = g_globalConstants.m_directionalLight;
 	const DirectionalLight dirLight = g_globalConstants.m_directionalLight;
-	if(dirLight.m_shadowCascadeCount_31bit_active_1bit != 0u)
+	if(dirLight.m_active && dirLight.m_shadowCascadeCount)
 	{
 	{
-		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount_31bit_active_1bit >> 1u;
+		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount;
 
 
 		const RF32 positiveZViewSpace = testPlanePoint(g_globalConstants.m_nearPlaneWSpace.xyz, g_globalConstants.m_nearPlaneWSpace.w, worldPos)
 		const RF32 positiveZViewSpace = testPlanePoint(g_globalConstants.m_nearPlaneWSpace.xyz, g_globalConstants.m_nearPlaneWSpace.w, worldPos)
 										+ g_globalConstants.m_matrices.m_near;
 										+ g_globalConstants.m_matrices.m_near;

+ 1 - 1
AnKi/Shaders/TraditionalDeferredShading.ankiprog

@@ -96,7 +96,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 #	endif
 #	endif
 
 
 	// Dir light
 	// Dir light
-	if(g_globalRendererConsts.m_directionalLight.m_shadowCascadeCount_31bit_active_1bit & 1u)
+	if(g_globalRendererConsts.m_directionalLight.m_active)
 	{
 	{
 		const F32 dist = length(g_consts.m_cameraPos - worldPos);
 		const F32 dist = length(g_consts.m_cameraPos - worldPos);
 		RF32 shadowFactor;
 		RF32 shadowFactor;

+ 36 - 0
AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl

@@ -174,6 +174,31 @@ F32 testPlaneSphere(Vec3 planeNormal, F32 planeOffset, Vec3 sphereCenter, F32 sp
 	return (dist < 0.0f) ? dist : 0.0f;
 	return (dist < 0.0f) ? dist : 0.0f;
 }
 }
 
 
+Bool aabbSphereOverlap(Vec3 aabbMin, Vec3 aabbMax, Vec3 sphereCenter, F32 sphereRadius)
+{
+	Vec3 closestPoint = sphereCenter;
+
+#if 0
+	[unroll] for(U32 i = 0; i < 3; ++i)
+	{
+		if(sphereCenter[i] < aabbMin[i])
+		{
+			closestPoint[i] = aabbMin[i];
+		}
+		else if(sphereCenter[i] > aabbMax[i])
+		{
+			closestPoint[i] = aabbMax[i];
+		}
+	}
+#else
+	closestPoint = select(sphereCenter > aabbMax, aabbMax, sphereCenter);
+	closestPoint = select(closestPoint < aabbMin, aabbMin, closestPoint);
+#endif
+
+	const Vec3 sub = sphereCenter - closestPoint;
+	return dot(sub, sub) <= square(sphereRadius);
+}
+
 Bool frustumTest(Vec4 frustumPlanes[6], Vec3 sphereCenter, F32 sphereRadius)
 Bool frustumTest(Vec4 frustumPlanes[6], Vec3 sphereCenter, F32 sphereRadius)
 {
 {
 	F32 minPlaneDistance = testPlanePoint(frustumPlanes[0].xyz, frustumPlanes[0].w, sphereCenter);
 	F32 minPlaneDistance = testPlanePoint(frustumPlanes[0].xyz, frustumPlanes[0].w, sphereCenter);
@@ -287,3 +312,14 @@ F32 distancePointToLineSegment(Vec2 p, Vec2 lineSegmentA, Vec2 lineSegmentB)
 	const Vec2 closestPoint = lineSegmentA + t * ab;
 	const Vec2 closestPoint = lineSegmentA + t * ab;
 	return length(p - closestPoint);
 	return length(p - closestPoint);
 }
 }
+
+Vec4 computePlane(Vec3 p0, Vec3 p1, Vec3 p2)
+{
+	const Vec3 u = p1 - p0;
+	const Vec3 v = p2 - p0;
+
+	const Vec3 normal = normalize(cross(u, v));
+	const F32 offset = dot(normal, p1);
+
+	return Vec4(normal, offset);
+}

+ 2 - 2
AnKi/Shaders/VolumetricLightingAccumulation.ankiprog

@@ -94,12 +94,12 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 
 
 	// Dir light
 	// Dir light
 	const DirectionalLight dirLight = g_globalConstants.m_directionalLight;
 	const DirectionalLight dirLight = g_globalConstants.m_directionalLight;
-	if(dirLight.m_shadowCascadeCount_31bit_active_1bit & 1u)
+	if(dirLight.m_active)
 	{
 	{
 		F32 factor = phaseFunction(viewDir, dirLight.m_direction, kPhaseFunctionAnisotropy);
 		F32 factor = phaseFunction(viewDir, dirLight.m_direction, kPhaseFunctionAnisotropy);
 
 
 #if ENABLE_SHADOWS
 #if ENABLE_SHADOWS
-		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount_31bit_active_1bit >> 1u;
+		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount;
 
 
 		if(shadowCascadeCount > 0u && negativeZViewSpace < dirLight.m_shadowCascadeDistances[shadowCascadeCount - 1u])
 		if(shadowCascadeCount > 0u && negativeZViewSpace < dirLight.m_shadowCascadeDistances[shadowCascadeCount - 1u])
 		{
 		{

+ 1 - 1
CMakeLists.txt

@@ -271,7 +271,7 @@ else()
 		/GR-) # Disable RTTI
 		/GR-) # Disable RTTI
 
 
 	if(${CMAKE_BUILD_TYPE} STREQUAL "Release" OR ${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
 	if(${CMAKE_BUILD_TYPE} STREQUAL "Release" OR ${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
-		#add_definitions(/Ox)
+		add_compile_options(/GS-) # Disable "Control stack checking calls"
 	endif()
 	endif()
 
 
 	add_definitions(
 	add_definitions(

+ 1 - 1
Samples/Common/SampleApp.cpp

@@ -71,7 +71,7 @@ Error SampleApp::userMainLoop(Bool& quit, Second elapsedTime)
 
 
 	if(in.getKey(KeyCode::kI) == 1)
 	if(in.getKey(KeyCode::kI) == 1)
 	{
 	{
-		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "Ssao") ? "" : "Ssao");
+		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "RtMaterialFetchDbg") ? "" : "RtMaterialFetchDbg");
 	}
 	}
 
 
 	if(in.getKey(KeyCode::kO) == 1)
 	if(in.getKey(KeyCode::kO) == 1)