4 months ago · bbbbb09d7d
--- a/AnKi/Gr/Vulkan/VkGrManager.cpp
+++ b/AnKi/Gr/Vulkan/VkGrManager.cpp
@@ -598,6 +598,12 @@ Error GrManagerImpl::initInstance()
 
				 	m_capabilities.m_minWaveSize = props13.minSubgroupSize;
			
 
				 	m_capabilities.m_maxWaveSize = props13.maxSubgroupSize;
			
 
				 
			
 
				+	if(props2.properties.limits.maxComputeWorkGroupInvocations < 1024)
			
 
				+	{
			
 
				+		ANKI_VK_LOGE("GPU doesn't support at least 1024 workgroup invocations");
			
 
				+		return Error::kFunctionFailed;
			
 
				+	}
			
 
				+
			
 
				 	// Find vendor
			
 
				 	switch(props2.properties.vendorID)
			
 
				 	{
			
--- a/AnKi/Renderer/RendererObject.def.h
+++ b/AnKi/Renderer/RendererObject.def.h
@@ -50,7 +50,9 @@ ANKI_RENDERER_OBJECT_DEF(IndirectDiffuse, indirectDiffuse,
 
				 ANKI_RENDERER_OBJECT_DEF(RenderableDrawer, drawer, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(GpuVisibility, gpuVisibility, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(GpuVisibilityNonRenderables, gpuVisibilityNonRenderables, 1)
			
 
				-ANKI_RENDERER_OBJECT_DEF(GpuVisibilityAccelerationStructures, gpuVisibilityAccelerationStructures, 1)
			
 
				+// NOTE(review): the two entries below pass m_rayTracingEnabled as the 3rd macro argument where the neighbouring entries pass 1.
			
 
				+// Presumably that argument decides if the object gets created, confirm against the definition of ANKI_RENDERER_OBJECT_DEF.
			
 
				+ANKI_RENDERER_OBJECT_DEF(GpuVisibilityAccelerationStructures, gpuVisibilityAccelerationStructures,
			
 
				+						 GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled)
			
 
				+ANKI_RENDERER_OBJECT_DEF(GpuVisibilityLocalLights, gpuVisibilityLocalLights, GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled)
			
 
				 ANKI_RENDERER_OBJECT_DEF(HzbGenerator, hzbGenerator, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(ReadbackManager, readbackManager, 1)
			
 
				 ANKI_RENDERER_OBJECT_DEF(MipmapGenerator, mipmapGenerator, 1)
			
--- a/AnKi/Renderer/Utils/GpuVisibility.cpp
+++ b/AnKi/Renderer/Utils/GpuVisibility.cpp
@@ -1195,4 +1195,58 @@ void GpuVisibilityAccelerationStructures::pupulateRenderGraph(GpuVisibilityAccel
 
				 	}
			
 
				 }
			
 
				 
			
 
				+Error GpuVisibilityLocalLights::init()
				+{
				+	// All the techniques live in the same program binary, only the technique name changes between the loads
				+	const CString programFilename = "ShaderBinaries/GpuVisibilityLocalLights.ankiprogbin";
				+
				+	const auto loadTechnique = [&](ShaderProgramPtr& grProg, CString techniqueName) -> Error {
				+		return loadShaderProgram(programFilename, {}, m_visibilityProg, grProg, techniqueName);
				+	};
				+
				+	// Same order as before: the first failure is returned and the remaining techniques are not loaded
				+	ANKI_CHECK(loadTechnique(m_setupGrProg, "Setup"));
				+	ANKI_CHECK(loadTechnique(m_countGrProg, "Count"));
				+	ANKI_CHECK(loadTechnique(m_prefixSumGrProg, "PrefixSum"));
				+	ANKI_CHECK(loadTechnique(m_fillGrProg, "Fill"));
				+
				+	return Error::kNone;
				+}
			
 
				+
			
 
				+/// Computes the bounds of the light grid and registers the GPU passes that build it.
				+/// @param in The grid configuration, the camera and the render graph builder (in.m_rgraph is dereferenced, it can't be null).
				+/// @param[out] out Gets the grid bounds, the per cell buffers and a handle to depend on.
				+void GpuVisibilityLocalLights::populateRenderGraph(GpuVisibilityLocalLightsInput& in, GpuVisibilityLocalLightsOutput& out)
				+{
				+	RenderGraphBuilder& rgraph = *in.m_rgraph;
				+
				+	// Compute the bounds. The grid is centered around a point that is kForwardBias in front of the camera
				+	{
				+		const Vec3 newCamPos = in.m_cameraPosition + in.m_lookDirection * kForwardBias;
				+		const Vec3 gridSize = Vec3(in.m_cellCounts) * in.m_cellSize;
				+
				+		out.m_lightGridMin = newCamPos - gridSize / 2.0f;
				+		out.m_lightGridMax = out.m_lightGridMin + gridSize;
				+	}
				+
				+	const U32 cellCount = in.m_cellCounts.x() * in.m_cellCounts.y() * in.m_cellCounts.z();
				+
				+	const BufferView lightIndexCountsPerCellBuff = allocateStructuredBuffer<U32>(cellCount);
				+	const BufferView lightIndexOffsetsPerCellBuff = allocateStructuredBuffer<U32>(cellCount);
				+	const BufferView lightIndexCountBuff = allocateStructuredBuffer<U32>(1);
				+
				+	const BufferHandle dep = rgraph.importBuffer(lightIndexCountBuff, BufferUsageBit::kNone);
				+
				+	// Publish what exists so far, otherwise the caller gets default constructed handles/views back.
				+	// TODO: out.m_lightIndexBuffer is not allocated yet, it has to be sized with in.m_lightIndexListSize once the Fill pass is wired
				+	out.m_dependency = dep;
				+	out.m_lightIndexOffsetsPerCellBuffer = lightIndexOffsetsPerCellBuff;
				+	out.m_lightIndexCountPerCellBuffer = lightIndexCountsPerCellBuff;
				+
				+	// Setup
				+	{
				+		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass(generateTempPassName("Setup: %s", in.m_passesName.cstr()));
				+
				+		pass.newBufferDependency(dep, BufferUsageBit::kUavCompute);
				+		pass.newBufferDependency(getRenderer().getGpuSceneBufferHandle(), BufferUsageBit::kSrvCompute);
				+
				+		// The context parameter has its own name so that it doesn't shadow the render graph builder of the enclosing scope
				+		pass.setWork([this, lightIndexCountsPerCellBuff, lightIndexCountBuff, cellCount](RenderPassWorkContext& rgraphCtx) {
				+			ANKI_TRACE_SCOPED_EVENT(GpuVisibilityLocalLightsSetup);
				+			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
				+
				+			// Without this the dispatch would run with whatever program happened to be bound before
				+			cmdb.bindShaderProgram(m_setupGrProg.get());
				+
				+			cmdb.bindUav(0, 0, lightIndexCountsPerCellBuff);
				+			cmdb.bindUav(1, 0, lightIndexCountBuff);
				+
				+			// One thread per cell, in groups of 64 to match the numthreads of the Setup technique
				+			dispatchPPCompute(cmdb, 64, 1, cellCount, 1);
				+		});
				+	}
				+
				+	// Count
				+	// TODO: Not implemented yet. The Count, PrefixSum and Fill techniques are loaded in init() but no pass dispatches them
				+	{
				+	}
				+}
			
 
				+
			
 
				 } // end namespace anki
			
--- a/AnKi/Renderer/Utils/GpuVisibility.h
+++ b/AnKi/Renderer/Utils/GpuVisibility.h
@@ -315,6 +315,59 @@ private:
 
				 	U64 m_lastFrameIdx = kMaxU64;
			
 
				 #endif
			
 
				 };
			
 
				+
			
 
				+/// @memberof GpuVisibilityLocalLights
			
 
				+/// The input of GpuVisibilityLocalLights::populateRenderGraph.
			
 
				+class GpuVisibilityLocalLightsInput
			
 
				+{
			
 
				+public:
			
 
				+	UVec3 m_cellCounts; ///< The number of cells along each axis of the grid.
			
 
				+	Vec3 m_cellSize; ///< The size of a single cell along each axis. The grid size is m_cellCounts * m_cellSize.
			
 
				+
			
 
				+	Vec3 m_cameraPosition; ///< The grid is centered around this position after it's pushed forward along m_lookDirection.
			
 
				+	Vec3 m_lookDirection; ///< The direction m_cameraPosition is pushed towards before centering the grid.
			
 
				+
			
 
				+	U32 m_lightIndexListSize = 0; ///< The number of light indices to store.
			
 
				+
			
 
				+	CString m_passesName = "GpuVisibilityLocalLights"; ///< It becomes part of the names of the render passes.
			
 
				+
			
 
				+	RenderGraphBuilder* m_rgraph = nullptr; ///< The passes are added to this. It's dereferenced unconditionally so it can't be null.
			
 
				+};
			
 
				+
			
 
				+/// @memberof GpuVisibilityLocalLights
			
 
				+/// The output of GpuVisibilityLocalLights::populateRenderGraph.
			
 
				+class GpuVisibilityLocalLightsOutput
			
 
				+{
			
 
				+public:
			
 
				+	BufferHandle m_dependency; ///< A single handle to depend on. It's there so that consumers don't have to track every buffer.
			
 
				+
			
 
				+	BufferView m_lightIndexOffsetsPerCellBuffer; ///< One element per cell. The offset into m_lightIndexBuffer where the cell's indices start.
			
 
				+	BufferView m_lightIndexCountPerCellBuffer; ///< One element per cell. The number of lights in the cell.
			
 
				+	BufferView m_lightIndexBuffer; ///< Contains indices to the GPU scene lights array.
			
 
				+
			
 
				+	/// @{
			
 
				+	/// The min and max corners of the grid volume. Computed from the camera position, the look direction and the cell configuration.
			
 
				+	Vec3 m_lightGridMin;
			
 
				+	Vec3 m_lightGridMax;
			
 
				+	/// @}
			
 
				+};
			
 
				+
			
 
				+/// Gathers the local lights around the camera to a grid.
			
 
				+class GpuVisibilityLocalLights : public RendererObject
			
 
				+{
			
 
				+public:
			
 
				+	/// Loads the Setup, Count, PrefixSum and Fill techniques of the GpuVisibilityLocalLights shader program.
			
 
				+	Error init();
			
 
				+
			
 
				+	/// Computes the grid bounds and adds the passes to the render graph builder found in the input.
			
 
				+	/// @param in The grid configuration, the camera and the render graph builder.
			
 
				+	/// @param[out] out The grid bounds and the buffers.
			
 
				+	void populateRenderGraph(GpuVisibilityLocalLightsInput& in, GpuVisibilityLocalLightsOutput& out);
			
 
				+
			
 
				+private:
			
 
				+	static constexpr F32 kForwardBias = 4.0f; ///< How far along the look direction the center of the grid is pushed from the camera position.
			
 
				+
			
 
				+	ShaderProgramResourcePtr m_visibilityProg; ///< The program resource all the techniques below are loaded from.
			
 
				+
			
 
				+	ShaderProgramPtr m_setupGrProg; ///< The "Setup" technique.
			
 
				+	ShaderProgramPtr m_countGrProg; ///< The "Count" technique.
			
 
				+	ShaderProgramPtr m_prefixSumGrProg; ///< The "PrefixSum" technique.
			
 
				+	ShaderProgramPtr m_fillGrProg; ///< The "Fill" technique.
			
 
				+};
			
 
				 /// @}
			
 
				 
			
 
				 } // end namespace anki
			
--- a/AnKi/Shaders/GpuVisibilityLocalLights.ankiprog
+++ b/AnKi/Shaders/GpuVisibilityLocalLights.ankiprog
@@ -0,0 +1,312 @@
 
				+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
			
 
				+// All rights reserved.
			
 
				+// Code licensed under the BSD License.
			
 
				+// http://www.anki3d.org/LICENSE
			
 
				+
			
 
				+// Terminology:
			
 
				+// - Grid: The volume we are looking to gather lights for
			
 
				+// - Cell: The grid is divided into cells
			
 
				+// - Light index list: An array of indices that point to the GPU scene lights. Each cell points to a part of this list
			
 
				+
			
 
				+#pragma anki technique Setup comp
			
 
				+#pragma anki technique Count comp
			
 
				+#pragma anki technique PrefixSum comp
			
 
				+#pragma anki technique Fill comp
			
 
				+
			
 
				+#include <AnKi/Shaders/Common.hlsl>
			
 
				+#include <AnKi/Shaders/Include/GpuSceneTypes.h>
			
 
				+#include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
			
 
				+#include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
			
 
				+
			
 
				+// Finds the cells that the AABB of a light overlaps and calls binLightToCellFunc(cellIdx, lightIdx) for each one of them.
				+// - lights: The GPU scene lights. If lightIdx is out of its range nothing happens
				+// - consts: Describes the grid (bounds, cell size, cell counts) and the capacity of the light index list
				+// - lightIndexCount: A single counter that is atomically incremented once for every light/cell pair. When it goes over
				+//                    consts.m_maxLightIndices the light index list is considered full and the binning stops
				+// - binLightToCellFunc: A callable with the signature void(U32 cellIdx, U32 lightIdx)
				+template<typename TFunc>
				+void lightVsCellVisibility(StructuredBuffer<GpuSceneLight> lights, U32 lightIdx, GpuVisibilityLocalLightsConsts consts,
				+						   RWStructuredBuffer<U32> lightIndexCount, TFunc binLightToCellFunc)
				+{
				+	const U32 lightCount = getStructuredBufferElementCount(lights);
				+	if(lightIdx >= lightCount)
				+	{
				+		return;
				+	}
				+
				+	const GpuSceneLight light = SBUFF(lights, lightIdx);
				+
				+	// Get the light bounds
				+	Vec3 worldLightAabbMin;
				+	Vec3 worldLightAabbMax;
				+	if((U32)light.m_flags & (U32)GpuSceneLightFlag::kPointLight)
				+	{
				+		worldLightAabbMin = light.m_position - light.m_radius;
				+		worldLightAabbMax = light.m_position + light.m_radius;
				+	}
				+	else
				+	{
				+		// Not a point light: The bounds enclose the position and the 4 edge points
				+		worldLightAabbMin = light.m_position;
				+		worldLightAabbMax = light.m_position;
				+
				+		[unroll] for(U32 i = 0; i < 4; ++i)
				+		{
				+			worldLightAabbMin = min(worldLightAabbMin, light.m_edgePoints[i]);
				+			worldLightAabbMax = max(worldLightAabbMax, light.m_edgePoints[i]);
				+		}
				+	}
				+
				+	// Move the bounds to the space of the grid (origin at the min corner of the grid) and clamp them to the grid. The upper limit
				+	// of the clamp has to be in the same space, in other words it's the size of the grid and not its max corner
				+	const Vec3 gridSize = consts.m_gridVolumeMax - consts.m_gridVolumeMin;
				+	const Vec3 localLightAabbMin = clamp(worldLightAabbMin - consts.m_gridVolumeMin, 0.0, gridSize - kEpsilonF32);
				+	const Vec3 localLightAabbMax = clamp(worldLightAabbMax - consts.m_gridVolumeMin, 0.0, gridSize - kEpsilonF32);
				+
				+	// If the bounds were completely outside in some axis then the clamp collapsed min and max to the same value in that axis
				+	if(any(localLightAabbMin == localLightAabbMax))
				+	{
				+		// Outside the volume
				+		return;
				+	}
				+
				+	const Vec3 localLightFirstCell = floor(localLightAabbMin / consts.m_cellSize);
				+	const Vec3 localLightEndCell = ceil(localLightAabbMax / consts.m_cellSize);
				+
				+	for(F32 x = localLightFirstCell.x; x < localLightEndCell.x; x += 1.0)
				+	{
				+		for(F32 y = localLightFirstCell.y; y < localLightEndCell.y; y += 1.0)
				+		{
				+			for(F32 z = localLightFirstCell.z; z < localLightEndCell.z; z += 1.0)
				+			{
				+				U32 count;
				+				InterlockedAdd(SBUFF(lightIndexCount, 0), 1, count);
				+				++count;
				+
				+				if(count > consts.m_maxLightIndices)
				+				{
				+					// Light index list is too small. The counter only grows so nothing else can be binned, stop instead of
				+					// breaking out of the innermost loop only
				+					return;
				+				}
				+
				+				// The cells are laid out with X being the fastest changing axis, then Y, then Z
				+				const U32 cellIdx = (U32)(z * consts.m_cellCounts.y * consts.m_cellCounts.x + y * consts.m_cellCounts.x + x);
				+
				+				binLightToCellFunc(cellIdx, lightIdx);
				+			}
				+		}
				+	}
				+}
			
 
				+
			
 
				+// ===========================================================================
			
 
				+// Setup                                                                     =
			
 
				+// ===========================================================================
			
 
				+#if NOT_ZERO(ANKI_TECHNIQUE_Setup)
				+
				+RWStructuredBuffer<U32> g_lightIndexCountsPerCell : register(u0);
				+RWStructuredBuffer<U32> g_lightIndexCount : register(u1);
				+
				+ANKI_FAST_CONSTANTS(GpuVisibilityLocalLightsConsts, g_consts)
				+
				+// Zeroes the per cell counters (one thread per cell) and the light index counter
				+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
				+{
				+	const U32 threadIdx = svDispatchThreadId.x;
				+
				+	// The threads that fall outside the buffer have nothing to clear
				+	if(threadIdx < getStructuredBufferElementCount(g_lightIndexCountsPerCell))
				+	{
				+		SBUFF(g_lightIndexCountsPerCell, threadIdx) = 0;
				+	}
				+
				+	// The very first thread of the dispatch also clears the single element counter
				+	if(threadIdx == 0)
				+	{
				+		SBUFF(g_lightIndexCount, 0) = 0;
				+	}
				+}
				+#endif
			
 
				+
			
 
				+// ===========================================================================
			
 
				+// Count                                                                     =
			
 
				+// ===========================================================================
			
 
				+
			
 
				+// Counts the light indices per cell
			
 
				+
			
 
				+#if NOT_ZERO(ANKI_TECHNIQUE_Count)
				+
				+StructuredBuffer<GpuSceneLight> g_lights : register(t0);
				+
				+RWStructuredBuffer<U32> g_lightIndexCountsPerCell : register(u0);
				+RWStructuredBuffer<U32> g_lightIndexCount : register(u1);
				+
				+ANKI_FAST_CONSTANTS(GpuVisibilityLocalLightsConsts, g_consts)
				+
				+// The binning callback of this technique: It only bumps the counter of the cell, the light index is not stored anywhere
				+struct CountLightInCellFunc
				+{
				+	void operator()(U32 cellIdx, U32 lightIdx)
				+	{
				+		InterlockedAdd(SBUFF(g_lightIndexCountsPerCell, cellIdx), 1);
				+	}
				+};
				+
				+// One thread per light
				+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
				+{
				+	const U32 lightIdx = svDispatchThreadId.x;
				+
				+	CountLightInCellFunc countFunc;
				+	lightVsCellVisibility(g_lights, lightIdx, g_consts, g_lightIndexCount, countFunc);
				+}
				+#endif
			
 
				+
			
 
				+// ===========================================================================
			
 
				+// PrefixSum                                                                 =
			
 
				+// ===========================================================================
			
 
				+
			
 
				+// Parallel prefix based on: https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda
			
 
				+// But it runs multiple iterations to support bigger arrays
			
 
				+
			
 
				+#if NOT_ZERO(ANKI_TECHNIQUE_PrefixSum)
				+
				+constexpr U32 kThreadCount = 1024; // Common for most GPUs
				+constexpr U32 kMaxElementCountPerIteration = kThreadCount * 2; // Every thread handles 2 elements per iteration
				+
				+RWStructuredBuffer<U32> g_inputElements : register(u0); // It's the g_lightIndexCountsPerCell
				+
				+RWStructuredBuffer<U32> g_outputElements : register(u1);
				+
				+// Some stuff to zero
				+RWStructuredBuffer<U32> g_lightIndexCount : register(u2);
				+
				+ANKI_FAST_CONSTANTS(GpuVisibilityLocalLightsConsts, g_consts)
				+
				+groupshared U32 g_tmp[kMaxElementCountPerIteration]; // The chunk of the current iteration, scanned in place
				+groupshared U32 g_valueSum; // The sum of all the input elements that have been consumed so far
				+
				+// Computes the exclusive prefix sum of g_inputElements into g_outputElements using the threads of this group only. The input is
				+// consumed in chunks of kMaxElementCountPerIteration elements and the total of the previous chunks is carried over to the next.
				+// When done, g_inputElements and g_lightIndexCount are zeroed
				+[numthreads(kThreadCount, 1, 1)] void main(COMPUTE_ARGS)
				+{
				+	const U32 elementCount = g_consts.m_cellCounts.x * g_consts.m_cellCounts.y * g_consts.m_cellCounts.z;
				+	const U32 iterationCount = (elementCount + kMaxElementCountPerIteration - 1) / kMaxElementCountPerIteration;
				+
				+	const U32 tid = svGroupIndex;
				+
				+	g_valueSum = 0; // No need for barrier, there are plenty below
				+
				+	for(U32 it = 0; it < iterationCount; ++it)
				+	{
				+		GroupMemoryBarrierWithGroupSync(); // Barrier because of the loop
				+
				+		// The sum of the chunks of the previous iterations. It has to be read here, after the barrier that makes the additions of the
				+		// previous iteration visible and before the additions of this iteration (these happen after the barriers of the reduction).
				+		// Reading g_valueSum when writing the output would also include the total of the current chunk and the result would not be
				+		// an exclusive prefix sum
				+		const U32 prevIterationsSum = g_valueSum;
				+
				+		const U32 firstElement = it * kMaxElementCountPerIteration;
				+		const U32 endElement = min((it + 1) * kMaxElementCountPerIteration, elementCount);
				+
				+		// load input into shared memory. Whatever is past the end of the input counts as zero
				+		const U32 inIdx1 = 2 * tid + firstElement;
				+		const U32 value1 = (inIdx1 < endElement) ? SBUFF(g_inputElements, inIdx1) : 0;
				+		g_tmp[2 * tid] = value1;
				+
				+		const U32 inIdx2 = 2 * tid + 1 + firstElement;
				+		const U32 value2 = (inIdx2 < endElement) ? SBUFF(g_inputElements, inIdx2) : 0;
				+		g_tmp[2 * tid + 1] = value2;
				+
				+		// Perform reduction
				+		U32 offset = 1;
				+		for(U32 d = kMaxElementCountPerIteration >> 1; d > 0; d >>= 1)
				+		{
				+			GroupMemoryBarrierWithGroupSync();
				+
				+			if(tid < d)
				+			{
				+				const U32 ai = offset * (2 * tid + 1) - 1;
				+				const U32 bi = offset * (2 * tid + 2) - 1;
				+				g_tmp[bi] += g_tmp[ai];
				+			}
				+
				+			offset *= 2;
				+		}
				+
				+		// Update the g_valueSum now that enough barriers have happened (every thread has read prevIterationsSum by now)
				+		InterlockedAdd(g_valueSum, value1 + value2);
				+
				+		// Clear the last element
				+		if(tid == 0)
				+		{
				+			g_tmp[kMaxElementCountPerIteration - 1] = 0;
				+		}
				+
				+		// Perform downsweep and build scan
				+		for(U32 d = 1; d < kMaxElementCountPerIteration; d *= 2)
				+		{
				+			offset >>= 1;
				+
				+			GroupMemoryBarrierWithGroupSync();
				+
				+			if(tid < d)
				+			{
				+				const U32 ai = offset * (2 * tid + 1) - 1;
				+				const U32 bi = offset * (2 * tid + 2) - 1;
				+				const U32 t = g_tmp[ai];
				+				g_tmp[ai] = g_tmp[bi];
				+				g_tmp[bi] += t;
				+			}
				+		}
				+
				+		GroupMemoryBarrierWithGroupSync();
				+
				+		// Write to output buffer: The scan of this chunk offset by everything that came before it
				+		if(inIdx1 < endElement)
				+		{
				+			SBUFF(g_outputElements, inIdx1) = g_tmp[2 * tid] + prevIterationsSum;
				+		}
				+
				+		if(inIdx2 < endElement)
				+		{
				+			SBUFF(g_outputElements, inIdx2) = g_tmp[2 * tid + 1] + prevIterationsSum;
				+		}
				+	}
				+
				+	// Abuse this compute job to also reset some buffers
				+	if(tid == 0)
				+	{
				+		SBUFF(g_lightIndexCount, 0) = 0;
				+	}
				+
				+	{
				+		// Every thread zeroes a contiguous range of the input
				+		const U32 elementsPerThread = (elementCount + kThreadCount - 1) / kThreadCount;
				+
				+		for(U32 i = 0; i < elementsPerThread; ++i)
				+		{
				+			const U32 idx = tid * elementsPerThread + i;
				+			if(idx >= elementCount)
				+			{
				+				break;
				+			}
				+
				+			SBUFF(g_inputElements, idx) = 0;
				+		}
				+	}
				+}
				+#endif
			
 
				+
			
 
				+// ===========================================================================
			
 
				+// Fill                                                                      =
			
 
				+// ===========================================================================
			
 
				+
			
 
				+// After the prefix sum is complete this job can store the results
			
 
				+
			
 
				+#if NOT_ZERO(ANKI_TECHNIQUE_Fill)
				+
				+StructuredBuffer<GpuSceneLight> g_lights : register(t0);
				+
				+StructuredBuffer<U32> g_lightIndexListOffsets : register(t1); // Basically the prefix sum. One per cell
				+
				+RWStructuredBuffer<U32> g_lightIndexCount : register(u0);
				+RWStructuredBuffer<U32> g_lightIndexCountsPerCell : register(u1);
				+RWStructuredBuffer<U32> g_lightIndexList : register(u2);
				+
				+ANKI_FAST_CONSTANTS(GpuVisibilityLocalLightsConsts, g_consts)
				+
				+// The binning callback of this technique: It stores the light index to the part of the light index list that belongs to the cell
				+struct Func
				+{
				+	void operator()(U32 cellIdx, U32 lightIdx)
				+	{
				+		// Reserve the next free slot of the cell. The value to add (1) has to be passed explicitly. Without it the uninitialized
				+		// offset is what gets added and the previous value of the counter is never returned
				+		U32 offset;
				+		InterlockedAdd(SBUFF(g_lightIndexCountsPerCell, cellIdx), 1, offset);
				+
				+		// Slot inside the cell -> slot inside the whole light index list
				+		offset += SBUFF(g_lightIndexListOffsets, cellIdx);
				+
				+		SBUFF(g_lightIndexList, offset) = lightIdx;
				+	}
				+};
				+
				+// One thread per light
				+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
				+{
				+	Func func;
				+	lightVsCellVisibility(g_lights, svDispatchThreadId.x, g_consts, g_lightIndexCount, func);
				+}
				+
				+#endif
			
--- a/AnKi/Shaders/Include/GpuVisibilityTypes.h
+++ b/AnKi/Shaders/Include/GpuVisibilityTypes.h
@@ -104,4 +104,19 @@ struct GpuVisibilityNonRenderablesCounters
 
				 	U32 m_feedbackObjectCount; ///< Counts the visbile objects that need feedback
			
 
				 };
			
 
				 
			
 
				+/// The constants of the techniques of the GpuVisibilityLocalLights shader program.
			
 
				+struct GpuVisibilityLocalLightsConsts
			
 
				+{
			
 
				+	Vec3 m_cellSize; ///< The size of a cell. Grid space positions are divided by it to find the cell they fall into.
			
 
				+	U32 m_maxLightIndices; ///< The binning of the lights stops when the light index counter goes over this value.
			
 
				+
			
 
				+	Vec3 m_gridVolumeMin; ///< The min corner of the grid. It's subtracted from the light bounds to bring them to the space of the grid.
			
 
				+	F32 m_padding2; ///< Unused. NOTE(review): Presumably it pads the preceding Vec3 to 16 bytes, confirm.
			
 
				+
			
 
				+	Vec3 m_gridVolumeMax; ///< The max corner of the grid.
			
 
				+	F32 m_padding3; ///< Unused. NOTE(review): Presumably it pads the preceding Vec3 to 16 bytes, confirm.
			
 
				+
			
 
				+	Vec3 m_cellCounts; ///< The number of cells in each axis, stored as floats. Their product is the total number of cells.
			
 
				+	F32 m_padding4; ///< Unused. NOTE(review): Presumably it pads the preceding Vec3 to 16 bytes, confirm.
			
 
				+};
			
 
				+
			
 
				 ANKI_END_NAMESPACE