Browse Source

Simplify the dependencies of cluster shading data

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
85043cc23d

+ 9 - 20
AnKi/Renderer/ClusterBinning.cpp

@@ -27,17 +27,15 @@ ClusterBinning::~ClusterBinning()
 
 Error ClusterBinning::init()
 {
-	ANKI_CHECK(loadShaderProgram("ShaderBinaries/ClusterBinningSetup.ankiprogbin", m_jobSetupProg, m_jobSetupGrProg));
-
-	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/ClusterBinning.ankiprogbin", m_binningProg));
+	ANKI_CHECK(loadShaderProgram("ShaderBinaries/ClusterBinning.ankiprogbin", {{"OBJECT_TYPE", 0}}, m_prog, m_jobSetupGrProg, "Setup"));
 
 	for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
 	{
-		ANKI_CHECK(loadShaderProgram("ShaderBinaries/ClusterBinning.ankiprogbin", {{"OBJECT_TYPE", MutatorValue(type)}}, m_binningProg,
-									 m_binningGrProgs[type]));
+		ANKI_CHECK(loadShaderProgram("ShaderBinaries/ClusterBinning.ankiprogbin", {{"OBJECT_TYPE", MutatorValue(type)}}, m_prog,
+									 m_binningGrProgs[type], "Binning"));
 
-		ANKI_CHECK(loadShaderProgram("ShaderBinaries/ClusterBinningPackVisibles.ankiprogbin", {{"OBJECT_TYPE", MutatorValue(type)}}, m_packingProg,
-									 m_packingGrProgs[type]));
+		ANKI_CHECK(loadShaderProgram("ShaderBinaries/ClusterBinning.ankiprogbin", {{"OBJECT_TYPE", MutatorValue(type)}}, m_prog,
+									 m_packingGrProgs[type], "PackVisibles"));
 	}
 
 	return Error::kNone;
@@ -53,17 +51,15 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 	{
 		const U32 clusterCount = getRenderer().getTileCounts().x() * getRenderer().getTileCounts().y() + getRenderer().getZSplitCount();
 		m_runCtx.m_clustersBuffer = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<Cluster>(clusterCount);
-		m_runCtx.m_clustersHandle = rgraph.importBuffer(m_runCtx.m_clustersBuffer, BufferUsageBit::kNone);
+		m_runCtx.m_dep = rgraph.importBuffer(m_runCtx.m_clustersBuffer, BufferUsageBit::kNone);
 	}
 
 	// Setup the indirect dispatches and zero the clusters buffer
 	BufferView indirectArgsBuff;
-	BufferHandle indirectArgsHandle;
 	{
 		// Allocate memory for the indirect args
 		constexpr U32 dispatchCount = U32(GpuSceneNonRenderableObjectType::kCount) * 2;
 		indirectArgsBuff = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer<DispatchIndirectArgs>(dispatchCount);
-		indirectArgsHandle = rgraph.importBuffer(indirectArgsBuff, BufferUsageBit::kNone);
 
 		// Create the pass
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("Cluster binning setup");
@@ -73,8 +69,7 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 			rpass.newBufferDependency(getRenderer().getPrimaryNonRenderableVisibility().getVisibleIndicesBufferHandle(type),
 									  BufferUsageBit::kSrvCompute);
 		}
-		rpass.newBufferDependency(indirectArgsHandle, BufferUsageBit::kUavCompute);
-		rpass.newBufferDependency(m_runCtx.m_clustersHandle, BufferUsageBit::kCopyDestination);
+		rpass.newBufferDependency(m_runCtx.m_dep, BufferUsageBit::kCopyDestination | BufferUsageBit::kUavCompute);
 
 		rpass.setWork([this, indirectArgsBuff](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
@@ -104,8 +99,7 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 		// Create the pass
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("Cluster binning");
 
-		rpass.newBufferDependency(indirectArgsHandle, BufferUsageBit::kIndirectCompute);
-		rpass.newBufferDependency(m_runCtx.m_clustersHandle, BufferUsageBit::kUavCompute);
+		rpass.newBufferDependency(m_runCtx.m_dep, BufferUsageBit::kUavCompute | BufferUsageBit::kIndirectCompute);
 
 		rpass.setWork([this, &ctx, indirectArgsBuff](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
@@ -210,16 +204,11 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 		{
 			m_runCtx.m_packedObjectsBuffers[type] = GpuVisibleTransientMemoryPool::getSingleton().allocateStructuredBuffer(
 				kMaxVisibleClusteredObjects[type], kClusteredObjectSizes[type]);
-			m_runCtx.m_packedObjectsHandles[type] = rgraph.importBuffer(m_runCtx.m_packedObjectsBuffers[type], BufferUsageBit::kNone);
 		}
 
 		// Create the pass
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("Cluster object packing");
-		rpass.newBufferDependency(indirectArgsHandle, BufferUsageBit::kIndirectCompute);
-		for(GpuSceneNonRenderableObjectType type : EnumIterable<GpuSceneNonRenderableObjectType>())
-		{
-			rpass.newBufferDependency(m_runCtx.m_packedObjectsHandles[type], BufferUsageBit::kUavCompute);
-		}
+		rpass.newBufferDependency(m_runCtx.m_dep, BufferUsageBit::kIndirectCompute | BufferUsageBit::kUavCompute);
 
 		rpass.setWork([this, indirectArgsBuff](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;

+ 5 - 15
AnKi/Renderer/ClusterBinning.h

@@ -30,38 +30,28 @@ public:
 		return m_runCtx.m_packedObjectsBuffers[type];
 	}
 
-	BufferHandle getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType type) const
-	{
-		return m_runCtx.m_packedObjectsHandles[type];
-	}
-
 	const BufferView& getClustersBuffer() const
 	{
 		return m_runCtx.m_clustersBuffer;
 	}
 
-	BufferHandle getClustersBufferHandle() const
+	BufferHandle getDependency() const
 	{
-		return m_runCtx.m_clustersHandle;
+		return m_runCtx.m_dep;
 	}
 
 private:
-	ShaderProgramResourcePtr m_jobSetupProg;
+	ShaderProgramResourcePtr m_prog;
 	ShaderProgramPtr m_jobSetupGrProg;
-
-	ShaderProgramResourcePtr m_binningProg;
 	Array<ShaderProgramPtr, U32(GpuSceneNonRenderableObjectType::kCount)> m_binningGrProgs;
-
-	ShaderProgramResourcePtr m_packingProg;
 	Array<ShaderProgramPtr, U32(GpuSceneNonRenderableObjectType::kCount)> m_packingGrProgs;
 
 	class
 	{
 	public:
-		BufferHandle m_clustersHandle;
-		BufferView m_clustersBuffer;
+		BufferHandle m_dep;
 
-		Array<BufferHandle, U32(GpuSceneNonRenderableObjectType::kCount)> m_packedObjectsHandles;
+		BufferView m_clustersBuffer;
 		Array<BufferView, U32(GpuSceneNonRenderableObjectType::kCount)> m_packedObjectsBuffers;
 
 		RenderingContext* m_rctx = nullptr;

+ 1 - 3
AnKi/Renderer/GBufferPost.cpp

@@ -70,9 +70,7 @@ void GBufferPost::populateRenderGraph(RenderingContext& ctx)
 	rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(1), TextureUsageBit::kAllRtvDsv);
 	rpass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvPixel);
 
-	rpass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvPixel);
-	rpass.newBufferDependency(getRenderer().getClusterBinning().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kDecal),
-							  BufferUsageBit::kSrvPixel);
+	rpass.newBufferDependency(getRenderer().getClusterBinning().getDependency(), BufferUsageBit::kSrvPixel);
 }
 
 } // end namespace anki

+ 1 - 3
AnKi/Renderer/LightShading.cpp

@@ -260,9 +260,7 @@ void LightShading::populateRenderGraph(RenderingContext& ctx)
 	pass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), readUsage);
 	pass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvPixel | TextureUsageBit::kRtvDsvRead);
 	pass.newTextureDependency(getRenderer().getShadowmapsResolve().getRt(), readUsage);
-	pass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvPixel);
-	pass.newBufferDependency(getRenderer().getClusterBinning().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kLight),
-							 BufferUsageBit::kSrvPixel);
+	pass.newBufferDependency(getRenderer().getClusterBinning().getDependency(), BufferUsageBit::kSrvPixel);
 	pass.newTextureDependency(getRenderer().getSsao().getRt(), readUsage);
 	pass.newTextureDependency(getRenderer().getReflections().getRt(), readUsage);
 

+ 2 - 2
AnKi/Renderer/Reflections.cpp

@@ -215,7 +215,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 		rpass.newTextureDependency(getRenderer().getGBuffer().getColorRt(2), TextureUsageBit::kSrvCompute);
 		rpass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
 		rpass.newTextureDependency(getRenderer().getBloom().getPyramidRt(), TextureUsageBit::kSrvCompute);
-		rpass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvCompute);
+		rpass.newBufferDependency(getRenderer().getClusterBinning().getDependency(), BufferUsageBit::kSrvCompute);
 		rpass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvCompute);
 		rpass.newTextureDependency(classTileMapRt, TextureUsageBit::kSrvCompute);
 
@@ -397,7 +397,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 		NonGraphicsRenderPass& rpass = rgraph.newNonGraphicsRenderPass("ReflectionProbeFallback");
 
 		rpass.newTextureDependency(getRenderer().getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
-		rpass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvCompute);
+		rpass.newBufferDependency(getRenderer().getClusterBinning().getDependency(), BufferUsageBit::kSrvCompute);
 		rpass.newBufferDependency(indirectArgsHandle, BufferUsageBit::kIndirectCompute);
 		rpass.newTextureDependency(transientRt1, TextureUsageBit::kUavCompute);
 		rpass.newTextureDependency(hitPosAndDepthRt, TextureUsageBit::kUavCompute);

+ 1 - 1
AnKi/Renderer/RtShadows.cpp

@@ -275,7 +275,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 		rpass.newTextureDependency(m_runCtx.m_prevMomentsRt, TextureUsageBit::kSrvTraceRays);
 		rpass.newTextureDependency(m_runCtx.m_currentMomentsRt, TextureUsageBit::kUavTraceRays);
 
-		rpass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvTraceRays);
+		rpass.newBufferDependency(getRenderer().getClusterBinning().getDependency(), BufferUsageBit::kSrvTraceRays);
 
 		rpass.setWork([this, sbtBuffer, &ctx](RenderPassWorkContext& rgraphCtx) {
 			ANKI_TRACE_SCOPED_EVENT(RtShadows);

+ 2 - 6
AnKi/Renderer/ShadowmapsResolve.cpp

@@ -57,9 +57,7 @@ void ShadowmapsResolve::populateRenderGraph(RenderingContext& ctx)
 								   TextureUsageBit::kSrvCompute);
 		rpass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvCompute);
 
-		rpass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvCompute);
-		rpass.newBufferDependency(getRenderer().getClusterBinning().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kLight),
-								  BufferUsageBit::kSrvCompute);
+		rpass.newBufferDependency(getRenderer().getClusterBinning().getDependency(), BufferUsageBit::kSrvCompute);
 
 		if(getRenderer().getRtShadowsEnabled())
 		{
@@ -81,9 +79,7 @@ void ShadowmapsResolve::populateRenderGraph(RenderingContext& ctx)
 								   TextureUsageBit::kSrvPixel);
 		rpass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvPixel);
 
-		rpass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvPixel);
-		rpass.newBufferDependency(getRenderer().getClusterBinning().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kLight),
-								  BufferUsageBit::kSrvPixel);
+		rpass.newBufferDependency(getRenderer().getClusterBinning().getDependency(), BufferUsageBit::kSrvPixel);
 
 		if(getRenderer().getRtShadowsEnabled())
 		{

+ 1 - 8
AnKi/Renderer/VolumetricLightingAccumulation.cpp

@@ -66,14 +66,7 @@ void VolumetricLightingAccumulation::populateRenderGraph(RenderingContext& ctx)
 	pass.newTextureDependency(m_runCtx.m_rts[1], TextureUsageBit::kUavCompute);
 	pass.newTextureDependency(getRenderer().getShadowMapping().getShadowmapRt(), TextureUsageBit::kSrvCompute);
 
-	pass.newBufferDependency(getRenderer().getClusterBinning().getClustersBufferHandle(), BufferUsageBit::kSrvCompute);
-	pass.newBufferDependency(getRenderer().getClusterBinning().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kLight),
-							 BufferUsageBit::kSrvCompute);
-	pass.newBufferDependency(
-		getRenderer().getClusterBinning().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe),
-		BufferUsageBit::kSrvCompute);
-	pass.newBufferDependency(getRenderer().getClusterBinning().getPackedObjectsBufferHandle(GpuSceneNonRenderableObjectType::kFogDensityVolume),
-							 BufferUsageBit::kSrvCompute);
+	pass.newBufferDependency(getRenderer().getClusterBinning().getDependency(), BufferUsageBit::kSrvCompute);
 
 	if(getRenderer().getIndirectDiffuseProbes().hasCurrentlyRefreshedVolumeRt())
 	{

+ 178 - 36
AnKi/Shaders/ClusterBinning.ankiprog

@@ -7,24 +7,95 @@
 
 #pragma anki mutator OBJECT_TYPE 0 1 2 3 4 // Same as GpuSceneNonRenderableObjectType
 
-#pragma anki technique comp
+#pragma anki technique Setup comp mutators
+#pragma anki technique Binning comp
+#pragma anki technique PackVisibles comp
 
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 
-#if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
+// ===========================================================================
+// Setup                                                                     =
+// ===========================================================================
+#if NOT_ZERO(ANKI_TECHNIQUE_Setup)
+
+StructuredBuffer<U32> g_visibleIndices[(U32)GpuSceneNonRenderableObjectType::kCount] : register(t0);
+
+// This has a size of 2*GpuSceneNonRenderableObjectType::kCount. The first GpuSceneNonRenderableObjectType::kCount elements are for the cluster
+// binning dispatches and the remaining GpuSceneNonRenderableObjectType::kCount elements are for the packing dispatches
+RWStructuredBuffer<DispatchIndirectArgs> g_indirectArgs : register(u0);
+
+struct Constants
+{
+	U32 m_tileCount;
+	U32 m_padding1;
+	U32 m_padding2;
+	U32 m_padding3;
+};
+ANKI_FAST_CONSTANTS(Constants, g_consts)
+
+constexpr U32 kSampleCount = 8;
+constexpr U32 kClusterBinningThreadgroupSize = 64;
+constexpr U32 kPackVisiblesThreadgroupSize = 64;
+
+#	define THREADGROUP_SIZE 16
+
+[numthreads(THREADGROUP_SIZE, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID)
+{
+	if(svDispatchThreadId < (U32)GpuSceneNonRenderableObjectType::kCount)
+	{
+		// First threads set the dispatch args of cluster binning
+
+		const GpuSceneNonRenderableObjectType type = (GpuSceneNonRenderableObjectType)svDispatchThreadId;
+		const U32 objCount = min(kMaxVisibleClusteredObjects[(U32)type], g_visibleIndices[(U32)type][0]);
+
+		DispatchIndirectArgs args;
+		args.m_threadGroupCountX = (g_consts.m_tileCount * kSampleCount + kClusterBinningThreadgroupSize - 1) / kClusterBinningThreadgroupSize;
+		args.m_threadGroupCountY = objCount;
+		args.m_threadGroupCountZ = 1;
+
+		g_indirectArgs[svDispatchThreadId] = args;
+	}
+	else if(svDispatchThreadId < (U32)GpuSceneNonRenderableObjectType::kCount * 2)
+	{
+		// Next threads set the dispatch args of packing
+
+		const GpuSceneNonRenderableObjectType type =
+			(GpuSceneNonRenderableObjectType)(svDispatchThreadId - (U32)GpuSceneNonRenderableObjectType::kCount);
+		const U32 objCount = min(kMaxVisibleClusteredObjects[(U32)type], g_visibleIndices[(U32)type][0]);
+
+		DispatchIndirectArgs args;
+		args.m_threadGroupCountX = (objCount + kPackVisiblesThreadgroupSize - 1) / kPackVisiblesThreadgroupSize;
+		args.m_threadGroupCountY = 1;
+		args.m_threadGroupCountZ = 1;
+
+		g_indirectArgs[svDispatchThreadId] = args;
+	}
+	else
+	{
+		// Skip remaining threads
+	}
+}
+#endif
+
+// ===========================================================================
+// Binning                                                                   =
+// ===========================================================================
+#if NOT_ZERO(ANKI_TECHNIQUE_Binning)
+
+#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 typedef GpuSceneLight GpuSceneType;
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
 typedef GpuSceneDecal GpuSceneType;
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
 typedef GpuSceneFogDensityVolume GpuSceneType;
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
 typedef GpuSceneReflectionProbe GpuSceneType;
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
 typedef GpuSceneGlobalIlluminationProbe GpuSceneType;
-#else
-#	error See file
-#endif
+#	else
+#		error See file
+#	endif
 
 struct ClusterBinningConstants
 {
@@ -51,14 +122,14 @@ StructuredBuffer<GpuSceneType> g_objects : register(t1);
 
 RWStructuredBuffer<Cluster> g_clusters : register(u0);
 
-#define THREADGROUP_SIZE 64
+#	define THREADGROUP_SIZE 64
 
 // ALMOST like DX Sample locations (https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_standard_multisample_quality_levels)
 constexpr U32 kSampleCount = 8u;
-#define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(kClusteredShadingTileSize))
+#	define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(kClusteredShadingTileSize))
 constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1, 3),  LOCATION(5, 1), LOCATION(-6, -6),
 												  LOCATION(-6, 6), LOCATION(-7, -1), LOCATION(6, 7), LOCATION(7, -7)};
-#undef LOCATION
+#	undef LOCATION
 
 [numthreads(THREADGROUP_SIZE, 1, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
@@ -88,7 +159,7 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 	Bool collides;
 	const GpuSceneType obj = g_objects[g_visibleObjectIds[visibleObjectIdx + 1]];
 
-#if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
+#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 	if((U32)obj.m_flags & (U32)GpuSceneLightFlag::kPointLight)
 	{
 		collides = testRaySphere(rayOrigin, rayDir, obj.m_position, obj.m_radius, t0, t1);
@@ -131,9 +202,9 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 
 		collides = (hits != 0u);
 	}
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
 	collides = testRaySphere(rayOrigin, rayDir, obj.m_sphereCenter, obj.m_sphereRadius, t0, t1);
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
 	if(obj.m_isBox != 0u)
 	{
 		collides = testRayAabb(rayOrigin, rayDir, obj.m_aabbMinOrSphereCenter, obj.m_aabbMaxOrSphereRadius, t0, t1);
@@ -142,12 +213,12 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 	{
 		collides = testRaySphere(rayOrigin, rayDir, obj.m_aabbMinOrSphereCenter, obj.m_aabbMaxOrSphereRadius.x, t0, t1);
 	}
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE \
-	|| OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE \
+		|| OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
 	collides = testRayAabb(rayOrigin, rayDir, obj.m_aabbMin, obj.m_aabbMax, t0, t1);
-#else
-#	error See file
-#endif
+#	else
+#		error See file
+#	endif
 
 	// Update the masks
 	if(collides)
@@ -157,7 +228,7 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 		ANKI_MAYBE_UNUSED(maskArrayIdx);
 
 		// Set the tile
-#if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
+#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 		if((U32)obj.m_flags & (U32)GpuSceneLightFlag::kPointLight)
 		{
 			InterlockedOr(g_clusters[tileIdx].m_pointLightsMask[maskArrayIdx], mask);
@@ -166,17 +237,17 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 		{
 			InterlockedOr(g_clusters[tileIdx].m_spotLightsMask[maskArrayIdx], mask);
 		}
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
 		InterlockedOr(g_clusters[tileIdx].m_decalsMask[maskArrayIdx], mask);
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
 		InterlockedOr(g_clusters[tileIdx].m_fogDensityVolumesMask, mask);
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
 		InterlockedOr(g_clusters[tileIdx].m_reflectionProbesMask, mask);
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
 		InterlockedOr(g_clusters[tileIdx].m_giProbesMask, mask);
-#else
-#	error See file
-#endif
+#	else
+#		error See file
+#	endif
 
 		// Compute and set the Z splits
 		const Vec3 hitpointA = rayDir * t0 + rayOrigin;
@@ -201,7 +272,7 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 		const I32 endZSplit = clamp(I32(maxDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0, g_consts.m_zSplitCountMinusOne);
 		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		{
-#if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
+#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 			if((U32)obj.m_flags & (U32)GpuSceneLightFlag::kPointLight)
 			{
 				InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_pointLightsMask[maskArrayIdx], mask);
@@ -210,17 +281,88 @@ constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(1, -3), LOCATION(-1,
 			{
 				InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_spotLightsMask[maskArrayIdx], mask);
 			}
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
 			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_decalsMask[maskArrayIdx], mask);
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
 			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_fogDensityVolumesMask, mask);
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
 			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_reflectionProbesMask, mask);
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
 			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_giProbesMask, mask);
-#else
-#	error See file
-#endif
+#	else
+#		error See file
+#	endif
 		}
 	}
 }
+#endif
+
+// ===========================================================================
+// PackVisibles                                                              =
+// ===========================================================================
+#if NOT_ZERO(ANKI_TECHNIQUE_PackVisibles)
+
+#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
+typedef LightUnion ClusteredType;
+typedef GpuSceneLight GpuSceneType;
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
+typedef Decal ClusteredType;
+typedef GpuSceneDecal GpuSceneType;
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
+typedef FogDensityVolume ClusteredType;
+typedef GpuSceneFogDensityVolume GpuSceneType;
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
+typedef ReflectionProbe ClusteredType;
+typedef GpuSceneReflectionProbe GpuSceneType;
+#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
+typedef GlobalIlluminationProbe ClusteredType;
+typedef GpuSceneGlobalIlluminationProbe GpuSceneType;
+#	else
+#		error See file
+#	endif
+
+StructuredBuffer<GpuSceneType> g_inBuffer : register(t0);
+RWStructuredBuffer<ClusteredType> g_outBuffer : register(u0);
+StructuredBuffer<U32> g_visibles : register(t1);
+
+#	define THREAD_GROUP_SIZE 64
+
+[numthreads(THREAD_GROUP_SIZE, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
+{
+	const U32 visibleObjCount = min(g_visibles[0], kMaxVisibleClusteredObjects[OBJECT_TYPE]);
+	const U32 idxOut = svDispatchThreadId.x;
+	if(idxOut >= visibleObjCount)
+	{
+		return;
+	}
+
+#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
+	const GpuSceneLight input = g_inBuffer[g_visibles[idxOut + 1]];
+
+	const Bool isPoint = ((U32)input.m_flags & (U32)GpuSceneLightFlag::kPointLight) ? true : false;
+
+	LightUnion output = (LightUnion)0;
+	output.m_position = input.m_position;
+	output.m_radius = input.m_radius;
+
+	output.m_diffuseColor = input.m_diffuseColor;
+	output.m_lightType = (isPoint) ? 0 : 1;
+
+	output.m_shadow = ((U32)input.m_flags & (U32)GpuSceneLightFlag::kShadow) ? 1 : 0;
+	output.m_innerCos = input.m_innerCos;
+	output.m_outerCos = input.m_outerCos;
+
+	output.m_direction = input.m_direction;
+	output.m_shadowAtlasTileScale = input.m_spotLightMatrixOrPointLightUvViewports[0].z; // Scale should be the same for all
+
+	for(U32 i = 0; i < 6; ++i)
+	{
+		output.m_spotLightMatrixOrPointLightUvViewports[i] = input.m_spotLightMatrixOrPointLightUvViewports[i];
+	}
+
+	g_outBuffer[idxOut] = output;
+#	else
+	g_outBuffer[idxOut] = g_inBuffer[g_visibles[idxOut + 1]];
+#	endif
+}
+#endif

+ 0 - 75
AnKi/Shaders/ClusterBinningPackVisibles.ankiprog

@@ -1,75 +0,0 @@
-// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-#pragma anki mutator OBJECT_TYPE 0 1 2 3 4 // Same as GpuSceneNonRenderableObjectType
-
-#pragma anki technique comp
-
-#include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
-#include <AnKi/Shaders/Include/GpuSceneTypes.h>
-
-#if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
-typedef LightUnion ClusteredType;
-typedef GpuSceneLight GpuSceneType;
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
-typedef Decal ClusteredType;
-typedef GpuSceneDecal GpuSceneType;
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
-typedef FogDensityVolume ClusteredType;
-typedef GpuSceneFogDensityVolume GpuSceneType;
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
-typedef ReflectionProbe ClusteredType;
-typedef GpuSceneReflectionProbe GpuSceneType;
-#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
-typedef GlobalIlluminationProbe ClusteredType;
-typedef GpuSceneGlobalIlluminationProbe GpuSceneType;
-#else
-#	error See file
-#endif
-
-StructuredBuffer<GpuSceneType> g_inBuffer : register(t0);
-RWStructuredBuffer<ClusteredType> g_outBuffer : register(u0);
-StructuredBuffer<U32> g_visibles : register(t1);
-
-#define THREAD_GROUP_SIZE 64
-
-[numthreads(THREAD_GROUP_SIZE, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
-{
-	const U32 visibleObjCount = min(g_visibles[0], kMaxVisibleClusteredObjects[OBJECT_TYPE]);
-	const U32 idxOut = svDispatchThreadId.x;
-	if(idxOut >= visibleObjCount)
-	{
-		return;
-	}
-
-#if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
-	const GpuSceneLight input = g_inBuffer[g_visibles[idxOut + 1]];
-
-	const Bool isPoint = ((U32)input.m_flags & (U32)GpuSceneLightFlag::kPointLight) ? true : false;
-
-	LightUnion output = (LightUnion)0;
-	output.m_position = input.m_position;
-	output.m_radius = input.m_radius;
-
-	output.m_diffuseColor = input.m_diffuseColor;
-	output.m_lightType = (isPoint) ? 0 : 1;
-
-	output.m_shadow = ((U32)input.m_flags & (U32)GpuSceneLightFlag::kShadow) ? 1 : 0;
-	output.m_innerCos = input.m_innerCos;
-	output.m_outerCos = input.m_outerCos;
-
-	output.m_direction = input.m_direction;
-	output.m_shadowAtlasTileScale = input.m_spotLightMatrixOrPointLightUvViewports[0].z; // Scale should be the same for all
-
-	for(U32 i = 0; i < 6; ++i)
-	{
-		output.m_spotLightMatrixOrPointLightUvViewports[i] = input.m_spotLightMatrixOrPointLightUvViewports[i];
-	}
-
-	g_outBuffer[idxOut] = output;
-#else
-	g_outBuffer[idxOut] = g_inBuffer[g_visibles[idxOut + 1]];
-#endif
-}

+ 0 - 69
AnKi/Shaders/ClusterBinningSetup.ankiprog

@@ -1,69 +0,0 @@
-// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-// This shader prepares the indirect args of future dispatches
-
-#pragma anki technique comp
-
-#include <AnKi/Shaders/Common.hlsl>
-#include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
-
-StructuredBuffer<U32> g_visibleIndices[(U32)GpuSceneNonRenderableObjectType::kCount] : register(t0);
-
-// This has a size of 2*GpuSceneNonRenderableObjectType::kCount. The first GpuSceneNonRenderableObjectType::kCount elements are for the cluster
-// binning dispatches and the rest GpuSceneNonRenderableObjectType::kCount for the packing dispatches
-RWStructuredBuffer<DispatchIndirectArgs> g_indirectArgs : register(u0);
-
-struct Constants
-{
-	U32 m_tileCount;
-	U32 m_padding1;
-	U32 m_padding2;
-	U32 m_padding3;
-};
-ANKI_FAST_CONSTANTS(Constants, g_consts)
-
-constexpr U32 kSampleCount = 8;
-constexpr U32 kClusterBinningThreadgroupSize = 64;
-constexpr U32 kPackVisiblesThreadgroupSize = 64;
-
-#define THREADGROUP_SIZE 16
-
-[numthreads(THREADGROUP_SIZE, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID)
-{
-	if(svDispatchThreadId < (U32)GpuSceneNonRenderableObjectType::kCount)
-	{
-		// First threads set the dispatch args of cluster binning
-
-		const GpuSceneNonRenderableObjectType type = (GpuSceneNonRenderableObjectType)svDispatchThreadId;
-		const U32 objCount = min(kMaxVisibleClusteredObjects[(U32)type], g_visibleIndices[(U32)type][0]);
-
-		DispatchIndirectArgs args;
-		args.m_threadGroupCountX = (g_consts.m_tileCount * kSampleCount + kClusterBinningThreadgroupSize - 1) / kClusterBinningThreadgroupSize;
-		args.m_threadGroupCountY = objCount;
-		args.m_threadGroupCountZ = 1;
-
-		g_indirectArgs[svDispatchThreadId] = args;
-	}
-	else if(svDispatchThreadId < (U32)GpuSceneNonRenderableObjectType::kCount * 2)
-	{
-		// Next threads set the dispatch args of packing
-
-		const GpuSceneNonRenderableObjectType type =
-			(GpuSceneNonRenderableObjectType)(svDispatchThreadId - (U32)GpuSceneNonRenderableObjectType::kCount);
-		const U32 objCount = min(kMaxVisibleClusteredObjects[(U32)type], g_visibleIndices[(U32)type][0]);
-
-		DispatchIndirectArgs args;
-		args.m_threadGroupCountX = (objCount + kPackVisiblesThreadgroupSize - 1) / kPackVisiblesThreadgroupSize;
-		args.m_threadGroupCountY = 1;
-		args.m_threadGroupCountZ = 1;
-
-		g_indirectArgs[svDispatchThreadId] = args;
-	}
-	else
-	{
-		// Skip remaining threads
-	}
-}