Browse Source

Add the C++ code of the cluster binning

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
48ccf6fa38

+ 1 - 1
AnKi/Gr/Vulkan/BufferImpl.cpp

@@ -102,7 +102,7 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 																			 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
 																			 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
 		}
 		}
 	}
 	}
-	else if((access & BufferMapAccessBit::READ) != BufferMapAccessBit::NONE)
+	else if(!!(access & BufferMapAccessBit::READ))
 	{
 	{
 		// Read or read/write
 		// Read or read/write
 
 

+ 83 - 0
AnKi/Renderer/ClusterBinning.cpp

@@ -4,3 +4,86 @@
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
 #include <AnKi/Renderer/ClusterBinning.h>
 #include <AnKi/Renderer/ClusterBinning.h>
+#include <AnKi/Renderer/Renderer.h>
+#include <AnKi/Renderer/RenderQueue.h>
+#include <AnKi/Core/ConfigSet.h>
+
+namespace anki
+{
+
+/// Construct the pass and hand the owning renderer pointer to the RendererObject base.
+ClusterBinning::ClusterBinning(Renderer* r)
+	: RendererObject(r)
+{
+}
+
+/// Empty destructor body; no explicit teardown is performed here.
+ClusterBinning::~ClusterBinning()
+{
+}
+
+/// Load the cluster binning program, specialize it with the renderer's tile/Z-split setup and cache the total
+/// cluster count.
+/// @param config Not read by this function.
+/// @return Error::NONE on success or the error reported while loading the program resource.
+Error ClusterBinning::init(const ConfigSet& config)
+{
+	ANKI_R_LOGI("Initializing clusterer binning");
+
+	ANKI_CHECK(getResourceManager().loadResource("Shaders/ClusterBinning.ankiprog", m_prog));
+
+	// Fetch the renderer parameters once, they feed both the shader constants and the cluster count
+	const U32 tileSize = m_r->getTileSize();
+	const UVec2& tileCounts = m_r->getTileCounts();
+	const U32 zSplitCount = m_r->getZSplitCount();
+
+	// Bake the parameters into the shader variant as specialization constants
+	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
+	variantInitInfo.addConstant("TILE_SIZE", tileSize);
+	variantInitInfo.addConstant("TILE_COUNT_X", tileCounts.x());
+	variantInitInfo.addConstant("TILE_COUNT_Y", tileCounts.y());
+	variantInitInfo.addConstant("Z_SPLIT_COUNT", zSplitCount);
+	variantInitInfo.addConstant("RENDERING_SIZE", UVec2(m_r->getWidth(), m_r->getHeight()));
+
+	const ShaderProgramResourceVariant* variant;
+	m_prog->getOrCreateVariant(variantInitInfo, variant);
+	m_grProg = variant->getProgram();
+
+	// tilesX * tilesY * zSplits
+	m_clusterCount = tileCounts.x() * tileCounts.y() * zSplitCount;
+
+	return Error::NONE;
+}
+
+/// Register the "Cluster Binning" compute pass for this frame and allocate zero-filled per-frame storage for the
+/// clusters.
+/// @param ctx The rendering context of the frame. Its address is stored in m_runCtx so that run() can use it.
+void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
+{
+	// Remember the context, run() needs the render queue and the GPU memory tokens
+	m_runCtx.m_ctx = &ctx;
+
+	ComputeRenderPassDescription& pass = ctx.m_renderGraphDescr.newComputeRenderPass("Cluster Binning");
+
+	// Attach work only when the render queue holds at least one object that can be binned
+	const RenderQueue& rqueue = *ctx.m_renderQueue;
+	const bool hasObjectsToBin = rqueue.m_pointLights.getSize() || rqueue.m_spotLights.getSize()
+								 || rqueue.m_decals.getSize() || rqueue.m_reflectionProbes.getSize()
+								 || rqueue.m_fogDensityVolumes.getSize() || rqueue.m_giProbes.getSize();
+	if(ANKI_LIKELY(hasObjectsToBin))
+	{
+		pass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) {
+				static_cast<ClusterBinning*>(rgraphCtx.m_userData)->run(rgraphCtx);
+			},
+			this, 0);
+	}
+
+	// Allocate the clusters. Keep the result in an 8-byte aligned pointer type, that may help the compiler emit a
+	// faster memset
+	const auto clustersSize = sizeof(Cluster) * m_clusterCount;
+	U64* clusters = static_cast<U64*>(m_r->getStagingGpuMemoryManager().allocateFrame(
+		clustersSize, StagingGpuMemoryType::STORAGE, ctx.m_clustererGpuObjects.m_clusterersToken));
+
+	// The shader ORs into the masks with atomics so everything has to start from zero
+	memset(clusters, 0, clustersSize);
+}
+
+/// Record the cluster binning dispatch: bind the program and the clusterer buffers, then launch enough workgroups
+/// to cover every cluster sample.
+/// @param rgraphCtx The render graph work context that provides the command buffer to record into.
+void ClusterBinning::run(RenderPassWorkContext& rgraphCtx)
+{
+	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
+
+	const ClustererGpuObjects& tokens = m_runCtx.m_ctx->m_clustererGpuObjects;
+
+	cmdb->bindShaderProgram(m_grProg);
+	bindUniforms(cmdb, 0, 0, tokens.m_lightingUniformsToken);
+	bindStorage(cmdb, 0, 1, tokens.m_clusterersToken);
+	bindStorage(cmdb, 0, 2, tokens.m_pointLightsToken);
+
+	// NOTE(review): ClusterBinning.ankiprog declares SAMPLE_COUNT = 4u while this uses 8. Confirm which is intended
+	const U32 sampleCount = 8;
+	// Has to match WORKGROUP_SIZE in ClusterBinning.ankiprog
+	const U32 workgroupSize = 64;
+	const U32 sizex = m_clusterCount * sampleCount;
+
+	// Round up so that a partially filled last workgroup is still dispatched. Subtracting the workgroup size here
+	// would drop workgroups and wrap around for small sizex because the math is unsigned
+	const U32 workgroupCountX = (sizex + workgroupSize - 1) / workgroupSize;
+
+	const RenderQueue& rqueue = *m_runCtx.m_ctx->m_renderQueue;
+	U32 clusterObjectCounts = min(MAX_VISIBLE_POINT_LIGHTS, rqueue.m_pointLights.getSize());
+
+	// NOTE(review): The object count goes to the Z dimension but isPointLight() in the shader reads
+	// gl_GlobalInvocationID.y. Confirm which axis the shader expects
+	cmdb->dispatchCompute(workgroupCountX, 1, clusterObjectCounts);
+}
+
+} // end namespace anki

+ 12 - 1
AnKi/Renderer/ClusterBinning.h

@@ -27,7 +27,18 @@ public:
 	void populateRenderGraph(RenderingContext& ctx);
 	void populateRenderGraph(RenderingContext& ctx);
 
 
 private:
 private:
-	void run(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
+	ShaderProgramResourcePtr m_prog;
+	ShaderProgramPtr m_grProg;
+
+	U32 m_clusterCount = 0;
+
+	/// Per-frame state that populateRenderGraph() stores for run().
+	class
+	{
+	public:
+		const RenderingContext* m_ctx = nullptr; ///< Assigned at the start of populateRenderGraph().
+	} m_runCtx;
+
+	void run(RenderPassWorkContext& rgraphCtx);
 };
 };
 /// @}
 /// @}
 
 

+ 2 - 2
AnKi/Renderer/Common.h

@@ -49,6 +49,7 @@ class ShadowmapsResolve;
 class RtShadows;
 class RtShadows;
 class AccelerationStructureBuilder;
 class AccelerationStructureBuilder;
 class MotionVectors;
 class MotionVectors;
+class ClusterBinning;
 
 
 class DebugDrawer;
 class DebugDrawer;
 
 
@@ -145,9 +146,8 @@ public:
 	StagingGpuMemoryToken m_decalsToken;
 	StagingGpuMemoryToken m_decalsToken;
 	StagingGpuMemoryToken m_fogDensityVolumesToken;
 	StagingGpuMemoryToken m_fogDensityVolumesToken;
 	StagingGpuMemoryToken m_globalIlluminationProbesToken;
 	StagingGpuMemoryToken m_globalIlluminationProbesToken;
-	StagingGpuMemoryToken m_tilesToken;
-	StagingGpuMemoryToken m_zSplitsToken;
 	StagingGpuMemoryToken m_lightingUniformsToken;
 	StagingGpuMemoryToken m_lightingUniformsToken;
+	StagingGpuMemoryToken m_clusterersToken;
 
 
 	TextureViewPtr m_diffuseDecalTextureView;
 	TextureViewPtr m_diffuseDecalTextureView;
 	TextureViewPtr m_specularRoughnessDecalTextureView;
 	TextureViewPtr m_specularRoughnessDecalTextureView;

+ 16 - 4
AnKi/Renderer/Renderer.cpp

@@ -10,6 +10,8 @@
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Util/HighRezTimer.h>
 #include <AnKi/Util/HighRezTimer.h>
 #include <AnKi/Collision/Aabb.h>
 #include <AnKi/Collision/Aabb.h>
+#include <AnKi/Collision/Plane.h>
+#include <AnKi/Collision/Functions.h>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 
 
 #include <AnKi/Renderer/ProbeReflections.h>
 #include <AnKi/Renderer/ProbeReflections.h>
@@ -38,6 +40,7 @@
 #include <AnKi/Renderer/RtShadows.h>
 #include <AnKi/Renderer/RtShadows.h>
 #include <AnKi/Renderer/AccelerationStructureBuilder.h>
 #include <AnKi/Renderer/AccelerationStructureBuilder.h>
 #include <AnKi/Renderer/MotionVectors.h>
 #include <AnKi/Renderer/MotionVectors.h>
+#include <AnKi/Renderer/ClusterBinning.h>
 
 
 namespace anki
 namespace anki
 {
 {
@@ -81,13 +84,13 @@ Error Renderer::init(ThreadHive* hive, ResourceManager* resources, GrManager* gl
 
 
 Error Renderer::initInternal(const ConfigSet& config)
 Error Renderer::initInternal(const ConfigSet& config)
 {
 {
+	m_frameCount = 0;
+
 	// Set from the config
 	// Set from the config
 	m_width = config.getNumberU32("width");
 	m_width = config.getNumberU32("width");
 	m_height = config.getNumberU32("height");
 	m_height = config.getNumberU32("height");
 	ANKI_R_LOGI("Initializing offscreen renderer. Size %ux%u", m_width, m_height);
 	ANKI_R_LOGI("Initializing offscreen renderer. Size %ux%u", m_width, m_height);
 
 
-	m_frameCount = 0;
-
 	m_clusterCount[0] = config.getNumberU32("r_clusterSizeX");
 	m_clusterCount[0] = config.getNumberU32("r_clusterSizeX");
 	m_clusterCount[1] = config.getNumberU32("r_clusterSizeY");
 	m_clusterCount[1] = config.getNumberU32("r_clusterSizeY");
 	m_clusterCount[2] = config.getNumberU32("r_clusterSizeZ");
 	m_clusterCount[2] = config.getNumberU32("r_clusterSizeZ");
@@ -96,6 +99,8 @@ Error Renderer::initInternal(const ConfigSet& config)
 	m_clusterBin.init(m_alloc, m_clusterCount[0], m_clusterCount[1], m_clusterCount[2], config);
 	m_clusterBin.init(m_alloc, m_clusterCount[0], m_clusterCount[1], m_clusterCount[2], config);
 
 
 	m_tileSize = config.getNumberU32("r_tileSize");
 	m_tileSize = config.getNumberU32("r_tileSize");
+	m_tileCounts.x() = (m_width + m_tileSize - 1) / m_tileSize;
+	m_tileCounts.y() = (m_height + m_tileSize - 1) / m_tileSize;
 	m_zSplitCount = config.getNumberU32("r_zSplitCount");
 	m_zSplitCount = config.getNumberU32("r_zSplitCount");
 
 
 	// A few sanity checks
 	// A few sanity checks
@@ -212,6 +217,9 @@ Error Renderer::initInternal(const ConfigSet& config)
 	m_motionVectors.reset(m_alloc.newInstance<MotionVectors>(this));
 	m_motionVectors.reset(m_alloc.newInstance<MotionVectors>(this));
 	ANKI_CHECK(m_motionVectors->init(config));
 	ANKI_CHECK(m_motionVectors->init(config));
 
 
+	m_clusterBinning.reset(m_alloc.newInstance<ClusterBinning>(this));
+	ANKI_CHECK(m_clusterBinning->init(config));
+
 	// Init samplers
 	// Init samplers
 	{
 	{
 		SamplerInitInfo sinit("Renderer");
 		SamplerInitInfo sinit("Renderer");
@@ -286,6 +294,7 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	ctx.m_matrices.m_projectionJitter = ctx.m_matrices.m_jitter * ctx.m_matrices.m_projection;
 	ctx.m_matrices.m_projectionJitter = ctx.m_matrices.m_jitter * ctx.m_matrices.m_projection;
 	ctx.m_matrices.m_viewProjectionJitter = ctx.m_matrices.m_projectionJitter * ctx.m_matrices.m_view;
 	ctx.m_matrices.m_viewProjectionJitter = ctx.m_matrices.m_projectionJitter * ctx.m_matrices.m_view;
 	ctx.m_matrices.m_invertedViewProjectionJitter = ctx.m_matrices.m_viewProjectionJitter.getInverse();
 	ctx.m_matrices.m_invertedViewProjectionJitter = ctx.m_matrices.m_viewProjectionJitter.getInverse();
+	ctx.m_matrices.m_invertedViewProjection = ctx.m_matrices.m_viewProjection.getInverse();
 
 
 	ctx.m_matrices.m_unprojectionParameters = ctx.m_matrices.m_projection.extractPerspectiveUnprojectionParams();
 	ctx.m_matrices.m_unprojectionParameters = ctx.m_matrices.m_projection.extractPerspectiveUnprojectionParams();
 
 
@@ -724,12 +733,15 @@ void Renderer::writeClustererBuffersTask(RenderingContext& ctx)
 		unis->m_time = F32(HighRezTimer::getCurrentTime());
 		unis->m_time = F32(HighRezTimer::getCurrentTime());
 		unis->m_frame = m_frameCount & MAX_U32;
 		unis->m_frame = m_frameCount & MAX_U32;
 
 
+		Plane nearPlane;
+		extractClipPlane(rqueue.m_viewProjectionMatrix, FrustumPlaneType::NEAR, nearPlane);
+		unis->m_nearPlaneWSpace = Vec4(nearPlane.getNormal().xyz(), nearPlane.getOffset());
 		unis->m_near = rqueue.m_cameraNear;
 		unis->m_near = rqueue.m_cameraNear;
 		unis->m_far = rqueue.m_cameraFar;
 		unis->m_far = rqueue.m_cameraFar;
+		unis->m_oneOverFrustumLength = 1.0f / (rqueue.m_cameraFar - rqueue.m_cameraNear);
 		unis->m_cameraPosition = rqueue.m_cameraTransform.getTranslationPart().xyz();
 		unis->m_cameraPosition = rqueue.m_cameraTransform.getTranslationPart().xyz();
 
 
-		unis->m_tileCounts.x() = (m_width + m_tileSize - 1) / m_tileSize;
-		unis->m_tileCounts.y() = (m_height + m_tileSize - 1) / m_tileSize;
+		unis->m_tileCounts = m_tileCounts;
 		unis->m_zSplitCount = m_zSplitCount;
 		unis->m_zSplitCount = m_zSplitCount;
 		unis->m_lightVolumeLastCluster = m_volLighting->getFinalClusterInZ();
 		unis->m_lightVolumeLastCluster = m_volLighting->getFinalClusterInZ();
 
 

+ 19 - 2
AnKi/Renderer/Renderer.h

@@ -321,6 +321,21 @@ public:
 		return *m_threadHive;
 		return *m_threadHive;
 	}
 	}
 
 
+	/// Get the tile size. initInternal() reads it from the "r_tileSize" config option.
+	U32 getTileSize() const
+	{
+		return m_tileSize;
+	}
+
+	/// Get the number of tiles in X and Y. initInternal() computes it as the renderer width/height divided by the
+	/// tile size, rounded up.
+	const UVec2& getTileCounts() const
+	{
+		return m_tileCounts;
+	}
+
+	/// Get the number of Z splits. initInternal() reads it from the "r_zSplitCount" config option.
+	U32 getZSplitCount() const
+	{
+		return m_zSplitCount;
+	}
+
 	/// @name Debug render targets
 	/// @name Debug render targets
 	/// @{
 	/// @{
 
 
@@ -378,12 +393,14 @@ private:
 	UniquePtr<AccelerationStructureBuilder> m_accelerationStructureBuilder;
 	UniquePtr<AccelerationStructureBuilder> m_accelerationStructureBuilder;
 	UniquePtr<RtShadows> m_rtShadows;
 	UniquePtr<RtShadows> m_rtShadows;
 	UniquePtr<MotionVectors> m_motionVectors;
 	UniquePtr<MotionVectors> m_motionVectors;
+	UniquePtr<ClusterBinning> m_clusterBinning;
 	/// @}
 	/// @}
 
 
 	Array<U32, 4> m_clusterCount;
 	Array<U32, 4> m_clusterCount;
 	ClusterBin m_clusterBin;
 	ClusterBin m_clusterBin;
-	U32 m_tileSize = 64;
-	U32 m_zSplitCount = 1;
+	U32 m_tileSize = 0;
+	UVec2 m_tileCounts = UVec2(0u);
+	U32 m_zSplitCount = 0;
 
 
 	U32 m_width;
 	U32 m_width;
 	U32 m_height;
 	U32 m_height;

+ 73 - 40
AnKi/Shaders/ClusterBinning.ankiprog

@@ -7,6 +7,7 @@ ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 0, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 3, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 3, 1u);
+ANKI_SPECIALIZATION_CONSTANT_UVEC2(RENDERING_SIZE, 4, UVec2(1u));
 
 
 #pragma anki start comp
 #pragma anki start comp
 
 
@@ -17,17 +18,17 @@ ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 3, 1u);
 const U32 WORKGROUP_SIZE = 64u;
 const U32 WORKGROUP_SIZE = 64u;
 layout(local_size_x = WORKGROUP_SIZE) in;
 layout(local_size_x = WORKGROUP_SIZE) in;
 
 
-layout(set = 0, binding = 0) uniform b_unis
+layout(set = 0, binding = 0, scalar) uniform b_unis
 {
 {
 	ClustererUniforms u_unis;
 	ClustererUniforms u_unis;
 };
 };
 
 
-layout(set = 0, binding = 1) writeonly buffer b_tiles
+layout(set = 0, binding = 1, scalar) writeonly buffer b_tiles
 {
 {
-	TileOrZSplit u_tilesAndZSplits[];
+	Cluster u_clusters[];
 };
 };
 
 
-layout(set = 0, binding = 2) readonly buffer b_plights
+layout(set = 0, binding = 2, scalar) readonly buffer b_plights
 {
 {
 	PointLight2 u_pointLights[];
 	PointLight2 u_pointLights[];
 };
 };
@@ -37,8 +38,10 @@ const U32 TILE_COUNT = TILE_COUNT_X * TILE_COUNT_Y;
 // DX Sample locations
 // DX Sample locations
 const U32 SAMPLE_COUNT = 4u;
 const U32 SAMPLE_COUNT = 4u;
 const I32 SAMPLE_OFFSET = 8;
 const I32 SAMPLE_OFFSET = 8;
-const IVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] =
-	IVec2[SAMPLE_COUNT](IVec2(-2, -6), IVec2(6, -2), IVec2(-6, 2), IVec2(2, 6));
+#define LOCATION(x, y) UVec2(IVec2(x, y) + SAMPLE_OFFSET)
+const UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] =
+	UVec2[](LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6));
+#undef LOCATION
 
 
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
 const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT;
 const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT;
@@ -47,6 +50,12 @@ shared U64 s_tileMasks[TILES_PER_WORKGROUP];
 // A mask for each Z split for a specific clusterer object
 // A mask for each Z split for a specific clusterer object
 shared U64 s_zSplitMasks[Z_SPLIT_COUNT];
 shared U64 s_zSplitMasks[Z_SPLIT_COUNT];
 
 
+// Tells if the clusterer object index of this invocation (gl_GlobalInvocationID.y) falls inside the point light range
+Bool isPointLight()
+{
+	return gl_GlobalInvocationID.y < u_unis.m_pointLightCount;
+}
+
 void main()
 void main()
 {
 {
 	const U32 tileIdx = gl_GlobalInvocationID.x / SAMPLE_COUNT;
 	const U32 tileIdx = gl_GlobalInvocationID.x / SAMPLE_COUNT;
@@ -62,9 +71,9 @@ void main()
 	const UVec2 tileXY = UVec2(tileIdx % TILE_COUNT_X, tileIdx / TILE_COUNT_X);
 	const UVec2 tileXY = UVec2(tileIdx % TILE_COUNT_X, tileIdx / TILE_COUNT_X);
 
 
 	// This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
 	// This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
-	const UVec2 pixel = tileXY * TILE_SIZE + UVec2(SAMPLE_LOCATIONS[sampleIdx] + SAMPLE_OFFSET);
+	const UVec2 pixel = tileXY * TILE_SIZE + SAMPLE_LOCATIONS[sampleIdx];
 
 
-	const Vec2 uv = Vec2(pixel) / Vec2(u_unis.m_renderingSize);
+	const Vec2 uv = Vec2(pixel) / Vec2(RENDERING_SIZE);
 	const Vec2 ndc = UV_TO_NDC(uv);
 	const Vec2 ndc = UV_TO_NDC(uv);
 
 
 	// Unproject the sample in view space
 	// Unproject the sample in view space
@@ -81,60 +90,84 @@ void main()
 	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
 	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
 		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
 		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
 	{
 	{
-		s_zSplitMasks[i];
+		s_zSplitMasks[i] = 0u;
 	}
 	}
 	memoryBarrierShared();
 	memoryBarrierShared();
 	barrier();
 	barrier();
 
 
 	// Do collision
 	// Do collision
 	F32 t0, t1;
 	F32 t0, t1;
-	U64 mask;
-	if(clustererObjectIdx < u_unis.m_pointLightCount)
+	U32 objectArrayIdx;
+	Bool collides;
+	if(isPointLight())
 	{
 	{
-		const U32 lightIdx = clustererObjectIdx;
-		const PointLight2 light = u_pointLights[lightIdx];
-		const Bool collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
-		mask = (collides) ? (1u << U64(lightIdx)) : 0u;
-		atomicOr(s_tileMasks[localTileIdx], mask);
+		objectArrayIdx = clustererObjectIdx;
+		const PointLight2 light = u_pointLights[objectArrayIdx];
+		collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
 	}
 	}
 
 
-	// Compute the Z splits
-	const Vec3 hitpointA = rayDir * t0 + rayOrigin;
-	const Vec3 hitpointB = rayDir * t1 + rayOrigin;
-	const F32 distFromNearPlaneA = testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointA);
-	const F32 distFromNearPlaneB = testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointB);
-	const F32 minDistFromNearPlane = min(distFromNearPlaneA, distFromNearPlaneB);
-	const F32 maxDistFromNearPlane = max(distFromNearPlaneA, distFromNearPlaneB);
-	const U32 startZSplit = max(0u, U32(minDistFromNearPlane * u_unis.m_oneOverFrustumLength));
-	const U32 endZSplit = min(Z_SPLIT_COUNT - 1u, U32(maxDistFromNearPlane * u_unis.m_oneOverFrustumLength));
-	for(U32 i = startZSplit; i <= endZSplit; ++i)
+	// Update the masks
+	if(collides)
 	{
 	{
-		atomicOr(s_zSplitMasks[i], mask);
+		// Set the tile
+		const U64 mask = U64(1u) << U64(objectArrayIdx);
+		atomicOr(s_tileMasks[localTileIdx], mask);
+
+		// Compute and set the Z splits
+		const Vec3 hitpointA = rayDir * t0 + rayOrigin;
+		const Vec3 hitpointB = rayDir * t1 + rayOrigin;
+		const F32 distFromNearPlaneA =
+			testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointA);
+		const F32 distFromNearPlaneB =
+			testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointB);
+
+		F32 minDistFromNearPlane;
+		F32 maxDistFromNearPlane;
+		if(distFromNearPlaneA < distFromNearPlaneB)
+		{
+			minDistFromNearPlane = distFromNearPlaneA;
+			maxDistFromNearPlane = distFromNearPlaneB;
+		}
+		else
+		{
+			minDistFromNearPlane = distFromNearPlaneB;
+			maxDistFromNearPlane = distFromNearPlaneA;
+		}
+		minDistFromNearPlane = max(0.0, minDistFromNearPlane);
+		maxDistFromNearPlane = max(0.0, maxDistFromNearPlane);
+
+		const U32 startZSplit = U32(minDistFromNearPlane * u_unis.m_oneOverFrustumLength);
+		const U32 endZSplit = min(Z_SPLIT_COUNT - 1u, U32(maxDistFromNearPlane * u_unis.m_oneOverFrustumLength));
+		for(U32 i = startZSplit; i <= endZSplit; ++i)
+		{
+			atomicOr(s_zSplitMasks[i], mask);
+		}
 	}
 	}
 
 
 	// Sync
 	// Sync
 	memoryBarrierShared();
 	memoryBarrierShared();
 	barrier();
 	barrier();
 
 
-	// All invocations write a Z split
-	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
-		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
+	// First sample writes the tile
+	if(sampleIdx == 0u && s_tileMasks[localTileIdx] != 0u)
 	{
 	{
-		if(clustererObjectIdx < u_unis.m_pointLightCount)
+		if(isPointLight())
 		{
 		{
-			atomicOr(u_tilesAndZSplits[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
+			atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
 		}
 		}
 	}
 	}
 
 
-	// First sample writes the tile
-	if((gl_LocalInvocationIndex % SAMPLE_COUNT) != 0)
-	{
-		return;
-	}
-
-	if(clustererObjectIdx < u_unis.m_pointLightCount)
+	// All invocations write at least one Z split
+	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
+		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
 	{
 	{
-		atomicOr(u_tilesAndZSplits[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
+		if(s_zSplitMasks[i] != 0u)
+		{
+			if(isPointLight())
+			{
+				atomicOr(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
+			}
+		}
 	}
 	}
 }
 }
 
 

+ 3 - 3
AnKi/Shaders/Include/ClustererTypes.h

@@ -182,7 +182,7 @@ const U32 _ANKI_SIZEOF_ClustererUniforms =
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClustererUniforms) == _ANKI_SIZEOF_ClustererUniforms);
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClustererUniforms) == _ANKI_SIZEOF_ClustererUniforms);
 
 
 /// Information that a tile or a Z-split will contain.
 /// Information that a tile or a Z-split will contain.
-struct TileOrZSplit
+struct Cluster
 {
 {
 	U64 m_pointLightsMask;
 	U64 m_pointLightsMask;
 	U64 m_spotLightsMask;
 	U64 m_spotLightsMask;
@@ -192,7 +192,7 @@ struct TileOrZSplit
 	U32 m_giProbesMask;
 	U32 m_giProbesMask;
 	U32 m_padding; ///< Add some padding to be 100% sure nothing will break.
 	U32 m_padding; ///< Add some padding to be 100% sure nothing will break.
 };
 };
-const U32 _ANKI_SIZEOF_TileOrZSplit = 5u * ANKI_SIZEOF(U64);
-ANKI_SHADER_STATIC_ASSERT(sizeof(TileOrZSplit) == _ANKI_SIZEOF_TileOrZSplit);
+const U32 _ANKI_SIZEOF_Cluster = 5u * ANKI_SIZEOF(U64);
+ANKI_SHADER_STATIC_ASSERT(sizeof(Cluster) == _ANKI_SIZEOF_Cluster);
 
 
 ANKI_END_NAMESPACE
 ANKI_END_NAMESPACE