瀏覽代碼

Moving ClusterBinning shader to HLSL

Panagiotis Christopoulos Charitos 3 年之前
父節點
當前提交
88f33be96f

+ 6 - 6
AnKi/Renderer/ClusterBinning.cpp

@@ -78,12 +78,12 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 			cmdb->bindShaderProgram(m_grProg);
 			bindUniforms(cmdb, 0, 0, tokens.m_clusteredShadingUniformsToken);
 			bindStorage(cmdb, 0, 1, tokens.m_clustersToken);
-			bindUniforms(cmdb, 0, 2, tokens.m_pointLightsToken);
-			bindUniforms(cmdb, 0, 3, tokens.m_spotLightsToken);
-			bindUniforms(cmdb, 0, 4, tokens.m_reflectionProbesToken);
-			bindUniforms(cmdb, 0, 5, tokens.m_globalIlluminationProbesToken);
-			bindUniforms(cmdb, 0, 6, tokens.m_fogDensityVolumesToken);
-			bindUniforms(cmdb, 0, 7, tokens.m_decalsToken);
+			bindStorage(cmdb, 0, 2, tokens.m_pointLightsToken);
+			bindStorage(cmdb, 0, 3, tokens.m_spotLightsToken);
+			bindStorage(cmdb, 0, 4, tokens.m_reflectionProbesToken);
+			bindStorage(cmdb, 0, 5, tokens.m_globalIlluminationProbesToken);
+			bindStorage(cmdb, 0, 6, tokens.m_fogDensityVolumesToken);
+			bindStorage(cmdb, 0, 7, tokens.m_decalsToken);
 
 			const U32 sampleCount = 4;
 			const U32 sizex = m_tileCount * sampleCount;

+ 1 - 1
AnKi/Renderer/ConfigVars.defs.h

@@ -8,7 +8,7 @@ ANKI_CONFIG_VAR_GROUP(R)
 ANKI_CONFIG_VAR_U8(RTextureAnisotropy, ((ANKI_PLATFORM_MOBILE) ? 1 : 8), 1, 16,
 				   "Texture anisotropy for the main passes")
 ANKI_CONFIG_VAR_U32(RTileSize, 64, 8, 256, "Tile lighting tile size")
-ANKI_CONFIG_VAR_U32(RZSplitCount, 64, 8, 1024, "Clusterer number of Z splits")
+ANKI_CONFIG_VAR_U32(RZSplitCount, 64, 8, kMaxZsplitCount, "Clusterer number of Z splits")
 ANKI_CONFIG_VAR_BOOL(RPreferCompute, !ANKI_PLATFORM_MOBILE, "Prefer compute shaders")
 ANKI_CONFIG_VAR_BOOL(RHighQualityHdr, !ANKI_PLATFORM_MOBILE,
 					 "If true use R16G16B16 for HDR images. Alternatively use B10G11R11")

+ 1 - 0
AnKi/ShaderCompiler/RadeonGpuAnalyzer.cpp

@@ -92,6 +92,7 @@ Error runRadeonGpuAnalyzer(CString rgaExecutable, ConstWeakArray<U8> spirv, Shad
 	ANKI_CHECK(analysisFile.open(outFilename, FileOpenFlag::kRead));
 	StringRaii analysisText(&tmpPool);
 	ANKI_CHECK(analysisFile.readAllText(analysisText));
+	analysisText.replaceAll("\r", "");
 
 	// Parse the text
 	StringListRaii lines(&tmpPool);

+ 96 - 130
AnKi/Shaders/ClusterBinning.ankiprog

@@ -3,11 +3,12 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+#pragma anki hlsl
+
 #pragma anki start comp
 
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
-#include <AnKi/Shaders/Common.glsl>
-#include <AnKi/Shaders/CollisionFunctions.glsl>
+#include <AnKi/Shaders/CollisionFunctions.hlsl>
 
 ANKI_SPECIALIZATION_CONSTANT_U32(kTileSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kTileCountX, 1u);
@@ -15,151 +16,118 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kTileCountY, 2u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 3u);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kRenderingSize, 4u);
 
-const U32 kWorkgroupSize = 64u;
-layout(local_size_x = kWorkgroupSize) in;
-
-layout(set = 0, binding = 0) uniform b_unis
-{
-	ClusteredShadingUniforms u_unis;
-};
-
-layout(set = 0, binding = 1) writeonly buffer b_clusters
-{
-	Cluster u_clusters[];
-};
-
-layout(set = 0, binding = 2) uniform b_pointLights
-{
-	PointLight u_pointLights[kMaxVisiblePointLights];
-};
-
-layout(set = 0, binding = 3) uniform b_spotLights
-{
-	SpotLightBinning u_spotLights[kMaxVisibleSpotLights];
-};
-
-layout(set = 0, binding = 4) uniform b_reflectionProbes
-{
-	ReflectionProbe u_reflectionProbes[kMaxVisibleReflectionProbes];
-};
+[[vk::binding(0)]] ConstantBuffer<ClusteredShadingUniforms> g_unis;
+[[vk::binding(1)]] RWStructuredBuffer<Cluster> g_clusters;
+[[vk::binding(2)]] StructuredBuffer<PointLight> g_pointLights;
+[[vk::binding(3)]] StructuredBuffer<SpotLightBinning> g_spotLights;
+[[vk::binding(4)]] StructuredBuffer<ReflectionProbe> g_reflectionProbes;
+[[vk::binding(5)]] StructuredBuffer<GlobalIlluminationProbe> g_giProbes;
+[[vk::binding(6)]] StructuredBuffer<FogDensityVolume> g_fogVolumes;
+[[vk::binding(7)]] StructuredBuffer<Decal> g_decals;
 
-layout(set = 0, binding = 5) uniform b_giProbes
-{
-	GlobalIlluminationProbe u_giProbes[kMaxVisibleGlobalIlluminationProbes];
-};
-
-layout(set = 0, binding = 6) uniform b_fogVolumes
-{
-	FogDensityVolume u_fogVolumes[kMaxVisibleFogDensityVolumes];
-};
-
-layout(set = 0, binding = 7) uniform b_decals
-{
-	Decal u_decals[kMaxVisibleDecals];
-};
-
-const U32 kTileCount = kTileCountX * kTileCountY;
+#define THREADGROUP_SIZE 64
+constexpr U32 kTileCount = kTileCountX * kTileCountY;
 
 // DX Sample locations
-const U32 kSampleCount = 4u;
+constexpr U32 kSampleCount = 4u;
 #define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(kTileSize))
-UVec2 SAMPLE_LOCATIONS[kSampleCount] = UVec2[](LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6));
+constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6)};
 #undef LOCATION
 
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
-const U32 kTilesPerWorkgroup = kWorkgroupSize / kSampleCount;
-shared ExtendedClusterObjectMask s_tileMasks[kTilesPerWorkgroup];
+constexpr U32 kTilesPerThreadgroup = THREADGROUP_SIZE / kSampleCount;
+groupshared ExtendedClusterObjectMask s_tileMasks[kTilesPerThreadgroup];
 
 // A mask for each Z split for a specific clusterer object
-shared ExtendedClusterObjectMask s_zSplitMasks[kZSplitCount];
+groupshared ExtendedClusterObjectMask s_zSplitMasks[kMaxZsplitCount];
 
-Bool isPointLight()
+Bool isPointLight(U32 objectIdx)
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypePointLight].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypePointLight].x;
 }
 
-Bool isSpotLight()
+Bool isSpotLight(U32 objectIdx)
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeSpotLight].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeSpotLight].x;
 }
 
-Bool isDecal()
+Bool isDecal(U32 objectIdx)
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeDecal].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeDecal].x;
 }
 
-Bool isFogVolume()
+Bool isFogVolume(U32 objectIdx)
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeFogDensityVolume].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeFogDensityVolume].x;
 }
 
-Bool isReflectionProbe()
+Bool isReflectionProbe(U32 objectIdx)
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeReflectionProbe].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeReflectionProbe].x;
 }
 
-Bool isGiProbe()
+Bool isGiProbe(U32 objectIdx)
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeGlobalIlluminationProbe].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeGlobalIlluminationProbe].x;
 }
 
-void main()
+ANKI_NUMTHREADS(THREADGROUP_SIZE, 1, 1)
+void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID, U32 svGroupIdx : SV_GROUPINDEX)
 {
-	const U32 globalInvocationIDX = min(gl_GlobalInvocationID.x, kTileCount * kSampleCount);
-	const U32 tileIdx = globalInvocationIDX / kSampleCount;
-	const U32 sampleIdx = globalInvocationIDX % kSampleCount;
-	const U32 localTileIdx = gl_LocalInvocationIndex / kSampleCount;
-	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+	const U32 dispatchThreadIdX = min(svDispatchThreadId.x, kTileCount * kSampleCount - 1u); // Clamp to the last valid sample so tileIdx stays below kTileCount
+	const U32 tileIdx = dispatchThreadIdX / kSampleCount;
+	const U32 sampleIdx = dispatchThreadIdX % kSampleCount;
+	const U32 localTileIdx = svGroupIdx / kSampleCount;
+	const U32 clustererObjectIdx = svDispatchThreadId.y;
 
 	const UVec2 tileXY = UVec2(tileIdx % kTileCountX, tileIdx / kTileCountX);
 
 	// This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
-	const UVec2 pixel = tileXY * kTileSize + SAMPLE_LOCATIONS[sampleIdx];
+	const UVec2 pixel = tileXY * kTileSize + kSampleLocations[sampleIdx];
 
 	const Vec2 uv = Vec2(pixel) / Vec2(kRenderingSize);
-	const Vec2 ndc = UV_TO_NDC(uv);
+	const Vec2 ndc = uvToNdc(uv);
 
 	// Unproject the sample in view space
-	const Vec4 farWorldPos4 = u_unis.m_matrices.m_invertedViewProjection * Vec4(ndc, 1.0, 1.0);
+	const Vec4 farWorldPos4 = mul(g_unis.m_matrices.m_invertedViewProjection, Vec4(ndc, 1.0, 1.0));
 	const Vec3 farWorldPos = farWorldPos4.xyz / farWorldPos4.w;
 
 	// Create the ray that will test the clusterer objects
-	const Vec3 rayOrigin = u_unis.m_cameraPosition;
+	const Vec3 rayOrigin = g_unis.m_cameraPosition;
 	const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
 
 	// Zero shared memory
-	s_tileMasks[localTileIdx] = ExtendedClusterObjectMask(0);
-	const U32 splitsPerInvocation = max(1u, kZSplitCount / kWorkgroupSize);
-	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
-		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < kZSplitCount; ++i)
+	s_tileMasks[localTileIdx] = 0;
+	const U32 splitsPerInvocation = max(1u, (kZSplitCount + THREADGROUP_SIZE - 1u) / THREADGROUP_SIZE); // Round up so all Z splits are covered when kZSplitCount is not a multiple of the group size
+	for(U32 i = svGroupIdx * splitsPerInvocation; i < (svGroupIdx + 1u) * splitsPerInvocation && i < kZSplitCount; ++i)
 	{
-		s_zSplitMasks[i] = ExtendedClusterObjectMask(0);
+		s_zSplitMasks[i] = 0;
 	}
-	memoryBarrierShared();
-	barrier();
+
+	GroupMemoryBarrierWithGroupSync();
 
 	// Do collision
 	F32 t0, t1;
 	U32 objectArrayIdx;
 	Bool collides;
 	// Point light
-	if(isPointLight())
+	if(isPointLight(clustererObjectIdx))
 	{
 		objectArrayIdx = clustererObjectIdx;
-		const PointLight light = u_pointLights[objectArrayIdx];
+		const PointLight light = g_pointLights[objectArrayIdx];
 		collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
 	}
 	// Spot light
-	else if(isSpotLight())
+	else if(isSpotLight(clustererObjectIdx))
 	{
-		objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeSpotLight - 1u].x;
-		const SpotLightBinning light = u_spotLights[objectArrayIdx];
+		objectArrayIdx = clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeSpotLight - 1u].x;
+		const SpotLightBinning light = g_spotLights[objectArrayIdx];
 
 		t0 = 10000.0;
 		t1 = -10000.0;
 
 		// Iterate all triangles
-		const U32 indices[6u * 3u] = U32[](0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u);
+		const U32 indices[6u * 3u] = {0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u};
 		U32 hits = 0u;
 		U32 idx = 0u;
 		do
@@ -188,18 +156,18 @@ void main()
 		collides = (hits != 0u);
 	}
 	// Decal
-	else if(isDecal())
+	else if(isDecal(clustererObjectIdx))
 	{
-		objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeDecal - 1u].x;
-		const Decal decal = u_decals[objectArrayIdx];
+		objectArrayIdx = clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeDecal - 1u].x;
+		const Decal decal = g_decals[objectArrayIdx];
 
 		collides = testRayObb(rayOrigin, rayDir, decal.m_obbExtend, decal.m_invertedTransform, t0, t1);
 	}
 	// Fog volume
-	else if(isFogVolume())
+	else if(isFogVolume(clustererObjectIdx))
 	{
-		objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeFogDensityVolume - 1u].x;
-		const FogDensityVolume vol = u_fogVolumes[objectArrayIdx];
+		objectArrayIdx = clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeFogDensityVolume - 1u].x;
+		const FogDensityVolume vol = g_fogVolumes[objectArrayIdx];
 
 		if(vol.m_isBox != 0u)
 		{
@@ -213,10 +181,10 @@ void main()
 		}
 	}
 	// Reflection probe
-	else if(isReflectionProbe())
+	else if(isReflectionProbe(clustererObjectIdx))
 	{
-		objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeReflectionProbe - 1u].x;
-		const ReflectionProbe probe = u_reflectionProbes[objectArrayIdx];
+		objectArrayIdx = clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeReflectionProbe - 1u].x;
+		const ReflectionProbe probe = g_reflectionProbes[objectArrayIdx];
 
 		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
 	}
@@ -224,8 +192,8 @@ void main()
 	else
 	{
 		objectArrayIdx =
-			clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeGlobalIlluminationProbe - 1u].x;
-		const GlobalIlluminationProbe probe = u_giProbes[objectArrayIdx];
+			clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeGlobalIlluminationProbe - 1u].x;
+		const GlobalIlluminationProbe probe = g_giProbes[objectArrayIdx];
 
 		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
 	}
@@ -236,15 +204,15 @@ void main()
 		// Set the tile
 		const ExtendedClusterObjectMask mask = ExtendedClusterObjectMask(1)
 											   << ExtendedClusterObjectMask(objectArrayIdx);
-		atomicOr(s_tileMasks[localTileIdx], mask);
+		InterlockedOr(s_tileMasks[localTileIdx], mask);
 
 		// Compute and set the Z splits
 		const Vec3 hitpointA = rayDir * t0 + rayOrigin;
 		const Vec3 hitpointB = rayDir * t1 + rayOrigin;
 		const F32 distFromNearPlaneA =
-			testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointA);
+			testPlanePoint(g_unis.m_nearPlaneWSpace.xyz, g_unis.m_nearPlaneWSpace.w, hitpointA);
 		const F32 distFromNearPlaneB =
-			testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointB);
+			testPlanePoint(g_unis.m_nearPlaneWSpace.xyz, g_unis.m_nearPlaneWSpace.w, hitpointB);
 
 		F32 minDistFromNearPlane;
 		F32 maxDistFromNearPlane;
@@ -259,77 +227,75 @@ void main()
 			maxDistFromNearPlane = distFromNearPlaneA;
 		}
 
-		const I32 startZSplit = max(I32(minDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0);
+		const I32 startZSplit = max(I32(minDistFromNearPlane * g_unis.m_zSplitCountOverFrustumLength), 0);
 		const I32 endZSplit =
-			clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(kZSplitCount) - 1);
+			clamp(I32(maxDistFromNearPlane * g_unis.m_zSplitCountOverFrustumLength), 0, I32(kZSplitCount) - 1);
 		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		{
-			atomicOr(s_zSplitMasks[i], mask);
+			InterlockedOr(s_zSplitMasks[i], mask);
 		}
 	}
 
 	// Sync
-	memoryBarrierShared();
-	barrier();
+	GroupMemoryBarrierWithGroupSync();
 
 	// First sample writes the tile
-	if(sampleIdx == 0u && s_tileMasks[localTileIdx] != ExtendedClusterObjectMask(0))
+	if(sampleIdx == 0u && s_tileMasks[localTileIdx] != 0)
 	{
-		if(isPointLight())
+		if(isPointLight(clustererObjectIdx))
 		{
-			atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
+			InterlockedOr(g_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
 		}
-		else if(isSpotLight())
+		else if(isSpotLight(clustererObjectIdx))
 		{
-			atomicOr(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
+			InterlockedOr(g_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
 		}
-		else if(isDecal())
+		else if(isDecal(clustererObjectIdx))
 		{
-			atomicOr(u_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
+			InterlockedOr(g_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
 		}
-		else if(isFogVolume())
+		else if(isFogVolume(clustererObjectIdx))
 		{
-			atomicOr(u_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
+			InterlockedOr(g_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
 		}
-		else if(isReflectionProbe())
+		else if(isReflectionProbe(clustererObjectIdx))
 		{
-			atomicOr(u_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
+			InterlockedOr(g_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
 		}
 		else
 		{
-			atomicOr(u_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
+			InterlockedOr(g_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
 		}
 	}
 
 	// All invocations write at least one Z split
-	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
-		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < kZSplitCount; ++i)
+	for(U32 i = svGroupIdx * splitsPerInvocation; i < (svGroupIdx + 1u) * splitsPerInvocation && i < kZSplitCount; ++i)
 	{
-		if(s_zSplitMasks[i] != ExtendedClusterObjectMask(0))
+		if(s_zSplitMasks[i] != 0)
 		{
-			if(isPointLight())
+			if(isPointLight(clustererObjectIdx))
 			{
-				atomicOr(u_clusters[kTileCount + i].m_pointLightsMask, s_zSplitMasks[i]);
+				InterlockedOr(g_clusters[kTileCount + i].m_pointLightsMask, s_zSplitMasks[i]);
 			}
-			else if(isSpotLight())
+			else if(isSpotLight(clustererObjectIdx))
 			{
-				atomicOr(u_clusters[kTileCount + i].m_spotLightsMask, s_zSplitMasks[i]);
+				InterlockedOr(g_clusters[kTileCount + i].m_spotLightsMask, s_zSplitMasks[i]);
 			}
-			else if(isDecal())
+			else if(isDecal(clustererObjectIdx))
 			{
-				atomicOr(u_clusters[kTileCount + i].m_decalsMask, s_zSplitMasks[i]);
+				InterlockedOr(g_clusters[kTileCount + i].m_decalsMask, s_zSplitMasks[i]);
 			}
-			else if(isFogVolume())
+			else if(isFogVolume(clustererObjectIdx))
 			{
-				atomicOr(u_clusters[kTileCount + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
+				InterlockedOr(g_clusters[kTileCount + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
 			}
-			else if(isReflectionProbe())
+			else if(isReflectionProbe(clustererObjectIdx))
 			{
-				atomicOr(u_clusters[kTileCount + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
+				InterlockedOr(g_clusters[kTileCount + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
 			}
 			else
 			{
-				atomicOr(u_clusters[kTileCount + i].m_giProbesMask, U32(s_zSplitMasks[i]));
+				InterlockedOr(g_clusters[kTileCount + i].m_giProbesMask, U32(s_zSplitMasks[i]));
 			}
 		}
 	}

+ 2 - 0
AnKi/Shaders/CollisionFunctions.hlsl

@@ -5,6 +5,8 @@
 
 #pragma once
 
+#include <AnKi/Shaders/Common.hlsl>
+
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection
 Bool testRayTriangle(Vec3 rayOrigin, Vec3 rayDir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfaceCulling, out F32 t, out F32 u,
 					 out F32 v)

+ 1 - 0
AnKi/Shaders/Include/ClusteredShadingTypes.h

@@ -37,6 +37,7 @@ constexpr U32 kMaxVisibleGlobalIlluminationProbes = 8u;
 // Other consts
 constexpr RF32 kClusterObjectFrustumNearPlane = 0.1f / 4.0f; ///< Near plane of all clusterer object frustums.
 constexpr RF32 kSubsurfaceMin = 0.01f;
+constexpr U32 kMaxZsplitCount = 128u;
 
 /// Point light.
 struct PointLight

+ 2 - 1
AnKi/Shaders/Include/Common.h

@@ -74,7 +74,8 @@ void maybeUnused(T a)
 		[[vk::binding(0, s)]] Texture2DArray<RVec4> g_bindlessTextures2dArrayF32[kMaxBindlessTextures]; \
 		[[vk::binding(1, s)]] Buffer<float4> g_bindlessTextureBuffersF32[kMaxBindlessReadonlyTextureBuffers];
 
-#	define _ANKI_SCONST_X(type, n, id) [[vk::constant_id(id)]] const type n = (type)1;
+#	define _ANKI_SCONST_X(type, n, id) \
+		[[vk::constant_id(id)]] const type n = (type)1;
 
 #	define _ANKI_SCONST_X2(type, componentType, n, id) \
 		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_2_, n) = (componentType)1; \