Browse Source

Moving ClusterBinning shader to HLSL

Panagiotis Christopoulos Charitos 3 years ago
parent
commit
88f33be96f

+ 6 - 6
AnKi/Renderer/ClusterBinning.cpp

@@ -78,12 +78,12 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 			cmdb->bindShaderProgram(m_grProg);
 			cmdb->bindShaderProgram(m_grProg);
 			bindUniforms(cmdb, 0, 0, tokens.m_clusteredShadingUniformsToken);
 			bindUniforms(cmdb, 0, 0, tokens.m_clusteredShadingUniformsToken);
 			bindStorage(cmdb, 0, 1, tokens.m_clustersToken);
 			bindStorage(cmdb, 0, 1, tokens.m_clustersToken);
-			bindUniforms(cmdb, 0, 2, tokens.m_pointLightsToken);
-			bindUniforms(cmdb, 0, 3, tokens.m_spotLightsToken);
-			bindUniforms(cmdb, 0, 4, tokens.m_reflectionProbesToken);
-			bindUniforms(cmdb, 0, 5, tokens.m_globalIlluminationProbesToken);
-			bindUniforms(cmdb, 0, 6, tokens.m_fogDensityVolumesToken);
-			bindUniforms(cmdb, 0, 7, tokens.m_decalsToken);
+			bindStorage(cmdb, 0, 2, tokens.m_pointLightsToken);
+			bindStorage(cmdb, 0, 3, tokens.m_spotLightsToken);
+			bindStorage(cmdb, 0, 4, tokens.m_reflectionProbesToken);
+			bindStorage(cmdb, 0, 5, tokens.m_globalIlluminationProbesToken);
+			bindStorage(cmdb, 0, 6, tokens.m_fogDensityVolumesToken);
+			bindStorage(cmdb, 0, 7, tokens.m_decalsToken);
 
 
 			const U32 sampleCount = 4;
 			const U32 sampleCount = 4;
 			const U32 sizex = m_tileCount * sampleCount;
 			const U32 sizex = m_tileCount * sampleCount;

+ 1 - 1
AnKi/Renderer/ConfigVars.defs.h

@@ -8,7 +8,7 @@ ANKI_CONFIG_VAR_GROUP(R)
 ANKI_CONFIG_VAR_U8(RTextureAnisotropy, ((ANKI_PLATFORM_MOBILE) ? 1 : 8), 1, 16,
 ANKI_CONFIG_VAR_U8(RTextureAnisotropy, ((ANKI_PLATFORM_MOBILE) ? 1 : 8), 1, 16,
 				   "Texture anisotropy for the main passes")
 				   "Texture anisotropy for the main passes")
 ANKI_CONFIG_VAR_U32(RTileSize, 64, 8, 256, "Tile lighting tile size")
 ANKI_CONFIG_VAR_U32(RTileSize, 64, 8, 256, "Tile lighting tile size")
-ANKI_CONFIG_VAR_U32(RZSplitCount, 64, 8, 1024, "Clusterer number of Z splits")
+ANKI_CONFIG_VAR_U32(RZSplitCount, 64, 8, kMaxZsplitCount, "Clusterer number of Z splits")
 ANKI_CONFIG_VAR_BOOL(RPreferCompute, !ANKI_PLATFORM_MOBILE, "Prefer compute shaders")
 ANKI_CONFIG_VAR_BOOL(RPreferCompute, !ANKI_PLATFORM_MOBILE, "Prefer compute shaders")
 ANKI_CONFIG_VAR_BOOL(RHighQualityHdr, !ANKI_PLATFORM_MOBILE,
 ANKI_CONFIG_VAR_BOOL(RHighQualityHdr, !ANKI_PLATFORM_MOBILE,
 					 "If true use R16G16B16 for HDR images. Alternatively use B10G11R11")
 					 "If true use R16G16B16 for HDR images. Alternatively use B10G11R11")

+ 1 - 0
AnKi/ShaderCompiler/RadeonGpuAnalyzer.cpp

@@ -92,6 +92,7 @@ Error runRadeonGpuAnalyzer(CString rgaExecutable, ConstWeakArray<U8> spirv, Shad
 	ANKI_CHECK(analysisFile.open(outFilename, FileOpenFlag::kRead));
 	ANKI_CHECK(analysisFile.open(outFilename, FileOpenFlag::kRead));
 	StringRaii analysisText(&tmpPool);
 	StringRaii analysisText(&tmpPool);
 	ANKI_CHECK(analysisFile.readAllText(analysisText));
 	ANKI_CHECK(analysisFile.readAllText(analysisText));
+	analysisText.replaceAll("\r", "");
 
 
 	// Parse the text
 	// Parse the text
 	StringListRaii lines(&tmpPool);
 	StringListRaii lines(&tmpPool);

+ 96 - 130
AnKi/Shaders/ClusterBinning.ankiprog

@@ -3,11 +3,12 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
+#pragma anki hlsl
+
 #pragma anki start comp
 #pragma anki start comp
 
 
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
-#include <AnKi/Shaders/Common.glsl>
-#include <AnKi/Shaders/CollisionFunctions.glsl>
+#include <AnKi/Shaders/CollisionFunctions.hlsl>
 
 
 ANKI_SPECIALIZATION_CONSTANT_U32(kTileSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kTileSize, 0u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kTileCountX, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kTileCountX, 1u);
@@ -15,151 +16,118 @@ ANKI_SPECIALIZATION_CONSTANT_U32(kTileCountY, 2u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 3u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 3u);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kRenderingSize, 4u);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(kRenderingSize, 4u);
 
 
-const U32 kWorkgroupSize = 64u;
-layout(local_size_x = kWorkgroupSize) in;
-
-layout(set = 0, binding = 0) uniform b_unis
-{
-	ClusteredShadingUniforms u_unis;
-};
-
-layout(set = 0, binding = 1) writeonly buffer b_clusters
-{
-	Cluster u_clusters[];
-};
-
-layout(set = 0, binding = 2) uniform b_pointLights
-{
-	PointLight u_pointLights[kMaxVisiblePointLights];
-};
-
-layout(set = 0, binding = 3) uniform b_spotLights
-{
-	SpotLightBinning u_spotLights[kMaxVisibleSpotLights];
-};
-
-layout(set = 0, binding = 4) uniform b_reflectionProbes
-{
-	ReflectionProbe u_reflectionProbes[kMaxVisibleReflectionProbes];
-};
+[[vk::binding(0)]] ConstantBuffer<ClusteredShadingUniforms> g_unis;
+[[vk::binding(1)]] RWStructuredBuffer<Cluster> g_clusters;
+[[vk::binding(2)]] StructuredBuffer<PointLight> g_pointLights;
+[[vk::binding(3)]] StructuredBuffer<SpotLightBinning> g_spotLights;
+[[vk::binding(4)]] StructuredBuffer<ReflectionProbe> g_reflectionProbes;
+[[vk::binding(5)]] StructuredBuffer<GlobalIlluminationProbe> g_giProbes;
+[[vk::binding(6)]] StructuredBuffer<FogDensityVolume> g_fogVolumes;
+[[vk::binding(7)]] StructuredBuffer<Decal> g_decals;
 
 
-layout(set = 0, binding = 5) uniform b_giProbes
-{
-	GlobalIlluminationProbe u_giProbes[kMaxVisibleGlobalIlluminationProbes];
-};
-
-layout(set = 0, binding = 6) uniform b_fogVolumes
-{
-	FogDensityVolume u_fogVolumes[kMaxVisibleFogDensityVolumes];
-};
-
-layout(set = 0, binding = 7) uniform b_decals
-{
-	Decal u_decals[kMaxVisibleDecals];
-};
-
-const U32 kTileCount = kTileCountX * kTileCountY;
+#define THREADGROUP_SIZE 64
+constexpr U32 kTileCount = kTileCountX * kTileCountY;
 
 
 // DX Sample locations
 // DX Sample locations
-const U32 kSampleCount = 4u;
+constexpr U32 kSampleCount = 4u;
 #define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(kTileSize))
 #define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(kTileSize))
-UVec2 SAMPLE_LOCATIONS[kSampleCount] = UVec2[](LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6));
+constexpr UVec2 kSampleLocations[kSampleCount] = {LOCATION(-2, -6), LOCATION(6, -2), LOCATION(-6, 2), LOCATION(2, 6)};
 #undef LOCATION
 #undef LOCATION
 
 
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
-const U32 kTilesPerWorkgroup = kWorkgroupSize / kSampleCount;
-shared ExtendedClusterObjectMask s_tileMasks[kTilesPerWorkgroup];
+constexpr U32 kTilesPerThreadgroup = THREADGROUP_SIZE / kSampleCount;
+groupshared ExtendedClusterObjectMask s_tileMasks[kTilesPerThreadgroup];
 
 
 // A mask for each Z split for a specific clusterer object
 // A mask for each Z split for a specific clusterer object
-shared ExtendedClusterObjectMask s_zSplitMasks[kZSplitCount];
+groupshared ExtendedClusterObjectMask s_zSplitMasks[kMaxZsplitCount];
 
 
-Bool isPointLight()
+Bool isPointLight(U32 objectIdx)
 {
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypePointLight].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypePointLight].x;
 }
 }
 
 
-Bool isSpotLight()
+Bool isSpotLight(U32 objectIdx)
 {
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeSpotLight].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeSpotLight].x;
 }
 }
 
 
-Bool isDecal()
+Bool isDecal(U32 objectIdx)
 {
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeDecal].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeDecal].x;
 }
 }
 
 
-Bool isFogVolume()
+Bool isFogVolume(U32 objectIdx)
 {
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeFogDensityVolume].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeFogDensityVolume].x;
 }
 }
 
 
-Bool isReflectionProbe()
+Bool isReflectionProbe(U32 objectIdx)
 {
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeReflectionProbe].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeReflectionProbe].x;
 }
 }
 
 
-Bool isGiProbe()
+Bool isGiProbe(U32 objectIdx)
 {
 {
-	return gl_GlobalInvocationID.y < u_unis.m_objectCountsUpTo[kClusterObjectTypeGlobalIlluminationProbe].x;
+	return objectIdx < g_unis.m_objectCountsUpTo[kClusterObjectTypeGlobalIlluminationProbe].x;
 }
 }
 
 
-void main()
+ANKI_NUMTHREADS(THREADGROUP_SIZE, 1, 1)
+void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID, U32 svGroupIdx : SV_GROUPINDEX)
 {
 {
-	const U32 globalInvocationIDX = min(gl_GlobalInvocationID.x, kTileCount * kSampleCount);
-	const U32 tileIdx = globalInvocationIDX / kSampleCount;
-	const U32 sampleIdx = globalInvocationIDX % kSampleCount;
-	const U32 localTileIdx = gl_LocalInvocationIndex / kSampleCount;
-	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+	const U32 dispatchThreadIdX = min(svDispatchThreadId.x, kTileCount * kSampleCount);
+	const U32 tileIdx = dispatchThreadIdX / kSampleCount;
+	const U32 sampleIdx = dispatchThreadIdX % kSampleCount;
+	const U32 localTileIdx = svGroupIdx / kSampleCount;
+	const U32 clustererObjectIdx = svDispatchThreadId.y;
 
 
 	const UVec2 tileXY = UVec2(tileIdx % kTileCountX, tileIdx / kTileCountX);
 	const UVec2 tileXY = UVec2(tileIdx % kTileCountX, tileIdx / kTileCountX);
 
 
 	// This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
 	// This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
-	const UVec2 pixel = tileXY * kTileSize + SAMPLE_LOCATIONS[sampleIdx];
+	const UVec2 pixel = tileXY * kTileSize + kSampleLocations[sampleIdx];
 
 
 	const Vec2 uv = Vec2(pixel) / Vec2(kRenderingSize);
 	const Vec2 uv = Vec2(pixel) / Vec2(kRenderingSize);
-	const Vec2 ndc = UV_TO_NDC(uv);
+	const Vec2 ndc = uvToNdc(uv);
 
 
 	// Unproject the sample in view space
 	// Unproject the sample in view space
-	const Vec4 farWorldPos4 = u_unis.m_matrices.m_invertedViewProjection * Vec4(ndc, 1.0, 1.0);
+	const Vec4 farWorldPos4 = mul(g_unis.m_matrices.m_invertedViewProjection, Vec4(ndc, 1.0, 1.0));
 	const Vec3 farWorldPos = farWorldPos4.xyz / farWorldPos4.w;
 	const Vec3 farWorldPos = farWorldPos4.xyz / farWorldPos4.w;
 
 
 	// Create the ray that will test the clusterer objects
 	// Create the ray that will test the clusterer objects
-	const Vec3 rayOrigin = u_unis.m_cameraPosition;
+	const Vec3 rayOrigin = g_unis.m_cameraPosition;
 	const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
 	const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
 
 
 	// Zero shared memory
 	// Zero shared memory
-	s_tileMasks[localTileIdx] = ExtendedClusterObjectMask(0);
-	const U32 splitsPerInvocation = max(1u, kZSplitCount / kWorkgroupSize);
-	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
-		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < kZSplitCount; ++i)
+	s_tileMasks[localTileIdx] = 0;
+	const U32 splitsPerInvocation = max(1u, kZSplitCount / THREADGROUP_SIZE);
+	for(U32 i = svGroupIdx * splitsPerInvocation; i < (svGroupIdx + 1u) * splitsPerInvocation && i < kZSplitCount; ++i)
 	{
 	{
-		s_zSplitMasks[i] = ExtendedClusterObjectMask(0);
+		s_zSplitMasks[i] = 0;
 	}
 	}
-	memoryBarrierShared();
-	barrier();
+
+	GroupMemoryBarrierWithGroupSync();
 
 
 	// Do collision
 	// Do collision
 	F32 t0, t1;
 	F32 t0, t1;
 	U32 objectArrayIdx;
 	U32 objectArrayIdx;
 	Bool collides;
 	Bool collides;
 	// Point light
 	// Point light
-	if(isPointLight())
+	if(isPointLight(clustererObjectIdx))
 	{
 	{
 		objectArrayIdx = clustererObjectIdx;
 		objectArrayIdx = clustererObjectIdx;
-		const PointLight light = u_pointLights[objectArrayIdx];
+		const PointLight light = g_pointLights[objectArrayIdx];
 		collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
 		collides = testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1);
 	}
 	}
 	// Spot light
 	// Spot light
-	else if(isSpotLight())
+	else if(isSpotLight(clustererObjectIdx))
 	{
 	{
-		objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeSpotLight - 1u].x;
-		const SpotLightBinning light = u_spotLights[objectArrayIdx];
+		objectArrayIdx = clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeSpotLight - 1u].x;
+		const SpotLightBinning light = g_spotLights[objectArrayIdx];
 
 
 		t0 = 10000.0;
 		t0 = 10000.0;
 		t1 = -10000.0;
 		t1 = -10000.0;
 
 
 		// Iterate all triangles
 		// Iterate all triangles
-		const U32 indices[6u * 3u] = U32[](0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u);
+		const U32 indices[6u * 3u] = {0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u};
 		U32 hits = 0u;
 		U32 hits = 0u;
 		U32 idx = 0u;
 		U32 idx = 0u;
 		do
 		do
@@ -188,18 +156,18 @@ void main()
 		collides = (hits != 0u);
 		collides = (hits != 0u);
 	}
 	}
 	// Decal
 	// Decal
-	else if(isDecal())
+	else if(isDecal(clustererObjectIdx))
 	{
 	{
-		objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeDecal - 1u].x;
-		const Decal decal = u_decals[objectArrayIdx];
+		objectArrayIdx = clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeDecal - 1u].x;
+		const Decal decal = g_decals[objectArrayIdx];
 
 
 		collides = testRayObb(rayOrigin, rayDir, decal.m_obbExtend, decal.m_invertedTransform, t0, t1);
 		collides = testRayObb(rayOrigin, rayDir, decal.m_obbExtend, decal.m_invertedTransform, t0, t1);
 	}
 	}
 	// Fog volume
 	// Fog volume
-	else if(isFogVolume())
+	else if(isFogVolume(clustererObjectIdx))
 	{
 	{
-		objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeFogDensityVolume - 1u].x;
-		const FogDensityVolume vol = u_fogVolumes[objectArrayIdx];
+		objectArrayIdx = clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeFogDensityVolume - 1u].x;
+		const FogDensityVolume vol = g_fogVolumes[objectArrayIdx];
 
 
 		if(vol.m_isBox != 0u)
 		if(vol.m_isBox != 0u)
 		{
 		{
@@ -213,10 +181,10 @@ void main()
 		}
 		}
 	}
 	}
 	// Reflection probe
 	// Reflection probe
-	else if(isReflectionProbe())
+	else if(isReflectionProbe(clustererObjectIdx))
 	{
 	{
-		objectArrayIdx = clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeReflectionProbe - 1u].x;
-		const ReflectionProbe probe = u_reflectionProbes[objectArrayIdx];
+		objectArrayIdx = clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeReflectionProbe - 1u].x;
+		const ReflectionProbe probe = g_reflectionProbes[objectArrayIdx];
 
 
 		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
 		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
 	}
 	}
@@ -224,8 +192,8 @@ void main()
 	else
 	else
 	{
 	{
 		objectArrayIdx =
 		objectArrayIdx =
-			clustererObjectIdx - u_unis.m_objectCountsUpTo[kClusterObjectTypeGlobalIlluminationProbe - 1u].x;
-		const GlobalIlluminationProbe probe = u_giProbes[objectArrayIdx];
+			clustererObjectIdx - g_unis.m_objectCountsUpTo[kClusterObjectTypeGlobalIlluminationProbe - 1u].x;
+		const GlobalIlluminationProbe probe = g_giProbes[objectArrayIdx];
 
 
 		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
 		collides = testRayAabb(rayOrigin, rayDir, probe.m_aabbMin, probe.m_aabbMax, t0, t1);
 	}
 	}
@@ -236,15 +204,15 @@ void main()
 		// Set the tile
 		// Set the tile
 		const ExtendedClusterObjectMask mask = ExtendedClusterObjectMask(1)
 		const ExtendedClusterObjectMask mask = ExtendedClusterObjectMask(1)
 											   << ExtendedClusterObjectMask(objectArrayIdx);
 											   << ExtendedClusterObjectMask(objectArrayIdx);
-		atomicOr(s_tileMasks[localTileIdx], mask);
+		InterlockedOr(s_tileMasks[localTileIdx], mask);
 
 
 		// Compute and set the Z splits
 		// Compute and set the Z splits
 		const Vec3 hitpointA = rayDir * t0 + rayOrigin;
 		const Vec3 hitpointA = rayDir * t0 + rayOrigin;
 		const Vec3 hitpointB = rayDir * t1 + rayOrigin;
 		const Vec3 hitpointB = rayDir * t1 + rayOrigin;
 		const F32 distFromNearPlaneA =
 		const F32 distFromNearPlaneA =
-			testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointA);
+			testPlanePoint(g_unis.m_nearPlaneWSpace.xyz, g_unis.m_nearPlaneWSpace.w, hitpointA);
 		const F32 distFromNearPlaneB =
 		const F32 distFromNearPlaneB =
-			testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointB);
+			testPlanePoint(g_unis.m_nearPlaneWSpace.xyz, g_unis.m_nearPlaneWSpace.w, hitpointB);
 
 
 		F32 minDistFromNearPlane;
 		F32 minDistFromNearPlane;
 		F32 maxDistFromNearPlane;
 		F32 maxDistFromNearPlane;
@@ -259,77 +227,75 @@ void main()
 			maxDistFromNearPlane = distFromNearPlaneA;
 			maxDistFromNearPlane = distFromNearPlaneA;
 		}
 		}
 
 
-		const I32 startZSplit = max(I32(minDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0);
+		const I32 startZSplit = max(I32(minDistFromNearPlane * g_unis.m_zSplitCountOverFrustumLength), 0);
 		const I32 endZSplit =
 		const I32 endZSplit =
-			clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(kZSplitCount) - 1);
+			clamp(I32(maxDistFromNearPlane * g_unis.m_zSplitCountOverFrustumLength), 0, I32(kZSplitCount) - 1);
 		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		{
 		{
-			atomicOr(s_zSplitMasks[i], mask);
+			InterlockedOr(s_zSplitMasks[i], mask);
 		}
 		}
 	}
 	}
 
 
 	// Sync
 	// Sync
-	memoryBarrierShared();
-	barrier();
+	GroupMemoryBarrierWithGroupSync();
 
 
 	// First sample writes the tile
 	// First sample writes the tile
-	if(sampleIdx == 0u && s_tileMasks[localTileIdx] != ExtendedClusterObjectMask(0))
+	if(sampleIdx == 0u && s_tileMasks[localTileIdx] != 0)
 	{
 	{
-		if(isPointLight())
+		if(isPointLight(clustererObjectIdx))
 		{
 		{
-			atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
+			InterlockedOr(g_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
 		}
 		}
-		else if(isSpotLight())
+		else if(isSpotLight(clustererObjectIdx))
 		{
 		{
-			atomicOr(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
+			InterlockedOr(g_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
 		}
 		}
-		else if(isDecal())
+		else if(isDecal(clustererObjectIdx))
 		{
 		{
-			atomicOr(u_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
+			InterlockedOr(g_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
 		}
 		}
-		else if(isFogVolume())
+		else if(isFogVolume(clustererObjectIdx))
 		{
 		{
-			atomicOr(u_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
+			InterlockedOr(g_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
 		}
 		}
-		else if(isReflectionProbe())
+		else if(isReflectionProbe(clustererObjectIdx))
 		{
 		{
-			atomicOr(u_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
+			InterlockedOr(g_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
 		}
 		}
 		else
 		else
 		{
 		{
-			atomicOr(u_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
+			InterlockedOr(g_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
 		}
 		}
 	}
 	}
 
 
 	// All invocations write at least one Z split
 	// All invocations write at least one Z split
-	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
-		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < kZSplitCount; ++i)
+	for(U32 i = svGroupIdx * splitsPerInvocation; i < (svGroupIdx + 1u) * splitsPerInvocation && i < kZSplitCount; ++i)
 	{
 	{
-		if(s_zSplitMasks[i] != ExtendedClusterObjectMask(0))
+		if(s_zSplitMasks[i] != 0)
 		{
 		{
-			if(isPointLight())
+			if(isPointLight(clustererObjectIdx))
 			{
 			{
-				atomicOr(u_clusters[kTileCount + i].m_pointLightsMask, s_zSplitMasks[i]);
+				InterlockedOr(g_clusters[kTileCount + i].m_pointLightsMask, s_zSplitMasks[i]);
 			}
 			}
-			else if(isSpotLight())
+			else if(isSpotLight(clustererObjectIdx))
 			{
 			{
-				atomicOr(u_clusters[kTileCount + i].m_spotLightsMask, s_zSplitMasks[i]);
+				InterlockedOr(g_clusters[kTileCount + i].m_spotLightsMask, s_zSplitMasks[i]);
 			}
 			}
-			else if(isDecal())
+			else if(isDecal(clustererObjectIdx))
 			{
 			{
-				atomicOr(u_clusters[kTileCount + i].m_decalsMask, s_zSplitMasks[i]);
+				InterlockedOr(g_clusters[kTileCount + i].m_decalsMask, s_zSplitMasks[i]);
 			}
 			}
-			else if(isFogVolume())
+			else if(isFogVolume(clustererObjectIdx))
 			{
 			{
-				atomicOr(u_clusters[kTileCount + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
+				InterlockedOr(g_clusters[kTileCount + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
 			}
 			}
-			else if(isReflectionProbe())
+			else if(isReflectionProbe(clustererObjectIdx))
 			{
 			{
-				atomicOr(u_clusters[kTileCount + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
+				InterlockedOr(g_clusters[kTileCount + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
 			}
 			}
 			else
 			else
 			{
 			{
-				atomicOr(u_clusters[kTileCount + i].m_giProbesMask, U32(s_zSplitMasks[i]));
+				InterlockedOr(g_clusters[kTileCount + i].m_giProbesMask, U32(s_zSplitMasks[i]));
 			}
 			}
 		}
 		}
 	}
 	}

+ 2 - 0
AnKi/Shaders/CollisionFunctions.hlsl

@@ -5,6 +5,8 @@
 
 
 #pragma once
 #pragma once
 
 
+#include <AnKi/Shaders/Common.hlsl>
+
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection
 /// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection
 Bool testRayTriangle(Vec3 rayOrigin, Vec3 rayDir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfaceCulling, out F32 t, out F32 u,
 Bool testRayTriangle(Vec3 rayOrigin, Vec3 rayDir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfaceCulling, out F32 t, out F32 u,
 					 out F32 v)
 					 out F32 v)

+ 1 - 0
AnKi/Shaders/Include/ClusteredShadingTypes.h

@@ -37,6 +37,7 @@ constexpr U32 kMaxVisibleGlobalIlluminationProbes = 8u;
 // Other consts
 // Other consts
 constexpr RF32 kClusterObjectFrustumNearPlane = 0.1f / 4.0f; ///< Near plane of all clusterer object frustums.
 constexpr RF32 kClusterObjectFrustumNearPlane = 0.1f / 4.0f; ///< Near plane of all clusterer object frustums.
 constexpr RF32 kSubsurfaceMin = 0.01f;
 constexpr RF32 kSubsurfaceMin = 0.01f;
+constexpr U32 kMaxZsplitCount = 128u;
 
 
 /// Point light.
 /// Point light.
 struct PointLight
 struct PointLight

+ 2 - 1
AnKi/Shaders/Include/Common.h

@@ -74,7 +74,8 @@ void maybeUnused(T a)
 		[[vk::binding(0, s)]] Texture2DArray<RVec4> g_bindlessTextures2dArrayF32[kMaxBindlessTextures]; \
 		[[vk::binding(0, s)]] Texture2DArray<RVec4> g_bindlessTextures2dArrayF32[kMaxBindlessTextures]; \
 		[[vk::binding(1, s)]] Buffer<float4> g_bindlessTextureBuffersF32[kMaxBindlessReadonlyTextureBuffers];
 		[[vk::binding(1, s)]] Buffer<float4> g_bindlessTextureBuffersF32[kMaxBindlessReadonlyTextureBuffers];
 
 
-#	define _ANKI_SCONST_X(type, n, id) [[vk::constant_id(id)]] const type n = (type)1;
+#	define _ANKI_SCONST_X(type, n, id) \
+		[[vk::constant_id(id)]] const type n = (type)1;
 
 
 #	define _ANKI_SCONST_X2(type, componentType, n, id) \
 #	define _ANKI_SCONST_X2(type, componentType, n, id) \
 		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_2_, n) = (componentType)1; \
 		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_2_, n) = (componentType)1; \