Bladeren bron

Add a 2nd path in shaders that doesn't use U64

Panagiotis Christopoulos Charitos 4 jaren geleden
bovenliggende
commit
1244e621a2

+ 0 - 2
AnKi/Renderer/ClusterBinning.cpp

@@ -40,8 +40,6 @@ Error ClusterBinning::init(const ConfigSet& config)
 	variantInitInfo.addConstant("RENDERING_SIZE",
 	variantInitInfo.addConstant("RENDERING_SIZE",
 								UVec2(m_r->getInternalResolution().x(), m_r->getInternalResolution().y()));
 								UVec2(m_r->getInternalResolution().x(), m_r->getInternalResolution().y()));
 
 
-	variantInitInfo.addMutation("SUPPORTS_64BIT_ATOMICS", m_r->getGrManager().getDeviceCapabilities().m_64bitAtomics);
-
 	const ShaderProgramResourceVariant* variant;
 	const ShaderProgramResourceVariant* variant;
 	m_prog->getOrCreateVariant(variantInitInfo, variant);
 	m_prog->getOrCreateVariant(variantInitInfo, variant);
 	m_grProg = variant->getProgram();
 	m_grProg = variant->getProgram();

+ 1 - 1
AnKi/Renderer/ConfigDefs.h

@@ -36,7 +36,7 @@ ANKI_CONFIG_OPTION(r_probeReflectionShadowMapResolution, 64, 4, 2048)
 ANKI_CONFIG_OPTION(r_lensFlareMaxSpritesPerFlare, 8, 4, 256)
 ANKI_CONFIG_OPTION(r_lensFlareMaxSpritesPerFlare, 8, 4, 256)
 ANKI_CONFIG_OPTION(r_lensFlareMaxFlares, 16, 8, 256)
 ANKI_CONFIG_OPTION(r_lensFlareMaxFlares, 16, 8, 256)
 
 
-ANKI_CONFIG_OPTION(r_giTileResolution, 32, 4, 2048)
+ANKI_CONFIG_OPTION(r_giTileResolution, (ANKI_OS_ANDROID) ? 16 : 32, 4, 2048)
 ANKI_CONFIG_OPTION(r_giShadowMapResolution, 128, 4, 2048)
 ANKI_CONFIG_OPTION(r_giShadowMapResolution, 128, 4, 2048)
 ANKI_CONFIG_OPTION(r_giMaxCachedProbes, 16, 4, 2048)
 ANKI_CONFIG_OPTION(r_giMaxCachedProbes, 16, 4, 2048)
 ANKI_CONFIG_OPTION(r_giMaxVisibleProbes, 8, 1, 256)
 ANKI_CONFIG_OPTION(r_giMaxVisibleProbes, 8, 1, 256)

+ 5 - 2
AnKi/ShaderCompiler/Glslang.cpp

@@ -235,6 +235,7 @@ static ANKI_USE_RESULT Error logShaderErrorCode(CString error, CString source, G
 
 
 	StringAuto prettySrc(alloc);
 	StringAuto prettySrc(alloc);
 	StringListAuto lines(alloc);
 	StringListAuto lines(alloc);
+	StringAuto errorLineTxt(alloc);
 
 
 	static const char* padding = "==============================================================================";
 	static const char* padding = "==============================================================================";
 
 
@@ -249,6 +250,7 @@ static ANKI_USE_RESULT Error logShaderErrorCode(CString error, CString source, G
 		if(!it->isEmpty() && lineno == errorLineNumber)
 		if(!it->isEmpty() && lineno == errorLineNumber)
 		{
 		{
 			tmp.sprintf(">>%8u: %s\n", lineno, &(*it)[0]);
 			tmp.sprintf(">>%8u: %s\n", lineno, &(*it)[0]);
+			errorLineTxt.sprintf("%s", &(*it)[0]);
 		}
 		}
 		else if(!it->isEmpty())
 		else if(!it->isEmpty())
 		{
 		{
@@ -262,8 +264,9 @@ static ANKI_USE_RESULT Error logShaderErrorCode(CString error, CString source, G
 		prettySrc.append(tmp);
 		prettySrc.append(tmp);
 	}
 	}
 
 
-	ANKI_SHADER_COMPILER_LOGE("Shader compilation failed:\n%s\n%s\n%s\n%s\n%s\n%s", padding, &error[0], padding,
-							  &prettySrc[0], padding, &error[0]);
+	ANKI_SHADER_COMPILER_LOGE("Shader compilation failed:\n%s\n%s\nIn: %s\n%s\n%s\n%s\n%s\nIn: %s\n", padding,
+							  &error[0], errorLineTxt.cstr(), padding, &prettySrc[0], padding, &error[0],
+							  errorLineTxt.cstr());
 
 
 	return Error::NONE;
 	return Error::NONE;
 }
 }

+ 37 - 20
AnKi/ShaderCompiler/ShaderProgramParser.cpp

@@ -22,6 +22,11 @@ static const Array<CString, U32(ShaderType::COUNT)> SHADER_STAGE_NAMES = {
 
 
 static const char* SHADER_HEADER = R"(#version 460 core
 static const char* SHADER_HEADER = R"(#version 460 core
 #define ANKI_%s_SHADER 1
 #define ANKI_%s_SHADER 1
+#define ANKI_OS_ANDROID %d
+#define ANKI_OS_WINDOWS %d
+#define ANKI_OS_LINUX %d
+
+#define _ANKI_SUPPORTS_64BIT !ANKI_OS_ANDROID
 
 
 #define gl_VertexID gl_VertexIndex
 #define gl_VertexID gl_VertexIndex
 
 
@@ -47,12 +52,15 @@ static const char* SHADER_HEADER = R"(#version 460 core
 #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
-#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
+
+#if _ANKI_SUPPORTS_64BIT
+#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
 #extension GL_EXT_shader_atomic_int64 : enable
 #extension GL_EXT_shader_atomic_int64 : enable
 #extension GL_EXT_shader_subgroup_extended_types_int64 : enable
 #extension GL_EXT_shader_subgroup_extended_types_int64 : enable
+#endif
 
 
 #extension GL_EXT_nonuniform_qualifier : enable
 #extension GL_EXT_nonuniform_qualifier : enable
 #extension GL_EXT_scalar_block_layout : enable
 #extension GL_EXT_scalar_block_layout : enable
@@ -144,23 +152,25 @@ static const char* SHADER_HEADER = R"(#version 460 core
 #define IVec4 ivec4
 #define IVec4 ivec4
 #define _ANKI_SIZEOF_ivec4 16u
 #define _ANKI_SIZEOF_ivec4 16u
 
 
-#define U64 uint64_t
-#define _ANKI_SIZEOF_uint64_t 8u
-#define U64Vec2 u64vec2
-#define _ANKI_SIZEOF_u64vec2 16u
-#define U64Vec3 u64vec3
-#define _ANKI_SIZEOF_u64vec3 24u
-#define U64Vec4 u64vec4
-#define _ANKI_SIZEOF_u64vec4 32u
-
-#define I64 int64_t
-#define _ANKI_SIZEOF_int64_t 8u
-#define I64Vec2 i64vec2
-#define _ANKI_SIZEOF_i64vec2 16u
-#define I64Vec3 i64vec3
-#define _ANKI_SIZEOF_i64vec3 24u
-#define I64Vec4 i64vec4
-#define _ANKI_SIZEOF_i64vec4 32u
+#if _ANKI_SUPPORTS_64BIT
+#	define U64 uint64_t
+#	define _ANKI_SIZEOF_uint64_t 8u
+#	define U64Vec2 u64vec2
+#	define _ANKI_SIZEOF_u64vec2 16u
+#	define U64Vec3 u64vec3
+#	define _ANKI_SIZEOF_u64vec3 24u
+#	define U64Vec4 u64vec4
+#	define _ANKI_SIZEOF_u64vec4 32u
+
+#	define I64 int64_t
+#	define _ANKI_SIZEOF_int64_t 8u
+#	define I64Vec2 i64vec2
+#	define _ANKI_SIZEOF_i64vec2 16u
+#	define I64Vec3 i64vec3
+#	define _ANKI_SIZEOF_i64vec3 24u
+#	define I64Vec4 i64vec4
+#	define _ANKI_SIZEOF_i64vec4 32u
+#endif
 
 
 #define Mat3 mat3
 #define Mat3 mat3
 
 
@@ -172,6 +182,13 @@ static const char* SHADER_HEADER = R"(#version 460 core
 
 
 #define Bool bool
 #define Bool bool
 
 
+#if _ANKI_SUPPORTS_64BIT
+#	define Address U64
+#else
+#	define Address UVec2
+#endif
+#define _ANKI_SIZEOF_Address 8u
+
 #define _ANKI_CONCATENATE(a, b) a##b
 #define _ANKI_CONCATENATE(a, b) a##b
 #define ANKI_CONCATENATE(a, b) _ANKI_CONCATENATE(a, b)
 #define ANKI_CONCATENATE(a, b) _ANKI_CONCATENATE(a, b)
 
 
@@ -912,8 +929,8 @@ Error ShaderProgramParser::parse()
 void ShaderProgramParser::generateAnkiShaderHeader(ShaderType shaderType, const ShaderCompilerOptions& compilerOptions,
 void ShaderProgramParser::generateAnkiShaderHeader(ShaderType shaderType, const ShaderCompilerOptions& compilerOptions,
 												   StringAuto& header)
 												   StringAuto& header)
 {
 {
-	header.sprintf(SHADER_HEADER, SHADER_STAGE_NAMES[shaderType].cstr(),
-				   compilerOptions.m_bindlessLimits.m_bindlessTextureCount,
+	header.sprintf(SHADER_HEADER, SHADER_STAGE_NAMES[shaderType].cstr(), ANKI_OS_ANDROID, ANKI_OS_WINDOWS,
+				   ANKI_OS_LINUX, compilerOptions.m_bindlessLimits.m_bindlessTextureCount,
 				   compilerOptions.m_bindlessLimits.m_bindlessImageCount);
 				   compilerOptions.m_bindlessLimits.m_bindlessImageCount);
 }
 }
 
 

+ 24 - 87
AnKi/Shaders/ClusterBinning.ankiprog

@@ -3,9 +3,6 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
-// For those platforms that don't support 64bit atomics try to do the atomics in 32bit
-#pragma anki mutator SUPPORTS_64BIT_ATOMICS 0 1
-
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 0u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 0u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2u);
@@ -26,22 +23,11 @@ layout(set = 0, binding = 0, scalar) uniform b_unis
 	ClusteredShadingUniforms u_unis;
 	ClusteredShadingUniforms u_unis;
 };
 };
 
 
-layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters64
-{
-	Cluster u_clusters64[];
-};
-
-layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters32
+layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters
 {
 {
-	Cluster32 u_clusters32[];
+	Cluster u_clusters[];
 };
 };
 
 
-#if SUPPORTS_64BIT_ATOMICS
-#	define u_clusters u_clusters64
-#else
-#	define u_clusters u_clusters32
-#endif
-
 layout(set = 0, binding = 2, scalar) uniform b_pointLights
 layout(set = 0, binding = 2, scalar) uniform b_pointLights
 {
 {
 	PointLight u_pointLights[MAX_VISIBLE_POINT_LIGHTS];
 	PointLight u_pointLights[MAX_VISIBLE_POINT_LIGHTS];
@@ -82,60 +68,10 @@ UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] = UVec2[](LOCATION(-2, -6), LOCATION(6, -2)
 
 
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
 // A mask per tile of this workgroup for the clusterer object being processed by this workgroup
 const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT;
 const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT;
-
-#if SUPPORTS_64BIT_ATOMICS
-shared U64 s_tileMasks[TILES_PER_WORKGROUP];
-#else
-shared U32 s_tileMasks[TILES_PER_WORKGROUP][2u];
-#endif
+shared ExtendedClusterObjectMask s_tileMasks[TILES_PER_WORKGROUP];
 
 
 // A mask for each Z split for a specific clusterer object
 // A mask for each Z split for a specific clusterer object
-#if SUPPORTS_64BIT_ATOMICS
-shared U64 s_zSplitMasks[Z_SPLIT_COUNT];
-#else
-shared U32 s_zSplitMasks[Z_SPLIT_COUNT][2u];
-#endif
-
-#if SUPPORTS_64BIT_ATOMICS
-#	define atomicOr2x32_64(dest, src) atomicOr(dest, src)
-
-#	define atomicOr2x32_2x32(dest, src) atomicOr(dest, src)
-
-#	define atomicOr32_2X32(dest, src) atomicOr(dest, U32(src))
-
-#	define zero2x32(dest) \
-		do \
-		{ \
-			dest = 0ul; \
-		} while(false)
-
-#	define isZero2x32(src) (src == 0ul)
-#else // !SUPPORTS_64BIT_ATOMICS
-#	define atomicOr2x32_64(dest, src) \
-		do \
-		{ \
-			atomicOr(dest[0u], U32(src)); \
-			atomicOr(dest[1u], U32(src >> 32ul)); \
-		} while(false)
-
-#	define atomicOr2x32_2x32(dest, src) \
-		do \
-		{ \
-			atomicOr(dest[0u], src[0u]); \
-			atomicOr(dest[1u], src[1u]); \
-		} while(false)
-
-#	define atomicOr32_2X32(dest, src) atomicOr(dest, src[1u])
-
-#	define zero2x32(dest) \
-		do \
-		{ \
-			dest[0u] = 0u; \
-			dest[1u] = 0u; \
-		} while(false)
-
-#	define isZero2x32(src) ((src[0u] | src[1u]) == 0u)
-#endif
+shared ExtendedClusterObjectMask s_zSplitMasks[Z_SPLIT_COUNT];
 
 
 Bool isPointLight()
 Bool isPointLight()
 {
 {
@@ -196,12 +132,12 @@ void main()
 	const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
 	const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
 
 
 	// Zero shared memory
 	// Zero shared memory
-	zero2x32(s_tileMasks[localTileIdx]);
+	s_tileMasks[localTileIdx] = ExtendedClusterObjectMask(0);
 	const U32 splitsPerInvocation = max(1u, Z_SPLIT_COUNT / WORKGROUP_SIZE);
 	const U32 splitsPerInvocation = max(1u, Z_SPLIT_COUNT / WORKGROUP_SIZE);
 	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
 	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
 		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
 		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
 	{
 	{
-		zero2x32(s_zSplitMasks[i]);
+		s_zSplitMasks[i] = ExtendedClusterObjectMask(0);
 	}
 	}
 	memoryBarrierShared();
 	memoryBarrierShared();
 	barrier();
 	barrier();
@@ -302,8 +238,9 @@ void main()
 	if(collides)
 	if(collides)
 	{
 	{
 		// Set the tile
 		// Set the tile
-		const U64 mask = 1ul << U64(objectArrayIdx);
-		atomicOr2x32_64(s_tileMasks[localTileIdx], mask);
+		const ExtendedClusterObjectMask mask = ExtendedClusterObjectMask(1)
+											   << ExtendedClusterObjectMask(objectArrayIdx);
+		atomicOr(s_tileMasks[localTileIdx], mask);
 
 
 		// Compute and set the Z splits
 		// Compute and set the Z splits
 		const Vec3 hitpointA = rayDir * t0 + rayOrigin;
 		const Vec3 hitpointA = rayDir * t0 + rayOrigin;
@@ -331,7 +268,7 @@ void main()
 			clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(Z_SPLIT_COUNT) - 1);
 			clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(Z_SPLIT_COUNT) - 1);
 		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		for(I32 i = startZSplit; i <= endZSplit; ++i)
 		{
 		{
-			atomicOr2x32_64(s_zSplitMasks[i], mask);
+			atomicOr(s_zSplitMasks[i], mask);
 		}
 		}
 	}
 	}
 
 
@@ -340,31 +277,31 @@ void main()
 	barrier();
 	barrier();
 
 
 	// First sample writes the tile
 	// First sample writes the tile
-	if(sampleIdx == 0u && !isZero2x32(s_tileMasks[localTileIdx]))
+	if(sampleIdx == 0u && s_tileMasks[localTileIdx] != ExtendedClusterObjectMask(0))
 	{
 	{
 		if(isPointLight())
 		if(isPointLight())
 		{
 		{
-			atomicOr2x32_2x32(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
+			atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
 		}
 		}
 		else if(isSpotLight())
 		else if(isSpotLight())
 		{
 		{
-			atomicOr2x32_2x32(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
+			atomicOr(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
 		}
 		}
 		else if(isDecal())
 		else if(isDecal())
 		{
 		{
-			atomicOr2x32_2x32(u_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
+			atomicOr(u_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
 		}
 		}
 		else if(isFogVolume())
 		else if(isFogVolume())
 		{
 		{
-			atomicOr32_2X32(u_clusters[tileIdx].m_fogDensityVolumesMask, s_tileMasks[localTileIdx]);
+			atomicOr(u_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
 		}
 		}
 		else if(isReflectionProbe())
 		else if(isReflectionProbe())
 		{
 		{
-			atomicOr32_2X32(u_clusters[tileIdx].m_reflectionProbesMask, s_tileMasks[localTileIdx]);
+			atomicOr(u_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
 		}
 		}
 		else
 		else
 		{
 		{
-			atomicOr32_2X32(u_clusters[tileIdx].m_giProbesMask, s_tileMasks[localTileIdx]);
+			atomicOr(u_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
 		}
 		}
 	}
 	}
 
 
@@ -372,31 +309,31 @@ void main()
 	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
 	for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
 		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
 		i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
 	{
 	{
-		if(!isZero2x32(s_zSplitMasks[i]))
+		if(s_zSplitMasks[i] != ExtendedClusterObjectMask(0))
 		{
 		{
 			if(isPointLight())
 			if(isPointLight())
 			{
 			{
-				atomicOr2x32_2x32(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
+				atomicOr(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
 			}
 			}
 			else if(isSpotLight())
 			else if(isSpotLight())
 			{
 			{
-				atomicOr2x32_2x32(u_clusters[TILE_COUNT + i].m_spotLightsMask, s_zSplitMasks[i]);
+				atomicOr(u_clusters[TILE_COUNT + i].m_spotLightsMask, s_zSplitMasks[i]);
 			}
 			}
 			else if(isDecal())
 			else if(isDecal())
 			{
 			{
-				atomicOr2x32_2x32(u_clusters[TILE_COUNT + i].m_decalsMask, s_zSplitMasks[i]);
+				atomicOr(u_clusters[TILE_COUNT + i].m_decalsMask, s_zSplitMasks[i]);
 			}
 			}
 			else if(isFogVolume())
 			else if(isFogVolume())
 			{
 			{
-				atomicOr32_2X32(u_clusters[TILE_COUNT + i].m_fogDensityVolumesMask, s_zSplitMasks[i]);
+				atomicOr(u_clusters[TILE_COUNT + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
 			}
 			}
 			else if(isReflectionProbe())
 			else if(isReflectionProbe())
 			{
 			{
-				atomicOr32_2X32(u_clusters[TILE_COUNT + i].m_reflectionProbesMask, s_zSplitMasks[i]);
+				atomicOr(u_clusters[TILE_COUNT + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
 			}
 			}
 			else
 			else
 			{
 			{
-				atomicOr32_2X32(u_clusters[TILE_COUNT + i].m_giProbesMask, s_zSplitMasks[i]);
+				atomicOr(u_clusters[TILE_COUNT + i].m_giProbesMask, U32(s_zSplitMasks[i]));
 			}
 			}
 		}
 		}
 	}
 	}

+ 6 - 6
AnKi/Shaders/ForwardShadingCommonFrag.glsl

@@ -44,10 +44,10 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos)
 	Cluster cluster = getClusterFragCoord(gl_FragCoord.xyz);
 	Cluster cluster = getClusterFragCoord(gl_FragCoord.xyz);
 
 
 	// Point lights
 	// Point lights
-	ANKI_LOOP while(cluster.m_pointLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_pointLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_pointLightsMask);
-		cluster.m_pointLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_pointLightsMask);
+		cluster.m_pointLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const PointLight light = u_pointLights2[idx];
 		const PointLight light = u_pointLights2[idx];
 
 
 		const Vec3 diffC = diffCol * light.m_diffuseColor;
 		const Vec3 diffC = diffCol * light.m_diffuseColor;
@@ -69,10 +69,10 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos)
 	}
 	}
 
 
 	// Spot lights
 	// Spot lights
-	ANKI_LOOP while(cluster.m_spotLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_spotLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_spotLightsMask);
-		cluster.m_spotLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_spotLightsMask);
+		cluster.m_spotLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const SpotLight light = u_spotLights2[idx];
 		const SpotLight light = u_spotLights2[idx];
 
 
 		const Vec3 diffC = diffCol * light.m_diffuseColor;
 		const Vec3 diffC = diffCol * light.m_diffuseColor;

+ 9 - 1
AnKi/Shaders/Functions.glsl

@@ -587,11 +587,19 @@ F32 computeMipLevel(Vec2 normalizedUvs)
 }
 }
 #endif
 #endif
 
 
+#if defined(U64)
 /// The regular findLSB in glslang has some issues since it invokes a builtin that is only supposed to be used with
 /// The regular findLSB in glslang has some issues since it invokes a builtin that is only supposed to be used with
 /// 32bit input. This is an alternative implementation but it expects that the input is not zero.
 /// 32bit input. This is an alternative implementation but it expects that the input is not zero.
-I32 findLSB64(U64 v)
+I32 findLSB2(U64 v)
 {
 {
 	const I32 lsb1 = findLSB(U32(v));
 	const I32 lsb1 = findLSB(U32(v));
 	const I32 lsb2 = findLSB(U32(v >> 32ul));
 	const I32 lsb2 = findLSB(U32(v >> 32ul));
 	return (lsb1 >= 0) ? lsb1 : lsb2 + 32;
 	return (lsb1 >= 0) ? lsb1 : lsb2 + 32;
 }
 }
+#endif
+
+/// Define an alternative findLSB to go in pair with the 64bit version.
+I32 findLSB2(U32 v)
+{
+	return findLSB(v);
+}

+ 4 - 4
AnKi/Shaders/GBufferPost.ankiprog

@@ -53,15 +53,15 @@ void main()
 										  u_clusteredShading.m_zSplitMagic.x, u_clusteredShading.m_zSplitMagic.y);
 										  u_clusteredShading.m_zSplitMagic.x, u_clusteredShading.m_zSplitMagic.y);
 
 
 	// Process decals
 	// Process decals
-	if(cluster.m_decalsMask == 0ul)
+	if(cluster.m_decalsMask == ExtendedClusterObjectMask(0))
 	{
 	{
 		discard;
 		discard;
 	}
 	}
 
 
-	ANKI_LOOP while(cluster.m_decalsMask != 0ul)
+	ANKI_LOOP while(cluster.m_decalsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_decalsMask);
-		cluster.m_decalsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_decalsMask);
+		cluster.m_decalsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const Decal decal = u_decals2[idx];
 		const Decal decal = u_decals2[idx];
 
 
 		// Project pos to decal space
 		// Project pos to decal space

+ 28 - 18
AnKi/Shaders/Include/ClusteredShadingTypes.h

@@ -7,6 +7,8 @@
 
 
 #include <AnKi/Shaders/Include/Common.h>
 #include <AnKi/Shaders/Include/Common.h>
 
 
+#define ANKI_CLUSTERED_SHADING_USE_64BIT !ANKI_OS_ANDROID
+
 ANKI_BEGIN_NAMESPACE
 ANKI_BEGIN_NAMESPACE
 
 
 // Enum of clusterer object types
 // Enum of clusterer object types
@@ -204,33 +206,41 @@ const U32 _ANKI_SIZEOF_ClusteredShadingUniforms =
 	28u * ANKI_SIZEOF(U32) + 2u * ANKI_SIZEOF(CommonMatrices) + ANKI_SIZEOF(DirectionalLight);
 	28u * ANKI_SIZEOF(U32) + 2u * ANKI_SIZEOF(CommonMatrices) + ANKI_SIZEOF(DirectionalLight);
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClusteredShadingUniforms) == _ANKI_SIZEOF_ClusteredShadingUniforms);
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClusteredShadingUniforms) == _ANKI_SIZEOF_ClusteredShadingUniforms);
 
 
+// Define the type of some cluster object masks
+#if !defined(__cplusplus)
+#	if ANKI_CLUSTERED_SHADING_USE_64BIT
+#		define ExtendedClusterObjectMask U64
+#	else
+#		define ExtendedClusterObjectMask U32
+#	endif
+#else
+#	if ANKI_CLUSTERED_SHADING_USE_64BIT
+using ExtendedClusterObjectMask = U64;
+#	else
+using ExtendedClusterObjectMask = U32;
+#	endif
+#endif
+
 /// Information that a tile or a Z-split will contain.
 /// Information that a tile or a Z-split will contain.
 struct Cluster
 struct Cluster
 {
 {
-	U64 m_pointLightsMask;
-	U64 m_spotLightsMask;
-	U64 m_decalsMask;
+	ExtendedClusterObjectMask m_pointLightsMask;
+	ExtendedClusterObjectMask m_spotLightsMask;
+	ExtendedClusterObjectMask m_decalsMask;
 	U32 m_fogDensityVolumesMask;
 	U32 m_fogDensityVolumesMask;
 	U32 m_reflectionProbesMask;
 	U32 m_reflectionProbesMask;
 	U32 m_giProbesMask;
 	U32 m_giProbesMask;
+#if ANKI_CLUSTERED_SHADING_USE_64BIT
 	U32 m_padding; ///< Add some padding to be 100% sure nothing will break.
 	U32 m_padding; ///< Add some padding to be 100% sure nothing will break.
+#endif
 };
 };
+
+#if ANKI_CLUSTERED_SHADING_USE_64BIT
 const U32 _ANKI_SIZEOF_Cluster = 5u * ANKI_SIZEOF(U64);
 const U32 _ANKI_SIZEOF_Cluster = 5u * ANKI_SIZEOF(U64);
 ANKI_SHADER_STATIC_ASSERT(sizeof(Cluster) == _ANKI_SIZEOF_Cluster);
 ANKI_SHADER_STATIC_ASSERT(sizeof(Cluster) == _ANKI_SIZEOF_Cluster);
-
-/// An alternative representation of Cluster that doesn't contain 64bit values
-struct Cluster32
-{
-	U32 m_pointLightsMask[2u];
-	U32 m_spotLightsMask[2u];
-	U32 m_decalsMask[2u];
-	U32 m_fogDensityVolumesMask;
-	U32 m_reflectionProbesMask;
-	U32 m_giProbesMask;
-	U32 m_padding; ///< Add some padding to be 100% sure nothing will break.
-};
-
-const U32 _ANKI_SIZEOF_Cluster32 = _ANKI_SIZEOF_Cluster;
-ANKI_SHADER_STATIC_ASSERT(sizeof(Cluster32) == _ANKI_SIZEOF_Cluster32);
+#else
+const U32 _ANKI_SIZEOF_Cluster = 6u * ANKI_SIZEOF(U32);
+ANKI_SHADER_STATIC_ASSERT(sizeof(Cluster) == _ANKI_SIZEOF_Cluster);
+#endif
 
 
 ANKI_END_NAMESPACE
 ANKI_END_NAMESPACE

+ 2 - 0
AnKi/Shaders/Include/Common.h

@@ -32,6 +32,8 @@ inline F32 dot(const T& a, const T& b)
 {
 {
 	return a.dot(b);
 	return a.dot(b);
 }
 }
+
+using Address = U64;
 ANKI_END_NAMESPACE
 ANKI_END_NAMESPACE
 
 
 #	define ANKI_CPP_CODE(x) x
 #	define ANKI_CPP_CODE(x) x

+ 4 - 4
AnKi/Shaders/Include/ModelTypes.h

@@ -88,11 +88,11 @@ ANKI_SHADER_STATIC_ASSERT(_ANKI_SIZEOF_BoneInfoVertex == sizeof(BoneInfoVertex))
 /// A structure that contains all the info of a geometry.
 /// A structure that contains all the info of a geometry.
 struct MeshGpuDescriptor
 struct MeshGpuDescriptor
 {
 {
-	U64 m_indexBufferPtr; ///< Points to a buffer of U16 indices.
+	Address m_indexBufferPtr; ///< Points to a buffer of U16 indices.
 #if defined(__cplusplus)
 #if defined(__cplusplus)
-	Array<U64, U(VertexAttributeBufferId::COUNT)> m_vertexBufferPtrs;
+	Array<Address, U(VertexAttributeBufferId::COUNT)> m_vertexBufferPtrs;
 #else
 #else
-	U64 m_vertexBufferPtrs[VERTEX_ATTRIBUTE_BUFFER_ID_COUNT];
+	Address m_vertexBufferPtrs[VERTEX_ATTRIBUTE_BUFFER_ID_COUNT];
 #endif
 #endif
 	U32 m_indexCount;
 	U32 m_indexCount;
 	U32 m_vertexCount;
 	U32 m_vertexCount;
@@ -100,7 +100,7 @@ struct MeshGpuDescriptor
 	Vec3 m_aabbMax;
 	Vec3 m_aabbMax;
 };
 };
 
 
-const U32 _ANKI_SIZEOF_MeshGpuDescriptor = 4u * ANKI_SIZEOF(U64) + 8u * ANKI_SIZEOF(F32);
+const U32 _ANKI_SIZEOF_MeshGpuDescriptor = 4u * ANKI_SIZEOF(UVec2) + 8u * ANKI_SIZEOF(F32);
 const U32 _ANKI_ALIGNOF_MeshGpuDescriptor = 8u;
 const U32 _ANKI_ALIGNOF_MeshGpuDescriptor = 8u;
 ANKI_SHADER_STATIC_ASSERT(_ANKI_SIZEOF_MeshGpuDescriptor == sizeof(MeshGpuDescriptor));
 ANKI_SHADER_STATIC_ASSERT(_ANKI_SIZEOF_MeshGpuDescriptor == sizeof(MeshGpuDescriptor));
 
 

+ 10 - 10
AnKi/Shaders/LightShading.ankiprog

@@ -129,10 +129,10 @@ void main()
 	}
 	}
 
 
 	// Point lights
 	// Point lights
-	ANKI_LOOP while(cluster.m_pointLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_pointLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_pointLightsMask);
-		cluster.m_pointLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_pointLightsMask);
+		cluster.m_pointLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const PointLight light = u_pointLights2[idx];
 		const PointLight light = u_pointLights2[idx];
 
 
 		LIGHTING_COMMON_BRDF();
 		LIGHTING_COMMON_BRDF();
@@ -151,10 +151,10 @@ void main()
 	}
 	}
 
 
 	// Spot lights
 	// Spot lights
-	ANKI_LOOP while(cluster.m_spotLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_spotLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_spotLightsMask);
-		cluster.m_spotLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_spotLightsMask);
+		cluster.m_spotLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const SpotLight light = u_spotLights2[idx];
 		const SpotLight light = u_spotLights2[idx];
 
 
 		LIGHTING_COMMON_BRDF();
 		LIGHTING_COMMON_BRDF();
@@ -186,7 +186,7 @@ void main()
 		{
 		{
 			// Only one probe, do a fast path without blend weight
 			// Only one probe, do a fast path without blend weight
 
 
-			const ReflectionProbe probe = u_reflectionProbes[findLSB(cluster.m_reflectionProbesMask)];
+			const ReflectionProbe probe = u_reflectionProbes[findLSB2(cluster.m_reflectionProbesMask)];
 
 
 			// Sample
 			// Sample
 			const Vec3 cubeUv = intersectProbe(worldPos, reflDir, probe.m_aabbMin, probe.m_aabbMax, probe.m_position);
 			const Vec3 cubeUv = intersectProbe(worldPos, reflDir, probe.m_aabbMin, probe.m_aabbMax, probe.m_position);
@@ -202,7 +202,7 @@ void main()
 			// Loop probes
 			// Loop probes
 			ANKI_LOOP while(cluster.m_reflectionProbesMask != 0u)
 			ANKI_LOOP while(cluster.m_reflectionProbesMask != 0u)
 			{
 			{
-				const U32 idx = U32(findLSB(cluster.m_reflectionProbesMask));
+				const U32 idx = U32(findLSB2(cluster.m_reflectionProbesMask));
 				cluster.m_reflectionProbesMask &= ~(1u << idx);
 				cluster.m_reflectionProbesMask &= ~(1u << idx);
 				const ReflectionProbe probe = u_reflectionProbes[idx];
 				const ReflectionProbe probe = u_reflectionProbes[idx];
 
 
@@ -244,7 +244,7 @@ void main()
 		{
 		{
 			// All subgroups point to the same probe and there is only one probe, do a fast path without blend weight
 			// All subgroups point to the same probe and there is only one probe, do a fast path without blend weight
 
 
-			const GlobalIlluminationProbe probe = u_giProbes[findLSB(cluster.m_giProbesMask)];
+			const GlobalIlluminationProbe probe = u_giProbes[findLSB2(cluster.m_giProbesMask)];
 
 
 			// Sample
 			// Sample
 			diffIndirect = sampleGlobalIllumination(worldPos, gbuffer.m_normal, probe, u_globalIlluminationTextures,
 			diffIndirect = sampleGlobalIllumination(worldPos, gbuffer.m_normal, probe, u_globalIlluminationTextures,
@@ -260,7 +260,7 @@ void main()
 			// Loop probes
 			// Loop probes
 			ANKI_LOOP while(cluster.m_giProbesMask != 0u)
 			ANKI_LOOP while(cluster.m_giProbesMask != 0u)
 			{
 			{
-				const U32 idx = U32(findLSB(cluster.m_giProbesMask));
+				const U32 idx = U32(findLSB2(cluster.m_giProbesMask));
 				cluster.m_giProbesMask &= ~(1u << idx);
 				cluster.m_giProbesMask &= ~(1u << idx);
 				const GlobalIlluminationProbe probe = u_giProbes[idx];
 				const GlobalIlluminationProbe probe = u_giProbes[idx];
 
 

+ 6 - 6
AnKi/Shaders/RtShadowsRayGen.ankiprog

@@ -126,10 +126,10 @@ void main()
 	}
 	}
 
 
 	// Point lights
 	// Point lights
-	ANKI_LOOP while(cluster.m_pointLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_pointLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_pointLightsMask);
-		cluster.m_pointLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_pointLightsMask);
+		cluster.m_pointLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const PointLight light = u_pointLights2[idx];
 		const PointLight light = u_pointLights2[idx];
 
 
 		ANKI_BRANCH if(light.m_shadowAtlasTileScale >= 0.0)
 		ANKI_BRANCH if(light.m_shadowAtlasTileScale >= 0.0)
@@ -153,10 +153,10 @@ void main()
 	}
 	}
 
 
 	// Spot lights
 	// Spot lights
-	ANKI_LOOP while(cluster.m_spotLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_spotLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_spotLightsMask);
-		cluster.m_spotLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_spotLightsMask);
+		cluster.m_spotLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const SpotLight light = u_spotLights2[idx];
 		const SpotLight light = u_spotLights2[idx];
 
 
 		ANKI_BRANCH if(light.m_shadowLayer != MAX_U32)
 		ANKI_BRANCH if(light.m_shadowLayer != MAX_U32)

+ 6 - 6
AnKi/Shaders/ShadowmapsResolve.ankiprog

@@ -79,10 +79,10 @@ void main()
 	}
 	}
 
 
 	// Point lights
 	// Point lights
-	ANKI_LOOP while(cluster.m_pointLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_pointLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_pointLightsMask);
-		cluster.m_pointLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_pointLightsMask);
+		cluster.m_pointLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const PointLight light = u_pointLights2[idx];
 		const PointLight light = u_pointLights2[idx];
 
 
 		ANKI_BRANCH if(light.m_shadowAtlasTileScale >= 0.0)
 		ANKI_BRANCH if(light.m_shadowAtlasTileScale >= 0.0)
@@ -96,10 +96,10 @@ void main()
 	}
 	}
 
 
 	// Spot lights
 	// Spot lights
-	ANKI_LOOP while(cluster.m_spotLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_spotLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_spotLightsMask);
-		cluster.m_spotLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_spotLightsMask);
+		cluster.m_spotLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const SpotLight light = u_spotLights2[idx];
 		const SpotLight light = u_spotLights2[idx];
 
 
 		ANKI_BRANCH if(light.m_shadowLayer != MAX_U32)
 		ANKI_BRANCH if(light.m_shadowLayer != MAX_U32)

+ 9 - 9
AnKi/Shaders/VolumetricLightingAccumulation.ankiprog

@@ -118,10 +118,10 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 	}
 	}
 
 
 	// Point lights
 	// Point lights
-	ANKI_LOOP while(cluster.m_pointLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_pointLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_pointLightsMask);
-		cluster.m_pointLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_pointLightsMask);
+		cluster.m_pointLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const PointLight light = u_pointLights2[idx];
 		const PointLight light = u_pointLights2[idx];
 
 
 		const Vec3 frag2Light = light.m_position - worldPos;
 		const Vec3 frag2Light = light.m_position - worldPos;
@@ -140,10 +140,10 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 	}
 	}
 
 
 	// Spot lights
 	// Spot lights
-	ANKI_LOOP while(cluster.m_spotLightsMask != 0ul)
+	ANKI_LOOP while(cluster.m_spotLightsMask != ExtendedClusterObjectMask(0))
 	{
 	{
-		const I32 idx = findLSB64(cluster.m_spotLightsMask);
-		cluster.m_spotLightsMask &= ~(1ul << U64(idx));
+		const I32 idx = findLSB2(cluster.m_spotLightsMask);
+		cluster.m_spotLightsMask &= ~(ExtendedClusterObjectMask(1) << ExtendedClusterObjectMask(idx));
 		const SpotLight light = u_spotLights2[idx];
 		const SpotLight light = u_spotLights2[idx];
 
 
 		const Vec3 frag2Light = light.m_position - worldPos;
 		const Vec3 frag2Light = light.m_position - worldPos;
@@ -173,7 +173,7 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 		{
 		{
 			// Only one probe, do a fast path without blend weight
 			// Only one probe, do a fast path without blend weight
 
 
-			const GlobalIlluminationProbe probe = u_giProbes[findLSB(cluster.m_giProbesMask)];
+			const GlobalIlluminationProbe probe = u_giProbes[findLSB2(cluster.m_giProbesMask)];
 
 
 			// Sample
 			// Sample
 			diffIndirect = sampleGlobalIllumination(worldPos, viewDir, probe, u_globalIlluminationTextures,
 			diffIndirect = sampleGlobalIllumination(worldPos, viewDir, probe, u_globalIlluminationTextures,
@@ -189,7 +189,7 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 			// Loop probes
 			// Loop probes
 			ANKI_LOOP while(cluster.m_giProbesMask != 0u)
 			ANKI_LOOP while(cluster.m_giProbesMask != 0u)
 			{
 			{
-				const U32 idx = U32(findLSB(cluster.m_giProbesMask));
+				const U32 idx = U32(findLSB2(cluster.m_giProbesMask));
 				cluster.m_giProbesMask &= ~(1u << idx);
 				cluster.m_giProbesMask &= ~(1u << idx);
 				const GlobalIlluminationProbe probe = u_giProbes[idx];
 				const GlobalIlluminationProbe probe = u_giProbes[idx];
 
 
@@ -218,7 +218,7 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 	{
 	{
 		ANKI_LOOP while(cluster.m_fogDensityVolumesMask != 0u)
 		ANKI_LOOP while(cluster.m_fogDensityVolumesMask != 0u)
 		{
 		{
-			const U32 idx = U32(findLSB(cluster.m_fogDensityVolumesMask));
+			const U32 idx = U32(findLSB2(cluster.m_fogDensityVolumesMask));
 			cluster.m_fogDensityVolumesMask &= ~(1u << idx);
 			cluster.m_fogDensityVolumesMask &= ~(1u << idx);
 			const FogDensityVolume vol = u_fogDensityVolumes[idx];
 			const FogDensityVolume vol = u_fogDensityVolumes[idx];