Browse Source

Make some functions templates in HLSL

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
001b550c0c

+ 3 - 2
AnKi/Shaders/ApplyIrradianceToReflection.ankiprog

@@ -33,8 +33,9 @@ RWTexture2D<Vec4> g_cubeTex[6u] : register(u0); // RWTexture2D because there is
 							g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), gbuffer);
 
 	// Sample
-	const RVec3 irradiance = sampleAmbientDice(g_irradianceDice[0u].xyz, g_irradianceDice[1u].xyz, g_irradianceDice[2u].xyz, g_irradianceDice[3u].xyz,
-											   g_irradianceDice[4u].xyz, g_irradianceDice[5u].xyz, gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
+	const RVec3 irradiance =
+		sampleAmbientDice<RF32>(g_irradianceDice[0u].xyz, g_irradianceDice[1u].xyz, g_irradianceDice[2u].xyz, g_irradianceDice[3u].xyz,
+								g_irradianceDice[4u].xyz, g_irradianceDice[5u].xyz, gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
 
 	// Compute the indirect term
 	const RVec3 indirect = gbuffer.m_diffuse * irradiance;

+ 3 - 3
AnKi/Shaders/ClusteredShadingFunctions.hlsl

@@ -224,7 +224,7 @@ vector<T, 3> sampleGiProbes(Cluster cluster, StructuredBuffer<GlobalIllumination
 		const GlobalIlluminationProbe probe = probes[firstbitlow2(cluster.m_giProbesMask)];
 
 		// Sample
-		probeColor = sampleGlobalIllumination(worldPos, normal, probe, getBindlessTexture3DRVec4(probe.m_volumeTexture), trilinearClampSampler);
+		probeColor = sampleGlobalIllumination<T>(worldPos, normal, probe, getBindlessTexture3DVec4(probe.m_volumeTexture), trilinearClampSampler);
 	}
 	else
 	{
@@ -245,8 +245,8 @@ vector<T, 3> sampleGiProbes(Cluster cluster, StructuredBuffer<GlobalIllumination
 			totalBlendWeight += blendWeight;
 
 			// Sample
-			const vector<T, 3> c = sampleGlobalIllumination(worldPos, normal, probe, getBindlessTextureNonUniformIndex3DRVec4(probe.m_volumeTexture),
-															trilinearClampSampler);
+			const vector<T, 3> c = sampleGlobalIllumination<T>(worldPos, normal, probe,
+															   getBindlessTextureNonUniformIndex3DVec4(probe.m_volumeTexture), trilinearClampSampler);
 			probeColor += c * blendWeight;
 		}
 

+ 6 - 6
AnKi/Shaders/ForwardShadingCommon.hlsl

@@ -56,12 +56,12 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos, Vec4 svPosition)
 		const Vec3 diffC = diffCol * light.m_diffuseColor;
 
 		const Vec3 frag2Light = light.m_position - worldPos;
-		const F32 att = computeAttenuationFactor(light.m_radius, frag2Light);
+		const RF32 att = computeAttenuationFactor<RF32>(light.m_radius, frag2Light);
 
-		F32 shadow = 1.0;
+		RF32 shadow = 1.0;
 		if(light.m_shadowAtlasTileScale >= 0.0)
 		{
-			shadow = computeShadowFactorPointLight(light, frag2Light, g_shadowAtlasTex, g_shadowSampler);
+			shadow = computeShadowFactorPointLight<RF32>(light, frag2Light, g_shadowAtlasTex, g_shadowSampler);
 		}
 
 		outColor += diffC * (att * shadow);
@@ -75,16 +75,16 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos, Vec4 svPosition)
 		const Vec3 diffC = diffCol * light.m_diffuseColor;
 
 		const Vec3 frag2Light = light.m_position - worldPos;
-		const F32 att = computeAttenuationFactor(light.m_radius, frag2Light);
+		const RF32 att = computeAttenuationFactor<RF32>(light.m_radius, frag2Light);
 
 		const Vec3 l = normalize(frag2Light);
 
-		const F32 spot = computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_direction);
+		const F32 spot = computeSpotFactor<RF32>(l, light.m_outerCos, light.m_innerCos, light.m_direction);
 
 		F32 shadow = 1.0;
 		[branch] if(light.m_shadow != 0u)
 		{
-			shadow = computeShadowFactorSpotLight(light, worldPos, g_shadowAtlasTex, g_shadowSampler);
+			shadow = computeShadowFactorSpotLight<RF32>(light, worldPos, g_shadowAtlasTex, g_shadowSampler);
 		}
 
 		outColor += diffC * (att * spot * shadow);

+ 2 - 2
AnKi/Shaders/IrradianceDice.ankiprog

@@ -136,8 +136,8 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 
 		// Sample irradiance
 		RVec3 firstBounceIrradiance =
-			sampleAmbientDice(s_integrationResults[0][0], s_integrationResults[1][0], s_integrationResults[2][0], s_integrationResults[3][0],
-							  s_integrationResults[4][0], s_integrationResults[5][0], gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
+			sampleAmbientDice<RF32>(s_integrationResults[0][0], s_integrationResults[1][0], s_integrationResults[2][0], s_integrationResults[3][0],
+									s_integrationResults[4][0], s_integrationResults[5][0], gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
 		firstBounceIrradiance = gbuffer.m_diffuse * firstBounceIrradiance;
 
 		// Compute 2nd bounce

+ 109 - 87
AnKi/Shaders/LightFunctions.hlsl

@@ -55,12 +55,13 @@ T D_GGX(T roughness, T NoH, vector<T, 3> h, vector<T, 3> worldNormal)
 }
 
 // Visibility term: Geometric shadowing divided by BRDF denominator
-RF32 V_Schlick(RF32 roughness, RF32 NoV, RF32 NoL)
+template<typename T>
+T V_Schlick(T roughness, T NoV, T NoL)
 {
-	const RF32 k = (roughness * roughness) * 0.5;
-	const RF32 Vis_SchlickV = NoV * (1.0 - k) + k;
-	const RF32 Vis_SchlickL = NoL * (1.0 - k) + k;
-	return 0.25 / (Vis_SchlickV * Vis_SchlickL);
+	const T k = (roughness * roughness) * T(0.5);
+	const T Vis_SchlickV = NoV * (T(1) - k) + k;
+	const T Vis_SchlickL = NoL * (T(1) - k) + k;
+	return T(0.25) / (Vis_SchlickV * Vis_SchlickL);
 }
 
 // Visibility term: Hammon 2017, "PBR Diffuse Lighting for GGX+Smith Microsurfaces"
@@ -72,14 +73,16 @@ T V_SmithGGXCorrelatedFast(T roughness, T NoV, T NoL)
 	return saturate(v);
 }
 
-RF32 Fd_Lambert()
+template<typename T>
+T Fd_Lambert()
 {
-	return 1.0 / kPi;
+	return T(1.0 / kPi);
 }
 
-RVec3 diffuseLobe(RVec3 diffuse)
+template<typename T>
+vector<T, 3> diffuseLobe(vector<T, 3> diffuse)
 {
-	return diffuse * Fd_Lambert();
+	return diffuse * Fd_Lambert<T>();
 }
 
 // Performs BRDF specular lighting
@@ -105,35 +108,38 @@ vector<T, 3> specularIsotropicLobe(vector<T, 3> normal, vector<T, 3> f0, T rough
 	return F * (V * D);
 }
 
-Vec3 specularDFG(RVec3 F0, RF32 roughness, Texture2D<RVec4> integrationLut, SamplerState integrationLutSampler, F32 NoV)
+template<typename T>
+vector<T, 3> specularDFG(vector<T, 3> F0, T roughness, Texture2D<Vec4> integrationLut, SamplerState integrationLutSampler, T NoV)
 {
-	const Vec2 envBRDF = integrationLut.SampleLevel(integrationLutSampler, Vec2(roughness, NoV), 0.0).xy;
+	const vector<T, 2> envBRDF = integrationLut.SampleLevel(integrationLutSampler, vector<T, 2>(roughness, NoV), 0.0).xy;
 	return lerp(envBRDF.xxx, envBRDF.yyy, F0);
 }
 
-RF32 computeSpotFactor(RVec3 l, RF32 outerCos, RF32 innerCos, RVec3 spotDir)
+template<typename T>
+T computeSpotFactor(vector<T, 3> frag2Light, T outerCos, T innerCos, vector<T, 3> spotDir)
 {
-	const RF32 costheta = -dot(l, spotDir);
-	const RF32 spotFactor = smoothstep(outerCos, innerCos, costheta);
+	const T costheta = -dot(frag2Light, spotDir);
+	const T spotFactor = smoothstep(outerCos, innerCos, costheta);
 	return spotFactor;
 }
 
 // PCSS calculation. Can be visualized here for spot lights: https://www.desmos.com/calculator/l0viaopwbi
 // and here for directional: https://www.desmos.com/calculator/0dh0ybqvv1
+template<typename T>
 struct Pcss
 {
 	SamplerState m_linearClampSampler;
 
-	Vec2 computePenumbra(Texture2D<Vec4> shadowmap, Vec2 searchDist, Vec3 projCoords, F32 cosTheta, F32 sinTheta, F32 lightSize, Bool dirLight)
+	vector<T, 2> computePenumbra(Texture2D<Vec4> shadowmap, Vec2 searchDist, Vec3 projCoords, T cosTheta, T sinTheta, F32 lightSize, Bool dirLight)
 	{
-		F32 inShadowCount = 0.0;
+		T inShadowCount = 0.0;
 		F32 avgOccluderZ = 0.0;
 		[unroll] for(U32 i = 0u; i < ARRAY_SIZE(kPoissonDisk4); ++i)
 		{
-			const Vec2 diskPoint = kPoissonDisk4[i];
+			const vector<T, 2> diskPoint = kPoissonDisk4[i];
 
 			// Rotate the disk point
-			Vec2 rotatedDiskPoint;
+			vector<T, 2> rotatedDiskPoint;
 			rotatedDiskPoint.x = diskPoint.x * cosTheta - diskPoint.y * sinTheta;
 			rotatedDiskPoint.y = diskPoint.y * cosTheta + diskPoint.x * sinTheta;
 
@@ -148,10 +154,10 @@ struct Pcss
 			}
 		}
 
-		F32 factor;
+		T factor;
 		if(inShadowCount == 0.0 || inShadowCount == ARRAY_SIZE(kPoissonDisk4))
 		{
-			factor = 0;
+			factor = 0.0;
 		}
 		else
 		{
@@ -168,26 +174,27 @@ struct Pcss
 			}
 		}
 
-		return Vec2(factor, inShadowCount);
+		return vector<T, 2>(factor, inShadowCount);
 	}
 };
 
+template<typename T>
 struct PcssDisabled
 {
-	Vec2 computePenumbra(Texture2D<Vec4> shadowmap, Vec2 texelSize, Vec3 projCoords, F32 cosTheta, F32 sinTheta, F32 lightSize, Bool dirLight)
+	vector<T, 2> computePenumbra(Texture2D<Vec4> shadowmap, Vec2 searchDist, Vec3 projCoords, T cosTheta, T sinTheta, F32 lightSize, Bool dirLight)
 	{
 		return -1.0;
 	}
 };
 
-template<typename TPcss>
-RF32 computeShadowFactorSpotLightGeneric(SpotLight light, Vec3 worldPos, Texture2D<Vec4> shadowTex, SamplerComparisonState shadowMapSampler, Bool pcf,
-										 RF32 randFactor, TPcss pcss)
+template<typename T, typename TPcss>
+T computeShadowFactorSpotLightGeneric(SpotLight light, Vec3 worldPos, Texture2D<Vec4> shadowTex, SamplerComparisonState shadowMapSampler, Bool pcf,
+									  T randFactor, TPcss pcss)
 {
 	const Vec4 texCoords4 = mul(light.m_textureMatrix, Vec4(worldPos, 1.0));
 	const Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;
 
-	RF32 shadow;
+	T shadow;
 	if(pcf)
 	{
 		Vec2 texSize;
@@ -195,13 +202,13 @@ RF32 computeShadowFactorSpotLightGeneric(SpotLight light, Vec3 worldPos, Texture
 		shadowTex.GetDimensions(0, texSize.x, texSize.y, mipCount);
 		const Vec2 smTexelSize = 1.0 / texSize;
 
-		const F32 sinTheta = sin(randFactor * 2.0 * kPi);
-		const F32 cosTheta = cos(randFactor * 2.0 * kPi);
+		const T sinTheta = sin(randFactor * 2.0 * kPi);
+		const T cosTheta = cos(randFactor * 2.0 * kPi);
 
 		// PCSS
-		const Vec2 pcssRes =
+		const vector<T, 2> pcssRes =
 			pcss.computePenumbra(shadowTex, smTexelSize * kPcssSearchTexelRadius, texCoords3, cosTheta, sinTheta, light.m_radius, false);
-		F32 pcfPixels;
+		T pcfPixels;
 		if(pcssRes.x == -1.0)
 		{
 			// PCSS disabled
@@ -220,10 +227,10 @@ RF32 computeShadowFactorSpotLightGeneric(SpotLight light, Vec3 worldPos, Texture
 		shadow = 0.0;
 		[unroll] for(U32 i = 0u; i < ARRAY_SIZE(kPoissonDisk4); ++i)
 		{
-			const Vec2 diskPoint = kPoissonDisk4[i];
+			const vector<T, 2> diskPoint = kPoissonDisk4[i];
 
 			// Rotate the disk point
-			Vec2 rotatedDiskPoint;
+			vector<T, 2> rotatedDiskPoint;
 			rotatedDiskPoint.x = diskPoint.x * cosTheta - diskPoint.y * sinTheta;
 			rotatedDiskPoint.y = diskPoint.y * cosTheta + diskPoint.x * sinTheta;
 
@@ -233,7 +240,7 @@ RF32 computeShadowFactorSpotLightGeneric(SpotLight light, Vec3 worldPos, Texture
 			shadow += shadowTex.SampleCmpLevelZero(shadowMapSampler, newUv, texCoords3.z);
 		}
 
-		shadow /= F32(ARRAY_SIZE(kPoissonDisk4));
+		shadow /= T(ARRAY_SIZE(kPoissonDisk4));
 	}
 	else
 	{
@@ -243,29 +250,33 @@ RF32 computeShadowFactorSpotLightGeneric(SpotLight light, Vec3 worldPos, Texture
 	return shadow;
 }
 
-RF32 computeShadowFactorSpotLight(SpotLight light, Vec3 worldPos, Texture2D shadowTex, SamplerComparisonState shadowMapSampler)
+template<typename T>
+T computeShadowFactorSpotLight(SpotLight light, Vec3 worldPos, Texture2D shadowTex, SamplerComparisonState shadowMapSampler)
 {
-	PcssDisabled noPcss = (PcssDisabled)0;
+	PcssDisabled<T> noPcss = (PcssDisabled<T>)0;
 	return computeShadowFactorSpotLightGeneric(light, worldPos, shadowTex, shadowMapSampler, false, 0.0, noPcss);
 }
 
-RF32 computeShadowFactorSpotLightPcf(SpotLight light, Vec3 worldPos, Texture2D shadowTex, SamplerComparisonState shadowMapSampler, RF32 randFactor)
+template<typename T>
+T computeShadowFactorSpotLightPcf(SpotLight light, Vec3 worldPos, Texture2D shadowTex, SamplerComparisonState shadowMapSampler, T randFactor)
 {
-	PcssDisabled noPcss = (PcssDisabled)0;
+	PcssDisabled<T> noPcss = (PcssDisabled<T>)0;
 	return computeShadowFactorSpotLightGeneric(light, worldPos, shadowTex, shadowMapSampler, true, randFactor, noPcss);
 }
 
-RF32 computeShadowFactorSpotLightPcss(SpotLight light, Vec3 worldPos, Texture2D shadowTex, SamplerComparisonState shadowMapSampler, RF32 randFactor,
-									  SamplerState linearClampAnySampler)
+template<typename T>
+T computeShadowFactorSpotLightPcss(SpotLight light, Vec3 worldPos, Texture2D shadowTex, SamplerComparisonState shadowMapSampler, T randFactor,
+								   SamplerState linearClampAnySampler)
 {
-	Pcss pcss;
+	Pcss<T> pcss;
 	pcss.m_linearClampSampler = linearClampAnySampler;
 	return computeShadowFactorSpotLightGeneric(light, worldPos, shadowTex, shadowMapSampler, true, randFactor, pcss);
 }
 
 // Compute the shadow factor of point (omni) lights.
-RF32 computeShadowFactorPointLightGeneric(PointLight light, Vec3 frag2Light, Texture2D shadowMap, SamplerComparisonState shadowMapSampler,
-										  RF32 randFactor, Bool pcf)
+template<typename T>
+T computeShadowFactorPointLightGeneric(PointLight light, Vec3 frag2Light, Texture2D shadowMap, SamplerComparisonState shadowMapSampler, T randFactor,
+									   Bool pcf)
 {
 	const Vec3 dir = -frag2Light;
 	const Vec3 dirabs = abs(dir);
@@ -297,7 +308,7 @@ RF32 computeShadowFactorPointLightGeneric(PointLight light, Vec3 frag2Light, Tex
 	uv += atlasOffset;
 
 	// Sample
-	RF32 shadow;
+	T shadow;
 	if(pcf)
 	{
 		F32 mipCount;
@@ -305,16 +316,16 @@ RF32 computeShadowFactorPointLightGeneric(PointLight light, Vec3 frag2Light, Tex
 		shadowMap.GetDimensions(0, smTexelSize.x, smTexelSize.y, mipCount);
 		smTexelSize = 1.0 / smTexelSize;
 
-		const F32 sinTheta = sin(randFactor * 2.0 * kPi);
-		const F32 cosTheta = cos(randFactor * 2.0 * kPi);
+		const T sinTheta = sin(randFactor * 2.0 * kPi);
+		const T cosTheta = cos(randFactor * 2.0 * kPi);
 
 		shadow = 0.0;
 		[unroll] for(U32 i = 0u; i < ARRAY_SIZE(kPoissonDisk4); ++i)
 		{
-			const Vec2 diskPoint = kPoissonDisk4[i];
+			const vector<T, 2> diskPoint = kPoissonDisk4[i];
 
 			// Rotate the disk point
-			Vec2 rotatedDiskPoint;
+			vector<T, 2> rotatedDiskPoint;
 			rotatedDiskPoint.x = diskPoint.x * cosTheta - diskPoint.y * sinTheta;
 			rotatedDiskPoint.y = diskPoint.y * cosTheta + diskPoint.x * sinTheta;
 
@@ -324,7 +335,7 @@ RF32 computeShadowFactorPointLightGeneric(PointLight light, Vec3 frag2Light, Tex
 			shadow += shadowMap.SampleCmpLevelZero(shadowMapSampler, newUv, z);
 		}
 
-		shadow /= F32(ARRAY_SIZE(kPoissonDisk4));
+		shadow /= T(ARRAY_SIZE(kPoissonDisk4));
 	}
 	else
 	{
@@ -334,21 +345,22 @@ RF32 computeShadowFactorPointLightGeneric(PointLight light, Vec3 frag2Light, Tex
 	return shadow;
 }
 
-RF32 computeShadowFactorPointLight(PointLight light, Vec3 frag2Light, Texture2D shadowMap, SamplerComparisonState shadowMapSampler)
+template<typename T>
+T computeShadowFactorPointLight(PointLight light, Vec3 frag2Light, Texture2D shadowMap, SamplerComparisonState shadowMapSampler)
 {
 	return computeShadowFactorPointLightGeneric(light, frag2Light, shadowMap, shadowMapSampler, -1.0, false);
 }
 
-RF32 computeShadowFactorPointLightPcf(PointLight light, Vec3 frag2Light, Texture2D shadowMap, SamplerComparisonState shadowMapSampler,
-									  RF32 randFactor)
+template<typename T>
+T computeShadowFactorPointLightPcf(PointLight light, Vec3 frag2Light, Texture2D shadowMap, SamplerComparisonState shadowMapSampler, T randFactor)
 {
 	return computeShadowFactorPointLightGeneric(light, frag2Light, shadowMap, shadowMapSampler, randFactor, true);
 }
 
 // Compute the shadow factor of a directional light
-template<typename TPcss>
-RF32 computeShadowFactorDirLightGeneric(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap,
-										SamplerComparisonState shadowMapSampler, RF32 randFactor, Bool pcf, TPcss pcss)
+template<typename T, typename TPcss>
+T computeShadowFactorDirLightGeneric(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap,
+									 SamplerComparisonState shadowMapSampler, T randFactor, Bool pcf, TPcss pcss)
 {
 #define ANKI_FAST_CASCADES_WORKAROUND 1 // light might be in a constant buffer and dynamic indexing in constant buffers is too slow on nvidia
 
@@ -386,11 +398,11 @@ RF32 computeShadowFactorDirLightGeneric(DirectionalLight light, U32 cascadeIdx,
 	const Vec4 texCoords4 = mul(lightProjectionMat, Vec4(worldPos, 1.0));
 	Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;
 
-	RF32 shadow;
+	T shadow;
 	if(pcf || pcfDistUvSpace == 0.0f)
 	{
-		const F32 sinTheta = sin(randFactor * 2.0 * kPi);
-		const F32 cosTheta = cos(randFactor * 2.0 * kPi);
+		const T sinTheta = sin(randFactor * 2.0 * kPi);
+		const T cosTheta = cos(randFactor * 2.0 * kPi);
 
 		// PCSS
 		const Vec2 pcssRes =
@@ -412,10 +424,10 @@ RF32 computeShadowFactorDirLightGeneric(DirectionalLight light, U32 cascadeIdx,
 		shadow = 0.0;
 		[unroll] for(U32 i = 0u; i < ARRAY_SIZE(kPoissonDisk8); ++i)
 		{
-			const Vec2 diskPoint = kPoissonDisk8[i];
+			const vector<T, 2> diskPoint = kPoissonDisk8[i];
 
 			// Rotate the disk point
-			Vec2 rotatedDiskPoint;
+			vector<T, 2> rotatedDiskPoint;
 			rotatedDiskPoint.x = diskPoint.x * cosTheta - diskPoint.y * sinTheta;
 			rotatedDiskPoint.y = diskPoint.y * cosTheta + diskPoint.x * sinTheta;
 
@@ -425,7 +437,7 @@ RF32 computeShadowFactorDirLightGeneric(DirectionalLight light, U32 cascadeIdx,
 			shadow += shadowMap.SampleCmpLevelZero(shadowMapSampler, newUv, texCoords3.z);
 		}
 
-		shadow /= F32(ARRAY_SIZE(kPoissonDisk8));
+		shadow /= T(ARRAY_SIZE(kPoissonDisk8));
 	}
 	else
 	{
@@ -435,34 +447,38 @@ RF32 computeShadowFactorDirLightGeneric(DirectionalLight light, U32 cascadeIdx,
 	return shadow;
 }
 
-RF32 computeShadowFactorDirLight(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap, SamplerComparisonState shadowMapSampler)
+template<typename T>
+T computeShadowFactorDirLight(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap, SamplerComparisonState shadowMapSampler)
 {
-	PcssDisabled noPcss = (PcssDisabled)0;
+	PcssDisabled<T> noPcss = (PcssDisabled<T>)0;
 	return computeShadowFactorDirLightGeneric(light, cascadeIdx, worldPos, shadowMap, shadowMapSampler, -1.0, false, noPcss);
 }
 
-RF32 computeShadowFactorDirLightPcf(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap,
-									SamplerComparisonState shadowMapSampler, F32 randFactor)
+template<typename T>
+T computeShadowFactorDirLightPcf(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap, SamplerComparisonState shadowMapSampler,
+								 T randFactor)
 {
-	PcssDisabled noPcss = (PcssDisabled)0;
+	PcssDisabled<T> noPcss = (PcssDisabled<T>)0;
 	return computeShadowFactorDirLightGeneric(light, cascadeIdx, worldPos, shadowMap, shadowMapSampler, randFactor, true, noPcss);
 }
 
-RF32 computeShadowFactorDirLightPcss(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap,
-									 SamplerComparisonState shadowMapSampler, F32 randFactor, SamplerState linearClampAnySampler)
+template<typename T>
+T computeShadowFactorDirLightPcss(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap, SamplerComparisonState shadowMapSampler,
+								  T randFactor, SamplerState linearClampAnySampler)
 {
-	Pcss pcss;
+	Pcss<T> pcss;
 	pcss.m_linearClampSampler = linearClampAnySampler;
 	return computeShadowFactorDirLightGeneric(light, cascadeIdx, worldPos, shadowMap, shadowMapSampler, randFactor, true, pcss);
 }
 
 // Compute the shadow factor of a directional light
-RF32 computeShadowFactorDirLight(Mat4 lightProjectionMat, Vec3 worldPos, Texture2D<RVec4> shadowMap, SamplerComparisonState shadowMapSampler)
+template<typename T>
+T computeShadowFactorDirLight(Mat4 lightProjectionMat, Vec3 worldPos, Texture2D<Vec4> shadowMap, SamplerComparisonState shadowMapSampler)
 {
 	const Vec4 texCoords4 = mul(lightProjectionMat, Vec4(worldPos, 1.0));
 	const Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;
 
-	const RF32 shadowFactor = shadowMap.SampleCmpLevelZero(shadowMapSampler, texCoords3.xy, texCoords3.z);
+	const T shadowFactor = shadowMap.SampleCmpLevelZero(shadowMapSampler, texCoords3.xy, texCoords3.z);
 	return shadowFactor;
 }
 
@@ -496,10 +512,12 @@ Vec3 computeCubemapVecCheap(Vec3 r, F32 R2, Vec3 f)
 	return r;
 }
 
-RF32 computeAttenuationFactor(RF32 lightRadius, RVec3 frag2Light)
+template<typename T>
+T computeAttenuationFactor(T lightRadius, Vec3 frag2Light)
 {
-	const RF32 fragLightDist = dot(frag2Light, frag2Light);
-	RF32 att = 1.0 - fragLightDist / (lightRadius * lightRadius);
+	const F32 fragLightDist = dot(frag2Light, frag2Light);
+	T att = fragLightDist / (lightRadius * lightRadius);
+	att = T(1) - att;
 	att = max(0.0, att);
 	return att * att;
 }
@@ -536,25 +554,28 @@ F32 computeProbeBlendWeight(Vec3 fragPos, // Doesn't need to be inside the AABB
 
 // Given the value of the 6 faces of the dice and a normal, sample the correct weighted value.
 // https://www.shadertoy.com/view/XtcBDB
-RVec3 sampleAmbientDice(RVec3 posx, RVec3 negx, RVec3 posy, RVec3 negy, RVec3 posz, RVec3 negz, RVec3 normal)
+template<typename T>
+vector<T, 3> sampleAmbientDice(vector<T, 3> posx, vector<T, 3> negx, vector<T, 3> posy, vector<T, 3> negy, vector<T, 3> posz, vector<T, 3> negz,
+							   vector<T, 3> normal)
 {
-	normal.z *= -1.0f;
-	const RVec3 axisWeights = normal * normal;
-	const RVec3 uv = normal * 0.5f + 0.5f;
+	normal.z *= -1.0;
+	const vector<T, 3> axisWeights = normal * normal;
+	const vector<T, 3> uv = normal * 0.5 + 0.5;
 
-	RVec3 col = lerp(negx, posx, uv.x) * axisWeights.x;
+	vector<T, 3> col = lerp(negx, posx, uv.x) * axisWeights.x;
 	col += lerp(negy, posy, uv.y) * axisWeights.y;
 	col += lerp(negz, posz, uv.z) * axisWeights.z;
 
 	// Divide by weight
-	col /= axisWeights.x + axisWeights.y + axisWeights.z + kEpsilonRF32;
+	col /= axisWeights.x + axisWeights.y + axisWeights.z + 0.0001;
 
 	return col;
 }
 
 // Sample the irradiance term from the clipmap
-RVec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const GlobalIlluminationProbe probe, Texture3D<RVec4> tex,
-							   SamplerState linearAnyClampSampler)
+template<typename T>
+vector<T, 3> sampleGlobalIllumination(const Vec3 worldPos, const vector<T, 3> normal, const GlobalIlluminationProbe probe, Texture3D<Vec4> tex,
+									  SamplerState linearAnyClampSampler)
 {
 	// Find the UVW
 	Vec3 uvw = (worldPos - probe.m_aabbMin) / (probe.m_aabbMax - probe.m_aabbMin);
@@ -568,7 +589,7 @@ RVec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const Glo
 	uvw.x = clamp(uvw.x, probe.m_halfTexelSizeU, (1.0 / 6.0) - probe.m_halfTexelSizeU);
 
 	// Read the irradiance
-	RVec3 irradiancePerDir[6u];
+	vector<T, 3> irradiancePerDir[6u];
 	[unroll] for(U32 dir = 0u; dir < 6u; ++dir)
 	{
 		// Point to the correct UV
@@ -579,8 +600,8 @@ RVec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const Glo
 	}
 
 	// Sample the irradiance
-	const RVec3 irradiance = sampleAmbientDice(irradiancePerDir[0], irradiancePerDir[1], irradiancePerDir[2], irradiancePerDir[3],
-											   irradiancePerDir[4], irradiancePerDir[5], normal);
+	const vector<T, 3> irradiance = sampleAmbientDice<T>(irradiancePerDir[0], irradiancePerDir[1], irradiancePerDir[2], irradiancePerDir[3],
+														 irradiancePerDir[4], irradiancePerDir[5], normal);
 
 	return irradiance;
 }
@@ -777,16 +798,17 @@ U32 computeShadowCascadeIndex(F32 distance, Vec4 cascadeDistances, U32 shadowCas
 
 /// Bring the indices of the closest cascades and a factor to blend them. To visualize what's going on go to:
 /// https://www.desmos.com/calculator/g1ibye6ebg
-UVec2 computeShadowCascadeIndex2(F32 distance, Vec4 cascadeDistances, U32 shadowCascadeCount, out RF32 factor)
+template<typename T>
+UVec2 computeShadowCascadeIndex2(F32 distance, Vec4 cascadeDistances, U32 shadowCascadeCount, out T factor)
 {
 	const U32 cascade = computeShadowCascadeIndex(distance, cascadeDistances, shadowCascadeCount);
 	const U32 nextCascade = min(cascade + 1u, shadowCascadeCount - 1u);
 
-	const F32 minDist = (cascade == 0u) ? 0.0f : cascadeDistances[cascade - 1u];
+	const F32 minDist = (cascade == 0u) ? 0.0 : cascadeDistances[cascade - 1u];
 	const F32 maxDist = cascadeDistances[cascade];
 
 	factor = (distance - minDist) / max(kEpsilonF32, maxDist - minDist);
-	factor = pow(factor, 16.0f); // WARNING: Need to change the C++ code if you change this
+	factor = pow(factor, T(16.0)); // WARNING: Need to change the C++ code if you change this
 
 	return UVec2(cascade, nextCascade);
 }

+ 5 - 5
AnKi/Shaders/LightShading.ankiprog

@@ -30,7 +30,7 @@ Texture2D g_depthTex : register(t8);
 Texture2D<RVec4> g_resolvedShadowsTex : register(t9);
 Texture2D<RVec4> g_ssaoTex : register(t10);
 Texture2D<RVec4> g_reflectionsTex : register(t11);
-Texture2D<RVec4> g_integrationLut : register(t12);
+Texture2D<Vec4> g_integrationLut : register(t12);
 
 // Common code for lighting
 #	define LIGHTING_COMMON_BRDF() \
@@ -38,7 +38,7 @@ Texture2D<RVec4> g_integrationLut : register(t12);
 		const RVec3 l = normalize(frag2Light); \
 		const RVec3 specC = specularIsotropicLobe(gbuffer.m_normal, gbuffer.m_f0, gbuffer.m_roughness, viewDir, l); \
 		const RVec3 diffC = diffuseLobe(gbuffer.m_diffuse); \
-		const RF32 att = computeAttenuationFactor(light.m_radius, frag2Light); \
+		const RF32 att = computeAttenuationFactor<RF32>(light.m_radius, frag2Light); \
 		RF32 lambert = max(0.0, dot(gbuffer.m_normal, l));
 
 RVec4 main(VertOut input) : SV_TARGET0
@@ -87,8 +87,8 @@ RVec4 main(VertOut input) : SV_TARGET0
 		RVec3 refl = g_reflectionsTex[coord].xyz;
 
 		// Apply the reflection
-		const F32 NoV = max(0.0f, dot(gbuffer.m_normal, viewDir));
-		const Vec3 env = specularDFG(gbuffer.m_f0, gbuffer.m_roughness, g_integrationLut, g_trilinearClampSampler, NoV);
+		const RF32 NoV = max(0.0f, dot(gbuffer.m_normal, viewDir));
+		const Vec3 env = specularDFG<RF32>(gbuffer.m_f0, gbuffer.m_roughness, g_integrationLut, g_trilinearClampSampler, NoV);
 		refl *= env;
 
 		outColor += refl;
@@ -147,7 +147,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 
 		LIGHTING_COMMON_BRDF();
 
-		const F32 spot = computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_direction);
+		const F32 spot = computeSpotFactor<RF32>(l, light.m_outerCos, light.m_innerCos, light.m_direction);
 
 		[branch] if(light.m_shadow)
 		{

+ 7 - 6
AnKi/Shaders/Reflections.ankiprog

@@ -37,6 +37,8 @@ constexpr Bool kDebugSsr = false;
 constexpr Bool kSsrHallucinate = true;
 constexpr Bool kSsrHallucinateDebug = false;
 constexpr F32 kTemporalSourceWeight = 0.005;
+constexpr F32 kTemporalGamma = 1.2;
+constexpr Bool kPerfectTemporal = true;
 #define TILE_SIZE 32
 
 // The states of a tile
@@ -66,7 +68,7 @@ Vec3 getDiffuseIndirect(StructuredBuffer<GpuSceneGlobalIlluminationProbe> giProb
 	if(probeFound)
 	{
 		const GpuSceneGlobalIlluminationProbe probe = giProbes[i];
-		return sampleGlobalIllumination(worldPos, worldNormal, probe, getBindlessTexture3DRVec4(probe.m_volumeTexture), linearAnyClampSampler);
+		return sampleGlobalIllumination<F32>(worldPos, worldNormal, probe, getBindlessTexture3DVec4(probe.m_volumeTexture), linearAnyClampSampler);
 	}
 	else
 	{
@@ -237,7 +239,7 @@ Vec3 doLightShading(Vec3 worldPos, Vec3 viewPos, UVec2 coord, F32 depth)
 
 			const U32 cascadeIdx = computeShadowCascadeIndex(negativeZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount);
 
-			shadowFactor = computeShadowFactorDirLight(dirLight, cascadeIdx, worldPos, g_shadowAtlasTex, g_shadowSampler);
+			shadowFactor = computeShadowFactorDirLight<F32>(dirLight, cascadeIdx, worldPos, g_shadowAtlasTex, g_shadowSampler);
 		}
 		else
 		{
@@ -710,7 +712,7 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 
 		const U32 cascadeIdx = computeShadowCascadeIndex(negativeZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount);
 
-		shadow = computeShadowFactorDirLight(dirLight, cascadeIdx, hitPos, g_shadowAtlasTex, g_shadowSampler);
+		shadow = computeShadowFactorDirLight<F32>(dirLight, cascadeIdx, hitPos, g_shadowAtlasTex, g_shadowSampler);
 	}
 	else
 	{
@@ -1114,11 +1116,10 @@ void computeSourceColor(Vec2 uv, IVec2 coord, IVec2 textureSize, out Vec3 m1, ou
 
 	// Fix history
 	constexpr F32 sampleCount = 9.0;
-	const F32 gamma = 1.0;
 	const Vec3 mu = m1 / sampleCount;
 	const Vec3 sigma = sqrt(abs((m2 / sampleCount) - (mu * mu)));
-	const Vec3 minc = mu - gamma * sigma;
-	const Vec3 maxc = mu + gamma * sigma;
+	const Vec3 minc = mu - kTemporalGamma * sigma;
+	const Vec3 maxc = mu + kTemporalGamma * sigma;
 
 	history = clamp(history, minc, maxc);
 

+ 18 - 17
AnKi/Shaders/ShadowmapsResolve.ankiprog

@@ -144,14 +144,14 @@ RVec4 main(VertOut input) : SV_TARGET0
 #		endif
 
 #		if QUALITY == 2
-			const F32 shadowFactorCascadeA = computeShadowFactorDirLightPcss(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex,
-																			 g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
+			const RF32 shadowFactorCascadeA = computeShadowFactorDirLightPcss<RF32>(
+				dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
 #		elif QUALITY == 1
-			const F32 shadowFactorCascadeA =
-				computeShadowFactorDirLightPcf(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
+			const RF32 shadowFactorCascadeA = computeShadowFactorDirLightPcf<RF32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex,
+																				   g_linearAnyClampShadowSampler, randFactor);
 #		else
-			const F32 shadowFactorCascadeA =
-				computeShadowFactorDirLight(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+			const RF32 shadowFactorCascadeA =
+				computeShadowFactorDirLight<RF32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 #		endif
 
 			if(cascadeBlendFactor < 0.01 || cascadeIndices.x == cascadeIndices.y)
@@ -163,16 +163,16 @@ RVec4 main(VertOut input) : SV_TARGET0
 			{
 #		if QUALITY == 2
 				// Blend cascades
-				const F32 shadowFactorCascadeB = computeShadowFactorDirLightPcss(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex,
-																				 g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
+				const RF32 shadowFactorCascadeB = computeShadowFactorDirLightPcss<RF32>(
+					dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
 #		elif QUALITY == 1
 				// Blend cascades
-				const F32 shadowFactorCascadeB =
-					computeShadowFactorDirLightPcf(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
+				const RF32 shadowFactorCascadeB = computeShadowFactorDirLightPcf<RF32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex,
+																					   g_linearAnyClampShadowSampler, randFactor);
 #		else
 				// Blend cascades
-				const F32 shadowFactorCascadeB =
-					computeShadowFactorDirLight(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+				const RF32 shadowFactorCascadeB =
+					computeShadowFactorDirLight<RF32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 #		endif
 				shadowFactor = lerp(shadowFactorCascadeA, shadowFactorCascadeB, cascadeBlendFactor);
 			}
@@ -205,7 +205,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 			const RF32 shadowFactor =
 				computeShadowFactorPointLightPcf(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
 #	else
-			const RF32 shadowFactor = computeShadowFactorPointLight(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+			const RF32 shadowFactor = computeShadowFactorPointLight<F32>(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 #	endif
 			shadowFactors[min(kMaxShadowCastersPerFragment - 1u, shadowCasterCountPerFragment++)] = shadowFactor;
 		}
@@ -219,12 +219,13 @@ RVec4 main(VertOut input) : SV_TARGET0
 		[branch] if(light.m_shadow)
 		{
 #	if QUALITY == 2
-			const RF32 shadowFactor = computeShadowFactorSpotLightPcss(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor,
-																	   g_linearAnyClampSampler);
+			const RF32 shadowFactor = computeShadowFactorSpotLightPcss<RF32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler,
+																			 randFactor, g_linearAnyClampSampler);
 #	elif QUALITY == 1
-			const RF32 shadowFactor = computeShadowFactorSpotLightPcf(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
+			const RF32 shadowFactor =
+				computeShadowFactorSpotLightPcf<RF32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
 #	else
-			const RF32 shadowFactor = computeShadowFactorSpotLight(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+			const RF32 shadowFactor = computeShadowFactorSpotLight<RF32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 #	endif
 			shadowFactors[min(kMaxShadowCastersPerFragment - 1u, shadowCasterCountPerFragment++)] = shadowFactor;
 		}

+ 4 - 4
AnKi/Shaders/TraditionalDeferredShading.ankiprog

@@ -30,7 +30,7 @@ Texture2D<Vec4> g_depthTex : register(t5);
 
 // For directional light:
 SamplerComparisonState g_shadowMapSampler : register(s1);
-Texture2D<RVec4> g_shadowMap : register(t6);
+Texture2D<Vec4> g_shadowMap : register(t6);
 
 ConstantBuffer<GlobalRendererConstants> g_globalRendererConsts : register(b1);
 
@@ -68,7 +68,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 		{
 			// Acceptable distance
 
-			shadowFactor = computeShadowFactorDirLight(g_consts.m_dirLight.m_lightMatrix, worldPos, g_shadowMap, g_shadowMapSampler);
+			shadowFactor = computeShadowFactorDirLight<RF32>(g_consts.m_dirLight.m_lightMatrix, worldPos, g_shadowMap, g_shadowMapSampler);
 		}
 		else
 		{
@@ -97,10 +97,10 @@ Vec4 main(VertOut input) : SV_TARGET0
 		const Vec3 l = normalize(frag2Light);
 		const F32 nol = max(0.0, dot(gbuffer.m_normal, l));
 
-		const F32 att = computeAttenuationFactor(light.m_radius, frag2Light);
+		const F32 att = computeAttenuationFactor<F32>(light.m_radius, frag2Light);
 		const F32 lambert = nol;
 		const F32 spot = ((U32)light.m_flags & (U32)GpuSceneLightFlag::kSpotLight)
-							 ? computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_direction)
+							 ? computeSpotFactor<RF32>(l, light.m_outerCos, light.m_innerCos, light.m_direction)
 							 : 1.0f;
 		const F32 factor = att * spot * max(lambert, gbuffer.m_subsurface);
 

+ 9 - 9
AnKi/Shaders/VolumetricLightingAccumulation.ankiprog

@@ -107,7 +107,7 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 		{
 			const U32 cascadeIdx = computeShadowCascadeIndex(negativeZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount);
 
-			factor *= computeShadowFactorDirLight(dirLight, cascadeIdx, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+			factor *= computeShadowFactorDirLight<F32>(dirLight, cascadeIdx, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 		}
 #endif
 
@@ -121,14 +121,14 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 		const PointLight light = g_pointLights[idx];
 
 		const Vec3 frag2Light = light.m_position - worldPos;
-		F32 factor = computeAttenuationFactor(light.m_radius, frag2Light);
+		F32 factor = computeAttenuationFactor<F32>(light.m_radius, frag2Light);
 
 		factor *= phaseFunction(viewDir, normalize(worldPos - light.m_position), kPhaseFunctionAnisotropy);
 
 #if ENABLE_SHADOWS
 		if(light.m_shadow)
 		{
-			factor *= computeShadowFactorPointLight(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+			factor *= computeShadowFactorPointLight<F32>(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 		}
 #endif
 
@@ -141,18 +141,18 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 		const SpotLight light = g_spotLights[idx];
 
 		const Vec3 frag2Light = light.m_position - worldPos;
-		F32 factor = computeAttenuationFactor(light.m_radius, frag2Light);
+		F32 factor = computeAttenuationFactor<F32>(light.m_radius, frag2Light);
 
 		const Vec3 l = normalize(frag2Light);
 
-		factor *= computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_direction);
+		factor *= computeSpotFactor<RF32>(l, light.m_outerCos, light.m_innerCos, light.m_direction);
 
 		factor *= phaseFunction(viewDir, light.m_direction, kPhaseFunctionAnisotropy);
 
 #if ENABLE_SHADOWS
 		if(light.m_shadow)
 		{
-			factor *= computeShadowFactorSpotLight(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+			factor *= computeShadowFactorSpotLight<F32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 		}
 #endif
 
@@ -172,7 +172,7 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 
 			// Sample
 			diffIndirect =
-				sampleGlobalIllumination(worldPos, viewDir, probe, getBindlessTexture3DRVec4(probe.m_volumeTexture), g_linearAnyClampSampler);
+				sampleGlobalIllumination<F32>(worldPos, viewDir, probe, getBindlessTexture3DVec4(probe.m_volumeTexture), g_linearAnyClampSampler);
 		}
 		else
 		{
@@ -193,8 +193,8 @@ Vec4 accumulateLightsAndFog(Cluster cluster, Vec3 worldPos, F32 negativeZViewSpa
 				totalBlendWeight += blendWeight;
 
 				// Sample
-				const Vec3 c = sampleGlobalIllumination(worldPos, viewDir, probe, getBindlessTextureNonUniformIndex3DRVec4(probe.m_volumeTexture),
-														g_linearAnyClampSampler);
+				const Vec3 c = sampleGlobalIllumination<F32>(worldPos, viewDir, probe, getBindlessTextureNonUniformIndex3DVec4(probe.m_volumeTexture),
+															 g_linearAnyClampSampler);
 				diffIndirect += c * blendWeight;
 			}