Przeglądaj źródła

Optimize indirect specular

Panagiotis Christopoulos Charitos 4 lat temu
rodzic
commit
0ca1009c1c

+ 3 - 0
AnKi/Renderer/ClusterBinning.cpp

@@ -7,6 +7,7 @@
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/RenderQueue.h>
 #include <AnKi/Renderer/VolumetricLightingAccumulation.h>
+#include <AnKi/Renderer/ProbeReflections.h>
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Util/ThreadHive.h>
@@ -442,6 +443,8 @@ void ClusterBinning::writeClustererBuffersTask()
 		unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE] =
 			unis.m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE - 1] + rqueue.m_giProbes.getSize();
 
+		unis.m_reflectionProbesMipCount = F32(m_r->getProbeReflections().getReflectionTextureMipmapCount());
+
 		unis.m_matrices = ctx.m_matrices;
 		unis.m_previousMatrices = ctx.m_prevMatrices;
 

+ 21 - 22
AnKi/Renderer/LightShading.cpp

@@ -149,28 +149,23 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 		bindUniforms(cmdb, 0, 2, binning.m_spotLightsToken);
 		rgraphCtx.bindColorTexture(0, 3, m_r->getShadowMapping().getShadowmapRt());
 
-		bindUniforms(cmdb, 0, 4, binning.m_reflectionProbesToken);
-		rgraphCtx.bindColorTexture(0, 5, m_r->getProbeReflections().getReflectionRt());
-		cmdb->bindTexture(0, 6, m_r->getProbeReflections().getIntegrationLut());
-
-		bindStorage(cmdb, 0, 7, binning.m_clustersToken);
-
-		cmdb->bindSampler(0, 8, m_r->getSamplers().m_nearestNearestClamp);
-		cmdb->bindSampler(0, 9, m_r->getSamplers().m_trilinearClamp);
-		rgraphCtx.bindColorTexture(0, 10, m_r->getGBuffer().getColorRt(0));
-		rgraphCtx.bindColorTexture(0, 11, m_r->getGBuffer().getColorRt(1));
-		rgraphCtx.bindColorTexture(0, 12, m_r->getGBuffer().getColorRt(2));
-		rgraphCtx.bindTexture(0, 13, m_r->getGBuffer().getDepthRt(),
+		bindStorage(cmdb, 0, 4, binning.m_clustersToken);
+
+		cmdb->bindSampler(0, 5, m_r->getSamplers().m_nearestNearestClamp);
+		cmdb->bindSampler(0, 6, m_r->getSamplers().m_trilinearClamp);
+		rgraphCtx.bindColorTexture(0, 7, m_r->getGBuffer().getColorRt(0));
+		rgraphCtx.bindColorTexture(0, 8, m_r->getGBuffer().getColorRt(1));
+		rgraphCtx.bindColorTexture(0, 9, m_r->getGBuffer().getColorRt(2));
+		rgraphCtx.bindTexture(0, 10, m_r->getGBuffer().getDepthRt(),
 							  TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
-		rgraphCtx.bindColorTexture(0, 14, m_r->getSsr().getRt());
 
 		if(m_r->getRtShadowsEnabled())
 		{
-			rgraphCtx.bindColorTexture(0, 15, m_r->getRtShadows().getRt());
+			rgraphCtx.bindColorTexture(0, 11, m_r->getRtShadows().getRt());
 		}
 		else
 		{
-			rgraphCtx.bindColorTexture(0, 16, m_r->getShadowmapsResolve().getRt());
+			rgraphCtx.bindColorTexture(0, 12, m_r->getShadowmapsResolve().getRt());
 		}
 
 		// Draw
@@ -186,10 +181,17 @@ void LightShading::run(const RenderingContext& ctx, RenderPassWorkContext& rgrap
 		cmdb->bindSampler(0, 0, m_r->getSamplers().m_nearestNearestClamp);
 		cmdb->bindSampler(0, 1, m_r->getSamplers().m_trilinearClamp);
 		rgraphCtx.bindColorTexture(0, 2, m_r->getIndirectDiffuse().getRt());
-		rgraphCtx.bindColorTexture(0, 3, m_r->getDepthDownscale().getHiZRt());
-		rgraphCtx.bindTexture(0, 4, m_r->getGBuffer().getDepthRt(),
+		rgraphCtx.bindColorTexture(0, 3, m_r->getSsr().getRt());
+		rgraphCtx.bindColorTexture(0, 4, m_r->getDepthDownscale().getHiZRt());
+		rgraphCtx.bindTexture(0, 5, m_r->getGBuffer().getDepthRt(),
 							  TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
-		rgraphCtx.bindColorTexture(0, 5, m_r->getGBuffer().getColorRt(0));
+		rgraphCtx.bindColorTexture(0, 6, m_r->getGBuffer().getColorRt(0));
+		rgraphCtx.bindColorTexture(0, 7, m_r->getGBuffer().getColorRt(1));
+		rgraphCtx.bindColorTexture(0, 8, m_r->getGBuffer().getColorRt(2));
+		cmdb->bindTexture(0, 9, m_r->getProbeReflections().getIntegrationLut());
+
+		const ClusteredShadingContext& binning = ctx.m_clusteredShading;
+		bindUniforms(cmdb, 0, 10, binning.m_clusteredShadingUniformsToken);
 
 		const Vec4 pc(2.0f / Vec2(m_r->getInternalResolution()), 0.0f, 0.0f);
 		cmdb->setPushConstants(&pc, sizeof(pc));
@@ -324,13 +326,10 @@ void LightShading::populateRenderGraph(RenderingContext& ctx)
 	pass.newDependency(
 		RenderPassDependency(ctx.m_clusteredShading.m_clustersBufferHandle, BufferUsageBit::STORAGE_FRAGMENT_READ));
 
-	// Refl & indirect
-	pass.newDependency(RenderPassDependency(m_r->getSsr().getRt(), readUsage));
-	pass.newDependency(RenderPassDependency(m_r->getProbeReflections().getReflectionRt(), readUsage));
-
 	// Apply indirect
 	pass.newDependency(RenderPassDependency(m_r->getIndirectDiffuse().getRt(), readUsage));
 	pass.newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), readUsage));
+	pass.newDependency(RenderPassDependency(m_r->getSsr().getRt(), readUsage));
 
 	// Fog
 	pass.newDependency(RenderPassDependency(m_r->getVolumetricFog().getRt(), readUsage));

+ 13 - 7
AnKi/Renderer/Ssr.cpp

@@ -9,6 +9,7 @@
 #include <AnKi/Renderer/DepthDownscale.h>
 #include <AnKi/Renderer/DownscaleBlur.h>
 #include <AnKi/Renderer/RenderQueue.h>
+#include <AnKi/Renderer/ProbeReflections.h>
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Shaders/Include/SsrTypes.h>
 
@@ -38,7 +39,7 @@ Error Ssr::initInternal()
 	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_noiseImage));
 
 	// Create RT
-	m_rtDescr = m_r->create2DRenderTargetDescription(width, height, Format::R16G16B16A16_SFLOAT, "SSR");
+	m_rtDescr = m_r->create2DRenderTargetDescription(width, height, Format::B10G11R11_UFLOAT_PACK32, "SSR");
 	m_rtDescr.bake();
 
 	m_fbDescr.m_colorAttachmentCount = 1;
@@ -93,10 +94,10 @@ void Ssr::populateRenderGraph(RenderingContext& ctx)
 	ppass->newDependency(RenderPassDependency(m_r->getGBuffer().getColorRt(2), readUsage));
 
 	TextureSubresourceInfo hizSubresource;
-	hizSubresource.m_firstMipmap = min(getConfig().getRSsrDepthLod(), m_r->getDepthDownscale().getMipmapCount() - 1);
+	hizSubresource.m_mipmapCount = min(getConfig().getRSsrDepthLod() + 1, m_r->getDepthDownscale().getMipmapCount());
 	ppass->newDependency(RenderPassDependency(m_r->getDepthDownscale().getHiZRt(), readUsage, hizSubresource));
 
-	ppass->newDependency(RenderPassDependency(m_r->getDownscaleBlur().getRt(), readUsage));
+	ppass->newDependency(RenderPassDependency(m_r->getProbeReflections().getReflectionRt(), readUsage));
 
 	ppass->setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
 		run(ctx, rgraphCtx);
@@ -112,8 +113,7 @@ void Ssr::run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
 
 	// Bind uniforms
 	SsrUniforms* unis = allocateAndBindUniforms<SsrUniforms*>(sizeof(SsrUniforms), cmdb, 0, 0);
-	unis->m_depthBufferSize =
-		UVec2(m_r->getInternalResolution().x(), m_r->getInternalResolution().y()) >> (depthLod + 1);
+	unis->m_depthBufferSize = m_r->getInternalResolution() >> (depthLod + 1);
 	unis->m_framebufferSize = UVec2(m_r->getInternalResolution().x(), m_r->getInternalResolution().y()) / 2;
 	unis->m_frameCount = m_r->getFrameCount() & MAX_U32;
 	unis->m_depthMipCount = m_r->getDepthDownscale().getMipmapCount();
@@ -133,7 +133,7 @@ void Ssr::run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
 	rgraphCtx.bindColorTexture(0, 3, m_r->getGBuffer().getColorRt(2));
 
 	TextureSubresourceInfo hizSubresource;
-	hizSubresource.m_firstMipmap = depthLod;
+	hizSubresource.m_mipmapCount = depthLod + 1;
 	rgraphCtx.bindTexture(0, 4, m_r->getDepthDownscale().getHiZRt(), hizSubresource);
 
 	rgraphCtx.bindColorTexture(0, 5, m_r->getDownscaleBlur().getRt());
@@ -141,9 +141,15 @@ void Ssr::run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
 	cmdb->bindSampler(0, 6, m_r->getSamplers().m_trilinearRepeat);
 	cmdb->bindTexture(0, 7, m_noiseImage->getTextureView());
 
+	const ClusteredShadingContext& binning = ctx.m_clusteredShading;
+	bindUniforms(cmdb, 0, 8, binning.m_clusteredShadingUniformsToken);
+	bindUniforms(cmdb, 0, 9, binning.m_reflectionProbesToken);
+	rgraphCtx.bindColorTexture(0, 10, m_r->getProbeReflections().getReflectionRt());
+	bindStorage(cmdb, 0, 11, binning.m_clustersToken);
+
 	if(getConfig().getRPreferCompute())
 	{
-		rgraphCtx.bindImage(0, 8, m_runCtx.m_rt, TextureSubresourceInfo());
+		rgraphCtx.bindImage(0, 12, m_runCtx.m_rt, TextureSubresourceInfo());
 
 		dispatchPPCompute(cmdb, 8, 8, m_r->getInternalResolution().x() / 2, m_r->getInternalResolution().y() / 2);
 	}

+ 0 - 2
AnKi/Shaders/ClusteredShadingCommon.glsl

@@ -45,8 +45,6 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_REFLECTIONS_BIND
 
 layout(set = CLUSTERED_SHADING_SET,
 	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 1) uniform ANKI_RP textureCubeArray u_reflectionsTex;
-layout(set = CLUSTERED_SHADING_SET,
-	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 2) uniform texture2D u_integrationLut;
 #endif
 
 //

+ 1 - 1
AnKi/Shaders/Include/ClusteredShadingTypes.h

@@ -217,7 +217,7 @@ struct ClusteredShadingUniforms
 	/// These are some additive counts used to map a flat index to the index of the specific object.
 	U32 m_objectCountsUpTo[CLUSTER_OBJECT_TYPE_COUNT];
 
-	U32 m_padding;
+	F32 m_reflectionProbesMipCount;
 
 	CommonMatrices m_matrices;
 	CommonMatrices m_previousMatrices;

+ 1 - 1
AnKi/Shaders/LightFunctions.glsl

@@ -438,7 +438,7 @@ Vec3 sampleReflectionVector(Vec3 viewDir, Vec3 normal, F32 roughness, Vec2 unifo
 {
 	const Mat3 tbn = rotationFromDirection(normal);
 	const Mat3 tbnT = transpose(tbn);
-	const Vec3 viewDirTbn = tbnT * (-viewDir);
+	const Vec3 viewDirTbn = tbnT * viewDir;
 
 	Vec3 sampledNormalTbn = sampleGgxVndf(viewDirTbn, roughness, roughness, uniformRandom.x, uniformRandom.y);
 	const Bool perfectReflection = false; // For debugging

+ 12 - 76
AnKi/Shaders/LightShading.ankiprog

@@ -8,7 +8,6 @@
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(TILE_COUNTS, 0u);
 ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 2u);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 3u);
-ANKI_SPECIALIZATION_CONSTANT_U32(IR_MIPMAP_COUNT, 4u);
 
 #pragma anki start vert
 #include <AnKi/Shaders/QuadVert.glsl>
@@ -22,22 +21,20 @@ ANKI_SPECIALIZATION_CONSTANT_U32(IR_MIPMAP_COUNT, 4u);
 #define CLUSTERED_SHADING_SET 0
 #define CLUSTERED_SHADING_UNIFORMS_BINDING 0
 #define CLUSTERED_SHADING_LIGHTS_BINDING 1
-#define CLUSTERED_SHADING_REFLECTIONS_BINDING 4
-#define CLUSTERED_SHADING_CLUSTERS_BINDING 7
+#define CLUSTERED_SHADING_CLUSTERS_BINDING 4
 #include <AnKi/Shaders/ClusteredShadingCommon.glsl>
 
-layout(set = 0, binding = 8) uniform sampler u_nearestAnyClampSampler;
-layout(set = 0, binding = 9) uniform sampler u_trilinearClampSampler;
+layout(set = 0, binding = 5) uniform sampler u_nearestAnyClampSampler;
+layout(set = 0, binding = 6) uniform sampler u_trilinearClampSampler;
 
-layout(set = 0, binding = 10) uniform texture2D u_msRt0;
-layout(set = 0, binding = 11) uniform texture2D u_msRt1;
-layout(set = 0, binding = 12) uniform texture2D u_msRt2;
-layout(set = 0, binding = 13) uniform texture2D u_msDepthRt;
-layout(set = 0, binding = 14) uniform texture2D u_ssrRt;
+layout(set = 0, binding = 7) uniform texture2D u_gbuffer0Tex;
+layout(set = 0, binding = 8) uniform texture2D u_gbuffer1Tex;
+layout(set = 0, binding = 9) uniform texture2D u_gbuffer2Tex;
+layout(set = 0, binding = 10) uniform texture2D u_msDepthRt;
 #if USE_SHADOW_LAYERS
-layout(set = 0, binding = 15) uniform utexture2D u_shadowLayersTex;
+layout(set = 0, binding = 11) uniform utexture2D u_shadowLayersTex;
 #else
-layout(set = 0, binding = 16) uniform texture2D u_resolvedSm;
+layout(set = 0, binding = 12) uniform texture2D u_resolvedSm;
 #endif
 
 layout(location = 0) in Vec2 in_uv;
@@ -76,9 +73,9 @@ void main()
 
 	// Decode GBuffer
 	GbufferInfo gbuffer;
-	unpackGBufferNoVelocity(textureLod(u_msRt0, u_nearestAnyClampSampler, in_uv, 0.0),
-							textureLod(u_msRt1, u_nearestAnyClampSampler, in_uv, 0.0),
-							textureLod(u_msRt2, u_nearestAnyClampSampler, in_uv, 0.0), gbuffer);
+	unpackGBufferNoVelocity(textureLod(u_gbuffer0Tex, u_nearestAnyClampSampler, in_uv, 0.0),
+							textureLod(u_gbuffer1Tex, u_nearestAnyClampSampler, in_uv, 0.0),
+							textureLod(u_gbuffer2Tex, u_nearestAnyClampSampler, in_uv, 0.0), gbuffer);
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, SUBSURFACE_MIN);
 
 	// SM
@@ -169,67 +166,6 @@ void main()
 		out_color += (diffC + specC) * light.m_diffuseColor * (att * spot * max(gbuffer.m_subsurface, lambert));
 	}
 
-	// Indirect specular
-	{
-		// Do the probe read
-		Vec3 specIndirect = Vec3(0.0);
-
-		const Vec3 reflDir = reflect(-viewDir, gbuffer.m_normal);
-		const F32 reflLod = F32(IR_MIPMAP_COUNT - 1u) * gbuffer.m_roughness;
-
-		if(bitCount(cluster.m_reflectionProbesMask) == 1)
-		{
-			// Only one probe, do a fast path without blend weight
-
-			const ReflectionProbe probe = u_reflectionProbes[findLSB2(cluster.m_reflectionProbesMask)];
-
-			// Sample
-			const Vec3 cubeUv = intersectProbe(worldPos, reflDir, probe.m_aabbMin, probe.m_aabbMax, probe.m_position);
-			const Vec4 cubeArrUv = Vec4(cubeUv, probe.m_cubemapIndex);
-			specIndirect = textureLod(u_reflectionsTex, u_trilinearClampSampler, cubeArrUv, reflLod).rgb;
-		}
-		else
-		{
-			// Zero or more than one probes, do a slow path that blends them together
-
-			F32 totalBlendWeight = EPSILON;
-
-			// Loop probes
-			ANKI_LOOP while(cluster.m_reflectionProbesMask != 0u)
-			{
-				const U32 idx = U32(findLSB2(cluster.m_reflectionProbesMask));
-				cluster.m_reflectionProbesMask &= ~(1u << idx);
-				const ReflectionProbe probe = u_reflectionProbes[idx];
-
-				// Compute blend weight
-				const F32 blendWeight = computeProbeBlendWeight(worldPos, probe.m_aabbMin, probe.m_aabbMax, 0.2);
-				totalBlendWeight += blendWeight;
-
-				// Sample reflections
-				const Vec3 cubeUv =
-					intersectProbe(worldPos, reflDir, probe.m_aabbMin, probe.m_aabbMax, probe.m_position);
-				const Vec4 cubeArrUv = Vec4(cubeUv, probe.m_cubemapIndex);
-				const Vec3 c = textureLod(u_reflectionsTex, u_trilinearClampSampler, cubeArrUv, reflLod).rgb;
-				specIndirect += c * blendWeight;
-			}
-
-			// Normalize the colors
-			specIndirect /= totalBlendWeight;
-		}
-
-		// Read the SSL result
-		const Vec4 ssr = textureLod(u_ssrRt, u_trilinearClampSampler, in_uv, 0.0);
-
-		// Combine the SSR and probe reflections and write the result
-		const Vec3 finalSpecIndirect = specIndirect * ssr.a + ssr.rgb;
-
-		// Compute env BRDF
-		const F32 NoV = max(0.0, dot(gbuffer.m_normal, viewDir));
-		const Vec3 env = specularDFG(gbuffer.m_f0, gbuffer.m_roughness, u_integrationLut, u_trilinearClampSampler, NoV);
-
-		out_color += finalSpecIndirect * env;
-	}
-
 	out_color = saturateRp(out_color);
 }
 #pragma anki end

+ 42 - 12
AnKi/Shaders/LightShadingApplyIndirect.ankiprog

@@ -14,9 +14,17 @@
 layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
 layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
 layout(set = 0, binding = 2) uniform ANKI_RP texture2D u_quarterDiffuseIndirectTex;
-layout(set = 0, binding = 3) uniform texture2D u_quarterDepthTex;
-layout(set = 0, binding = 4) uniform texture2D u_fullDepthTex;
-layout(set = 0, binding = 5) uniform ANKI_RP texture2D u_gbuffer0Tex;
+layout(set = 0, binding = 3) uniform ANKI_RP texture2D u_quarterSpecularIndirectTex;
+layout(set = 0, binding = 4) uniform texture2D u_quarterDepthTex;
+layout(set = 0, binding = 5) uniform texture2D u_fullDepthTex;
+layout(set = 0, binding = 6) uniform ANKI_RP texture2D u_gbuffer0Tex;
+layout(set = 0, binding = 7) uniform ANKI_RP texture2D u_gbuffer1Tex;
+layout(set = 0, binding = 8) uniform ANKI_RP texture2D u_gbuffer2Tex;
+layout(set = 0, binding = 9) uniform ANKI_RP texture2D u_integrationLut;
+
+#define CLUSTERED_SHADING_SET 0
+#define CLUSTERED_SHADING_UNIFORMS_BINDING 10
+#include <AnKi/Shaders/ClusteredShadingCommon.glsl>
 
 layout(push_constant, std430) uniform b_pc
 {
@@ -29,11 +37,18 @@ layout(location = 0) out ANKI_RP Vec3 out_color;
 
 void main()
 {
+	// GBuffer
+	GbufferInfo gbuffer;
+	unpackGBufferNoVelocity(textureLod(u_gbuffer0Tex, u_nearestAnyClampSampler, in_uv, 0.0),
+							textureLod(u_gbuffer1Tex, u_nearestAnyClampSampler, in_uv, 0.0),
+							textureLod(u_gbuffer2Tex, u_nearestAnyClampSampler, in_uv, 0.0), gbuffer);
+
 	// Reference
 	const F32 depthCenter = textureLod(u_fullDepthTex, u_linearAnyClampSampler, in_uv, 0.0).x;
 
 	// Do a bilateral upscale
-	out_color = Vec3(0.0);
+	ANKI_RP Vec3 diffuse = Vec3(0.0);
+	ANKI_RP Vec3 specular = Vec3(0.0);
 	const F32 radius = 1.0;
 	F32 sumWeight = EPSILON;
 	for(F32 x = -radius; x <= radius; x += 1.0)
@@ -46,17 +61,32 @@ void main()
 			const F32 w = calculateBilateralWeightDepth(depthCenter, depthTap, 1.0);
 			sumWeight += w;
 
-			const ANKI_RP Vec3 colorTap =
+			// Diffuse
+			ANKI_RP Vec3 colorTap =
 				textureLod(u_quarterDiffuseIndirectTex, u_nearestAnyClampSampler, sampleUv, 0.0).xyz;
-			out_color += colorTap * w;
+			diffuse += colorTap * w;
+
+			// Specular
+			colorTap = textureLod(u_quarterSpecularIndirectTex, u_nearestAnyClampSampler, sampleUv, 0.0).xyz;
+			specular += colorTap * w;
 		}
 	}
 
-	// Modulate
-	out_color /= sumWeight;
-	const ANKI_RP F32 metallic = 0.0;
-	const ANKI_RP Vec3 albedo =
-		unpackDiffuseFromGBuffer(textureLod(u_gbuffer0Tex, u_linearAnyClampSampler, in_uv, 0.0), metallic);
-	out_color *= albedo;
+	// Do diffuse
+	diffuse /= sumWeight;
+	diffuse *= gbuffer.m_diffuse;
+
+	// Do specular. The GBuffer normal is in world space so NoV needs a world space view direction
+	const Vec2 ndc = UV_TO_NDC(in_uv);
+	const Vec4 worldPos4 = u_clusteredShading.m_matrices.m_invertedViewProjectionJitter * Vec4(ndc, depthCenter, 1.0);
+	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
+	const ANKI_RP Vec3 viewDir = normalize(u_clusteredShading.m_cameraPosition - worldPos);
+	const F32 NoV = max(0.0, dot(gbuffer.m_normal, viewDir));
+	const Vec3 env = specularDFG(gbuffer.m_f0, gbuffer.m_roughness, u_integrationLut, u_linearAnyClampSampler, NoV);
+	specular /= sumWeight;
+	specular *= env;
+
+	// Writeout
+	out_color = saturateRp(diffuse + specular);
 }
 #pragma anki end

+ 6 - 1
AnKi/Shaders/PackFunctions.glsl

@@ -174,6 +174,11 @@ ANKI_RP F32 unpackRoughnessFromGBuffer(ANKI_RP Vec4 rt1)
 	return r;
 }
 
+ANKI_RP Vec3 unpackF0FromGBuffer(ANKI_RP Vec4 rt1)
+{
+	return rt1.zzz; // F0 is read from the z channel of RT1 and replicated to all 3 components
+}
+
 // Read part of the G-buffer
 void unpackGBufferNoVelocity(ANKI_RP Vec4 rt0, ANKI_RP Vec4 rt1, ANKI_RP Vec4 rt2, out GbufferInfo g)
 {
@@ -182,7 +187,7 @@ void unpackGBufferNoVelocity(ANKI_RP Vec4 rt0, ANKI_RP Vec4 rt1, ANKI_RP Vec4 rt
 
 	g.m_roughness = unpackRoughnessFromGBuffer(rt1);
 	g.m_metallic = rt1.y;
-	g.m_f0 = Vec3(rt1.z);
+	g.m_f0 = unpackF0FromGBuffer(rt1);
 	const ANKI_RP F32 maxEmission = rt1.w * ABSOLUTE_MAX_EMISSION;
 
 	g.m_normal = signedOctDecode(rt2.yzw);

+ 90 - 16
AnKi/Shaders/Ssr.glsl

@@ -27,14 +27,20 @@ layout(set = 0, binding = 6) uniform sampler u_trilinearRepeatSampler;
 layout(set = 0, binding = 7) uniform texture2D u_noiseTex;
 const Vec2 NOISE_TEX_SIZE = Vec2(64.0);
 
+#define CLUSTERED_SHADING_SET 0
+#define CLUSTERED_SHADING_UNIFORMS_BINDING 8
+#define CLUSTERED_SHADING_REFLECTIONS_BINDING 9
+#define CLUSTERED_SHADING_CLUSTERS_BINDING 11
+#include <AnKi/Shaders/ClusteredShadingCommon.glsl>
+
 #if defined(ANKI_COMPUTE_SHADER)
 const UVec2 WORKGROUP_SIZE = UVec2(8, 8);
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
 
-layout(set = 0, binding = 8) uniform writeonly image2D u_outImg;
+layout(set = 0, binding = 12) uniform writeonly image2D u_outImg;
 #else
 layout(location = 0) in Vec2 in_uv;
-layout(location = 0) out Vec4 out_color;
+layout(location = 0) out Vec3 out_color;
 #endif
 
 void main()
@@ -66,22 +72,22 @@ void main()
 	const Vec3 viewPos = viewPos4.xyz / viewPos4.w;
 
 	// Compute refl vector
-	const Vec3 viewDir = normalize(viewPos);
+	const Vec3 viewDir = -normalize(viewPos);
 	const Vec3 viewNormal = u_unis.m_normalMat * worldNormal;
 #if STOCHASTIC
-	const Vec3 reflVec = sampleReflectionVector(viewDir, viewNormal, roughness, noise.xy);
+	const Vec3 reflDir = sampleReflectionVector(viewDir, viewNormal, roughness, noise.xy);
 #else
-	const Vec3 reflVec = reflect(viewDir, viewNormal);
+	const Vec3 reflDir = reflect(-viewDir, viewNormal);
 #endif
 
 	// Do the heavy work
 	Vec3 hitPoint;
 	F32 hitAttenuation;
-	const U32 lod = 0u;
+	const U32 lod = 8u; // Use the max LOD for ray marching
 	const U32 step = u_unis.m_firstStepPixels;
 	const F32 stepf = F32(step);
 	const F32 minStepf = stepf / 4.0;
-	raymarchGroundTruth(viewPos, reflVec, uv, depth, u_unis.m_projMat, u_unis.m_maxSteps, u_depthRt,
+	raymarchGroundTruth(viewPos, reflDir, uv, depth, u_unis.m_projMat, u_unis.m_maxSteps, u_depthRt,
 						u_trilinearClampSampler, F32(lod), u_unis.m_depthBufferSize, step,
 						U32((stepf - minStepf) * noise.x + minStepf), hitPoint, hitAttenuation);
 
@@ -93,7 +99,7 @@ void main()
 			u_unis.m_normalMat
 			* unpackNormalFromGBuffer(textureLod(u_gbufferRt2, u_trilinearClampSampler, hitPoint.xy, 0.0));
 		F32 backFaceAttenuation;
-		rejectBackFaces(reflVec, hitNormal, backFaceAttenuation);
+		rejectBackFaces(reflDir, hitNormal, backFaceAttenuation);
 
 		hitAttenuation *= backFaceAttenuation;
 	}
@@ -116,7 +122,7 @@ void main()
 #endif
 
 	// Read the reflection
-	Vec4 outColor;
+	Vec3 ssrColor = Vec3(0.0);
 	ANKI_BRANCH if(hitAttenuation > 0.0)
 	{
 		// Reproject the UV because you are reading the previous frame
@@ -132,19 +138,87 @@ void main()
 #endif
 
 		// Read the light buffer
-		outColor.rgb = textureLod(u_lightBufferRt, u_trilinearClampSampler, hitPoint.xy, lod).rgb;
-		outColor.rgb = clamp(outColor.rgb, 0.0, MAX_F32); // Fix the value just in case
-		outColor.rgb *= hitAttenuation;
-		outColor.a = 1.0 - hitAttenuation;
+		ssrColor = textureLod(u_lightBufferRt, u_trilinearClampSampler, hitPoint.xy, lod).rgb;
+		ssrColor = clamp(ssrColor, 0.0, MAX_F32); // Fix the value just in case
 	}
-	else
+
+	// Read probes
+	Vec3 probeColor = Vec3(0.0);
+	ANKI_BRANCH if(hitAttenuation < 1.0)
 	{
-		outColor = Vec4(0.0, 0.0, 0.0, 1.0);
+#if defined(ANKI_COMPUTE_SHADER)
+		const Vec2 fragCoord = Vec2(gl_GlobalInvocationID.xy) + 0.5;
+#else
+		const Vec2 fragCoord = gl_FragCoord.xy;
+#endif
+
+#if STOCHASTIC
+		const F32 reflLod = 0.0;
+#else
+		const F32 reflLod = (u_clusteredShading.m_reflectionProbesMipCount - 1.0) * roughness;
+#endif
+
+		// Get cluster
+		const Vec2 ndc = UV_TO_NDC(uv);
+		const Vec4 worldPos4 = u_clusteredShading.m_matrices.m_invertedViewProjectionJitter * Vec4(ndc, depth, 1.0);
+		const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
+		Cluster cluster = getClusterFragCoord(Vec3(fragCoord * 2.0, depth));
+
+		// Compute the refl dir in world space this time
+		const ANKI_RP Vec3 viewDir = normalize(u_clusteredShading.m_cameraPosition - worldPos);
+#if STOCHASTIC
+		const Vec3 reflDir = sampleReflectionVector(viewDir, worldNormal, roughness, noise.xy);
+#else
+		const Vec3 reflDir = reflect(-viewDir, worldNormal);
+#endif
+
+		if(bitCount(cluster.m_reflectionProbesMask) == 1)
+		{
+			// Only one probe, do a fast path without blend weight
+
+			const ReflectionProbe probe = u_reflectionProbes[findLSB2(cluster.m_reflectionProbesMask)];
+
+			// Sample
+			const Vec3 cubeUv = intersectProbe(worldPos, reflDir, probe.m_aabbMin, probe.m_aabbMax, probe.m_position);
+			const Vec4 cubeArrUv = Vec4(cubeUv, probe.m_cubemapIndex);
+			probeColor = textureLod(u_reflectionsTex, u_trilinearClampSampler, cubeArrUv, reflLod).rgb;
+		}
+		else
+		{
+			// Zero or more than one probes, do a slow path that blends them together
+
+			F32 totalBlendWeight = EPSILON;
+
+			// Loop probes
+			ANKI_LOOP while(cluster.m_reflectionProbesMask != 0u)
+			{
+				const U32 idx = U32(findLSB2(cluster.m_reflectionProbesMask));
+				cluster.m_reflectionProbesMask &= ~(1u << idx);
+				const ReflectionProbe probe = u_reflectionProbes[idx];
+
+				// Compute blend weight
+				const F32 blendWeight = computeProbeBlendWeight(worldPos, probe.m_aabbMin, probe.m_aabbMax, 0.2);
+				totalBlendWeight += blendWeight;
+
+				// Sample reflections
+				const Vec3 cubeUv =
+					intersectProbe(worldPos, reflDir, probe.m_aabbMin, probe.m_aabbMax, probe.m_position);
+				const Vec4 cubeArrUv = Vec4(cubeUv, probe.m_cubemapIndex);
+				const Vec3 c = textureLod(u_reflectionsTex, u_trilinearClampSampler, cubeArrUv, reflLod).rgb;
+				probeColor += c * blendWeight;
+			}
+
+			// Normalize the colors
+			probeColor /= totalBlendWeight;
+		}
 	}
 
+	// Compute final value
+	const Vec3 outColor = mix(probeColor, ssrColor, hitAttenuation);
+
 	// Store
 #if defined(ANKI_COMPUTE_SHADER)
-	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), outColor);
+	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), Vec4(outColor, 0.0));
 #else
 	out_color = outColor;
 #endif