Browse Source

Move SM resolve to the new clusters

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
1a2af2ae48

+ 1 - 1
AnKi/Renderer/LightShading.cpp

@@ -57,7 +57,7 @@ Error LightShading::initLightShading(const ConfigSet& config)
 	ANKI_CHECK(getResourceManager().loadResource("Shaders/LightShading.ankiprog", m_lightShading.m_prog));
 
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_lightShading.m_prog);
-	variantInitInfo.addConstant("TILE_COUNT", m_r->getTileCounts());
+	variantInitInfo.addConstant("TILE_COUNTS", m_r->getTileCounts());
 	variantInitInfo.addConstant("Z_SPLIT_COUNT", m_r->getZSplitCount());
 	variantInitInfo.addConstant("TILE_SIZE", m_r->getTileSize());
 	variantInitInfo.addConstant("IR_MIPMAP_COUNT", U32(m_r->getProbeReflections().getReflectionTextureMipmapCount()));

+ 13 - 17
AnKi/Renderer/ShadowmapsResolve.cpp

@@ -35,14 +35,14 @@ Error ShadowmapsResolve::initInternal(const ConfigSet& cfg)
 	height = min(m_r->getHeight(), getAlignedRoundUp(4, height));
 	ANKI_R_LOGI("Initializing shadow resolve pass. Size %ux%u", width, height);
 
-	m_rtDescr = m_r->create2DRenderTargetDescription(width, height, Format::R8G8B8A8_UNORM, "SM_resolve");
+	m_rtDescr = m_r->create2DRenderTargetDescription(width, height, Format::R8G8B8A8_UNORM, "SM resolve");
 	m_rtDescr.bake();
 
 	ANKI_CHECK(getResourceManager().loadResource("Shaders/ShadowmapsResolve.ankiprog", m_prog));
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
-	variantInitInfo.addConstant("CLUSTER_COUNT_X", U32(m_r->getClusterCount()[0]));
-	variantInitInfo.addConstant("CLUSTER_COUNT_Y", U32(m_r->getClusterCount()[1]));
 	variantInitInfo.addConstant("FB_SIZE", UVec2(width, height));
+	variantInitInfo.addConstant("TILE_COUNTS", m_r->getTileCounts());
+	variantInitInfo.addConstant("Z_SPLIT_COUNT", m_r->getZSplitCount());
 	const ShaderProgramResourceVariant* variant;
 	m_prog->getOrCreateVariant(variantInitInfo, variant);
 	m_grProg = variant->getProgram();
@@ -56,7 +56,7 @@ void ShadowmapsResolve::populateRenderGraph(RenderingContext& ctx)
 	m_runCtx.m_ctx = &ctx;
 	m_runCtx.m_rt = rgraph.newRenderTarget(m_rtDescr);
 
-	ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("SM_resolve");
+	ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("SM resolve");
 	rpass.setWork(
 		[](RenderPassWorkContext& rgraphCtx) { static_cast<ShadowmapsResolve*>(rgraphCtx.m_userData)->run(rgraphCtx); },
 		this, 0);
@@ -70,23 +70,19 @@ void ShadowmapsResolve::run(RenderPassWorkContext& rgraphCtx)
 {
 	const RenderingContext& ctx = *m_runCtx.m_ctx;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
-	const ClusterBinOut& rsrc = ctx.m_clusterBinOut;
+	const ClustererGpuObjects& rsrc = ctx.m_clusterShading;
 
 	cmdb->bindShaderProgram(m_grProg);
 
-	rgraphCtx.bindImage(0, 0, m_runCtx.m_rt, TextureSubresourceInfo());
-	cmdb->bindSampler(0, 1, m_r->getSamplers().m_trilinearClamp);
+	bindUniforms(cmdb, 0, 0, rsrc.m_clusteredShadingUniformsToken);
+	bindUniforms(cmdb, 0, 1, rsrc.m_pointLightsToken);
+	bindUniforms(cmdb, 0, 2, rsrc.m_spotLightsToken);
+	rgraphCtx.bindColorTexture(0, 3, m_r->getShadowMapping().getShadowmapRt());
+	bindStorage(cmdb, 0, 4, rsrc.m_clustersToken);
 
-	rgraphCtx.bindTexture(0, 2, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
-
-	bindUniforms(cmdb, 0, 3, ctx.m_lightShadingUniformsToken);
-
-	bindUniforms(cmdb, 0, 4, rsrc.m_pointLightsToken);
-	bindUniforms(cmdb, 0, 5, rsrc.m_spotLightsToken);
-	rgraphCtx.bindColorTexture(0, 6, m_r->getShadowMapping().getShadowmapRt());
-
-	bindStorage(cmdb, 0, 7, rsrc.m_clustersToken);
-	bindStorage(cmdb, 0, 8, rsrc.m_indicesToken);
+	rgraphCtx.bindImage(0, 5, m_runCtx.m_rt, TextureSubresourceInfo());
+	cmdb->bindSampler(0, 6, m_r->getSamplers().m_trilinearClamp);
+	rgraphCtx.bindTexture(0, 7, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
 
 	dispatchPPCompute(cmdb, 8, 8, m_rtDescr.m_width, m_rtDescr.m_height);
 }

+ 34 - 15
AnKi/Shaders/ClusteredShadingCommon2.glsl

@@ -145,7 +145,6 @@ Vec3 clusterHeatmap(Cluster cluster, U32 objectTypeMask)
 	return heatmap(factor);
 }
 
-#if defined(ANKI_FRAGMENT_SHADER)
 /// Returns the index of the zSplit or linearizeDepth(n, f, depth)*zSplitCount
 /// Simplifying this equation is 1/(a+b/depth) where a=(n-f)/(n*zSplitCount) and b=f/(n*zSplitCount)
 U32 computeZSplitClusterIndex(F32 depth, U32 zSplitCount, F32 a, F32 b)
@@ -155,19 +154,22 @@ U32 computeZSplitClusterIndex(F32 depth, U32 zSplitCount, F32 a, F32 b)
 }
 
 /// Return the tile index.
-U32 computeTileClusterIndex(U32 tileSize, U32 tileCountX)
+U32 computeTileClusterIndexUv(Vec2 uv, UVec2 tileCounts)
 {
-	const UVec2 tileXY = UVec2(gl_FragCoord.xy / F32(tileSize));
-	return tileXY.y * tileCountX + tileXY.x;
+	const UVec2 tileXY = UVec2(uv * Vec2(tileCounts));
+	return tileXY.y * tileCounts.x + tileXY.x;
 }
 
-#	if defined(CLUSTERED_SHADING_CLUSTERS_BINDING)
-/// Get the final cluster after ORing and ANDing the masks.
-Cluster getCluster(F32 depth, U32 tileSize, U32 tileCountX, U32 tileCountY, U32 zSplitCount, F32 a, F32 b)
+/// Return the tile index.
+U32 computeTileClusterIndexFragCoord(Vec2 fragCoord, U32 tileSize, U32 tileCountX)
 {
-	const Cluster tileCluster = u_clusters2[computeTileClusterIndex(tileSize, tileCountX)];
-	const Cluster zCluster = u_clusters2[computeZSplitClusterIndex(depth, zSplitCount, a, b) + tileCountX * tileCountY];
+	const UVec2 tileXY = UVec2(fragCoord / F32(tileSize));
+	return tileXY.y * tileCountX + tileXY.x;
+}
 
+/// Merge the tiles with z splits into a single cluster.
+Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
+{
 	Cluster outCluster;
 	outCluster.m_pointLightsMask = subgroupOr(tileCluster.m_pointLightsMask & zCluster.m_pointLightsMask);
 	outCluster.m_spotLightsMask = subgroupOr(tileCluster.m_spotLightsMask & zCluster.m_spotLightsMask);
@@ -181,11 +183,28 @@ Cluster getCluster(F32 depth, U32 tileSize, U32 tileCountX, U32 tileCountY, U32
 	return outCluster;
 }
 
-Cluster getCluster(F32 depth)
+#if defined(CLUSTERED_SHADING_CLUSTERS_BINDING)
+/// Get the final cluster after ORing and ANDing the masks.
+Cluster getClusterFragCoord(Vec3 fragCoord, U32 tileSize, UVec2 tileCounts, U32 zSplitCount, F32 a, F32 b)
 {
-	return getCluster(depth, u_clusterShading.m_tileSize, u_clusterShading.m_tileCounts.x,
-					  u_clusterShading.m_tileCounts.y, u_clusterShading.m_zSplitCount, u_clusterShading.m_zSplitMagic.x,
-					  u_clusterShading.m_zSplitMagic.y);
+	const Cluster tileCluster = u_clusters2[computeTileClusterIndexFragCoord(fragCoord.xy, tileSize, tileCounts.x)];
+	const Cluster zCluster =
+		u_clusters2[computeZSplitClusterIndex(fragCoord.z, zSplitCount, a, b) + tileCounts.x * tileCounts.y];
+	return mergeClusters(tileCluster, zCluster);
 }
-#	endif
-#endif // defined(ANKI_FRAGMENT_SHADER)
+
+Cluster getClusterFragCoord(Vec3 fragCoord)
+{
+	return getClusterFragCoord(fragCoord, u_clusterShading.m_tileSize, u_clusterShading.m_tileCounts,
+							   u_clusterShading.m_zSplitCount, u_clusterShading.m_zSplitMagic.x,
+							   u_clusterShading.m_zSplitMagic.y);
+}
+
+Cluster getClusterUv(Vec2 uv, F32 depth, UVec2 tileCounts, U32 zSplitCount, F32 a, F32 b)
+{
+	const Cluster tileCluster = u_clusters2[computeTileClusterIndexUv(uv, tileCounts)];
+	const Cluster zCluster =
+		u_clusters2[computeZSplitClusterIndex(depth, zSplitCount, a, b) + tileCounts.x * tileCounts.y];
+	return mergeClusters(tileCluster, zCluster);
+}
+#endif

+ 1 - 1
AnKi/Shaders/ForwardShadingCommonFrag.glsl

@@ -40,7 +40,7 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos)
 	Vec3 outColor = Vec3(0.0);
 
 	// Find the cluster and then the light counts
-	Cluster cluster = getCluster(gl_FragCoord.z);
+	Cluster cluster = getClusterFragCoord(gl_FragCoord.xyz);
 
 	// Point lights
 	ANKI_LOOP while(cluster.m_pointLightsMask != 0u)

+ 3 - 3
AnKi/Shaders/LightShading.ankiprog

@@ -5,7 +5,7 @@
 
 #pragma anki mutator USE_SHADOW_LAYERS 0 1
 
-ANKI_SPECIALIZATION_CONSTANT_UVEC2(TILE_COUNT, 0);
+ANKI_SPECIALIZATION_CONSTANT_UVEC2(TILE_COUNTS, 0);
 ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 2);
 ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 3);
 ANKI_SPECIALIZATION_CONSTANT_U32(IR_MIPMAP_COUNT, 4);
@@ -73,8 +73,8 @@ void main()
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
 	// Get the cluster
-	Cluster cluster = getCluster(depth, TILE_SIZE, TILE_COUNT.x, TILE_COUNT.y, Z_SPLIT_COUNT,
-								 u_clusterShading.m_zSplitMagic.x, u_clusterShading.m_zSplitMagic.y);
+	Cluster cluster = getClusterFragCoord(Vec3(gl_FragCoord.xy, depth), TILE_SIZE, TILE_COUNTS, Z_SPLIT_COUNT,
+										  u_clusterShading.m_zSplitMagic.x, u_clusterShading.m_zSplitMagic.y);
 
 	// out_color = clusterHeatmap(cluster, 1u << CLUSTER_OBJECT_TYPE_POINT_LIGHT); return;
 

+ 36 - 30
AnKi/Shaders/ShadowmapsResolve.ankiprog

@@ -6,21 +6,21 @@
 #pragma anki start comp
 
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(FB_SIZE, 0);
-ANKI_SPECIALIZATION_CONSTANT_U32(CLUSTER_COUNT_X, 2);
-ANKI_SPECIALIZATION_CONSTANT_U32(CLUSTER_COUNT_Y, 3);
+ANKI_SPECIALIZATION_CONSTANT_UVEC2(TILE_COUNTS, 2);
+ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 4);
 
-#define LIGHT_SET 0
-#define LIGHT_COMMON_UNIS_BINDING 3
-#define LIGHT_LIGHTS_BINDING 4
-#define LIGHT_CLUSTERS_BINDING 7
-#include <AnKi/Shaders/ClusteredShadingCommon.glsl>
+#define CLUSTERED_SHADING_SET 0
+#define CLUSTERED_SHADING_UNIFORMS_BINDING 0
+#define CLUSTERED_SHADING_LIGHTS_BINDING 1
+#define CLUSTERED_SHADING_CLUSTERS_BINDING 4
+#include <AnKi/Shaders/ClusteredShadingCommon2.glsl>
 
 const UVec2 WORKGROUP_SIZE = UVec2(8, 8);
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
 
-layout(set = 0, binding = 0, rgba8) uniform image2D out_img;
-layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 2) uniform texture2D u_depthRt;
+layout(set = 0, binding = 5, rgba8) writeonly uniform image2D u_outImg;
+layout(set = 0, binding = 6) uniform sampler u_linearAnyClampSampler;
+layout(set = 0, binding = 7) uniform texture2D u_depthRt;
 
 void main()
 {
@@ -33,34 +33,37 @@ void main()
 	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
 	const Vec2 ndc = UV_TO_NDC(uv);
 	const F32 depth = textureLod(u_depthRt, u_linearAnyClampSampler, uv, 0.0).r;
-	const Vec4 worldPos4 = u_invViewProjMat * Vec4(ndc, depth, 1.0);
+	const Vec4 worldPos4 = u_clusterShading.m_matrices.m_invertedViewProjectionJitter * Vec4(ndc, depth, 1.0);
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
 	// Cluster
-	const U32 clusterIdx = computeClusterIndex(u_clustererMagic, uv, worldPos, CLUSTER_COUNT_X, CLUSTER_COUNT_Y);
-	U32 idxOffset = u_clusters[clusterIdx];
+	Cluster cluster = getClusterUv(uv, depth, TILE_COUNTS, Z_SPLIT_COUNT, u_clusterShading.m_zSplitMagic.x,
+								   u_clusterShading.m_zSplitMagic.y);
 
+	// Layers
 	U32 shadowCasterCountPerFragment = 0;
 	const U32 maxShadowCastersPerFragment = 4;
 	F32 shadowFactors[maxShadowCastersPerFragment] = F32[](0.0, 0.0, 0.0, 0.0);
 
 	// Dir light
-	if(u_dirLight.m_active != 0u && u_dirLight.m_cascadeCount > 0)
+	const DirectionalLight2 dirLight = u_clusterShading.m_directionalLight;
+	if(dirLight.m_active != 0u && dirLight.m_cascadeCount > 0)
 	{
-		const Vec4 viewPos4 = u_invProjMat * Vec4(ndc, depth, 1.0);
-		const F32 positiveZViewSpace = -(viewPos4.z / viewPos4.w);
+		const F32 positiveZViewSpace =
+			testPlanePoint(u_clusterShading.m_nearPlaneWSpace.xyz, u_clusterShading.m_nearPlaneWSpace.w, worldPos)
+			+ u_clusterShading.m_near;
 
 		F32 shadowFactor;
-		if(positiveZViewSpace < u_dirLight.m_effectiveShadowDistance)
+		if(positiveZViewSpace < dirLight.m_effectiveShadowDistance)
 		{
 			const U32 cascadeIdx =
-				computeShadowCascadeIndex(positiveZViewSpace, u_dirLight.m_shadowCascadesDistancePower,
-										  u_dirLight.m_effectiveShadowDistance, u_dirLight.m_cascadeCount);
+				computeShadowCascadeIndex(positiveZViewSpace, dirLight.m_shadowCascadesDistancePower,
+										  dirLight.m_effectiveShadowDistance, dirLight.m_cascadeCount);
 
 			shadowFactor =
-				computeShadowFactorDirLight(u_dirLight, cascadeIdx, worldPos, u_shadowTex, u_linearAnyClampSampler);
+				computeShadowFactorDirLight(dirLight, cascadeIdx, worldPos, u_shadowAtlasTex, u_linearAnyClampSampler);
 
-			F32 distanceFadeFactor = saturate(positiveZViewSpace / u_dirLight.m_effectiveShadowDistance);
+			F32 distanceFadeFactor = saturate(positiveZViewSpace / dirLight.m_effectiveShadowDistance);
 			distanceFadeFactor = pow(distanceFadeFactor, 8.0);
 			shadowFactor += distanceFadeFactor;
 		}
@@ -74,36 +77,39 @@ void main()
 	}
 
 	// Point lights
-	U32 idx;
-	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
+	ANKI_LOOP while(cluster.m_pointLightsMask != 0u)
 	{
-		PointLight light = u_pointLights[idx];
+		const I32 idx = findLSB64(cluster.m_pointLightsMask);
+		cluster.m_pointLightsMask &= ~(1ul << U64(idx));
+		const PointLight2 light = u_pointLights2[idx];
 
 		ANKI_BRANCH if(light.m_shadowAtlasTileScale >= 0.0)
 		{
 			const Vec3 frag2Light = light.m_position - worldPos;
 
 			const F32 shadowFactor =
-				computeShadowFactorPointLight(light, frag2Light, u_shadowTex, u_linearAnyClampSampler);
+				computeShadowFactorPointLight(light, frag2Light, u_shadowAtlasTex, u_linearAnyClampSampler);
 			shadowFactors[min(maxShadowCastersPerFragment - 1, shadowCasterCountPerFragment++)] = shadowFactor;
 		}
 	}
 
 	// Spot lights
-	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
+	ANKI_LOOP while(cluster.m_spotLightsMask != 0u)
 	{
-		SpotLight light = u_spotLights[idx];
+		const I32 idx = findLSB64(cluster.m_spotLightsMask);
+		cluster.m_spotLightsMask &= ~(1ul << U64(idx));
+		const SpotLight2 light = u_spotLights2[idx];
 
-		ANKI_BRANCH if(light.m_shadowmapId >= 0.0)
+		ANKI_BRANCH if(light.m_shadowLayer != MAX_U32)
 		{
 			const F32 shadowFactor =
-				computeShadowFactorSpotLight(light, worldPos, u_shadowTex, u_linearAnyClampSampler);
+				computeShadowFactorSpotLight(light, worldPos, u_shadowAtlasTex, u_linearAnyClampSampler);
 			shadowFactors[min(maxShadowCastersPerFragment - 1, shadowCasterCountPerFragment++)] = shadowFactor;
 		}
 	}
 
 	// Store
-	imageStore(out_img, IVec2(gl_GlobalInvocationID.xy),
+	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy),
 			   Vec4(shadowFactors[0], shadowFactors[1], shadowFactors[2], shadowFactors[3]));
 }