Просмотр исходного кода

Fix bugs when there are multiple probes

Panagiotis Christopoulos Charitos 6 лет назад
Родитель
Коммит
de51e0dc02

+ 12 - 7
shaders/ClusteredShadingCommon.glsl

@@ -122,36 +122,41 @@ layout(set = LIGHT_SET, binding = LIGHT_CLUSTERS_BINDING + 1, std430) readonly b
 #endif
 
 // Debugging function
-Vec3 lightHeatmap(U32 firstIndex, U32 maxLights, Bool decals, Bool plights, Bool slights, Bool probes, Bool fogVolumes)
+Vec3 lightHeatmap(U32 firstIndex, U32 maxObjects, U32 typeMask)
 {
 	U32 count = 0;
 	U32 idx;
 
 	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
 	{
-		count += (plights) ? 1u : 0u;
+		count += ((typeMask & (1u << 0u)) != 0u) ? 1u : 0u;
 	}
 
 	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
 	{
-		count += (slights) ? 1u : 0u;
+		count += ((typeMask & (1u << 1u)) != 0u) ? 1u : 0u;
 	}
 
 	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
 	{
-		count += (probes) ? 1u : 0u;
+		count += ((typeMask & (1u << 2u)) != 0u) ? 1u : 0u;
 	}
 
 	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
 	{
-		count += (decals) ? 1u : 0u;
+		count += ((typeMask & (1u << 3u)) != 0u) ? 1u : 0u;
 	}
 
 	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
 	{
-		count += (fogVolumes) ? 1u : 0u;
+		count += ((typeMask & (1u << 4u)) != 0u) ? 1u : 0u;
 	}
 
-	const F32 factor = min(1.0, F32(count) / F32(maxLights));
+	while((idx = u_lightIndices[firstIndex++]) != MAX_U32)
+	{
+		count += ((typeMask & (1u << 5u)) != 0u) ? 1u : 0u;
+	}
+
+	const F32 factor = min(1.0, F32(count) / F32(maxObjects));
 	return heatmap(factor);
 }

+ 32 - 18
shaders/LightShading.glslp

@@ -84,7 +84,7 @@ void main()
 
 		idxOffset = u_clusters[clusterIdx];
 
-		// out_color = lightHeatmap(idxOffset, 5, false, true, false, false, false); return;
+		// out_color = lightHeatmap(idxOffset, 5, 1u << 3); return;
 	}
 
 	// Decode GBuffer
@@ -227,13 +227,15 @@ void main()
 
 	// Indirect diffuse
 	{
-		Vec3 diffIndirect = Vec3(0.0);
+		Vec3 diffIndirect;
 
-		if(subgroupAll(u_lightIndices[idxOffset] != MAX_U32 && u_lightIndices[idxOffset + 1u] == MAX_U32))
+		const U32 crntProbeIdx = u_lightIndices[idxOffset];
+		if(subgroupAllEqual(crntProbeIdx)
+			&& subgroupAll(crntProbeIdx != MAX_U32 && u_lightIndices[idxOffset + 1u] == MAX_U32))
 		{
-			// Only one probe, do a fast path without blend weight
+			// All invocations of the subgroup point to the same probe and there is only one probe, do a fast path without blend weight
 
-			GlobalIlluminationProbe probe = u_giProbes[u_lightIndices[idxOffset]];
+			GlobalIlluminationProbe probe = u_giProbes[subgroupBroadcastFirst(crntProbeIdx)]; // It should be uniform
 			idxOffset += 2u;
 
 			// Sample
@@ -245,21 +247,33 @@ void main()
 			// Zero or more than one probes, do a slow path that blends them together
 
 			F32 totalBlendWeight = EPSILON;
+			diffIndirect = Vec3(0.0);
 
-			// Loop probes
-			ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
+			UVec4 execMask = UVec4(0u);
+			ANKI_LOOP while((execMask & gl_SubgroupEqMask) == UVec4(0u))
 			{
-				GlobalIlluminationProbe probe = u_giProbes[idx];
-
-				// Compute blend weight
-				const F32 blendWeight =
-					computeProbeBlendWeight(worldPos, probe.m_aabbMin, probe.m_aabbMax, probe.m_fadeDistance);
-				totalBlendWeight += blendWeight;
-
-				// Sample
-				const Vec3 c = sampleGlobalIllumination(
-					worldPos, gbuffer.m_normal, probe, u_globalIlluminationTextures, u_trilinearClampSampler);
-				diffIndirect += c * blendWeight;
+				U32 uniformIdxOffset = subgroupBroadcastFirst(idxOffset); // Should be uniform
+				const UVec4 laneMask = subgroupBallot(uniformIdxOffset == idxOffset);
+				execMask |= laneMask;
+
+				if(uniformIdxOffset == idxOffset)
+				{
+					// Loop probes
+					ANKI_LOOP while((idx = u_lightIndices[uniformIdxOffset++]) != MAX_U32)
+					{
+						GlobalIlluminationProbe probe = u_giProbes[idx];
+
+						// Compute blend weight
+						const F32 blendWeight =
+							computeProbeBlendWeight(worldPos, probe.m_aabbMin, probe.m_aabbMax, probe.m_fadeDistance);
+						totalBlendWeight += blendWeight;
+
+						// Sample
+						const Vec3 c = sampleGlobalIllumination(
+							worldPos, gbuffer.m_normal, probe, u_globalIlluminationTextures, u_trilinearClampSampler);
+						diffIndirect += c * blendWeight;
+					}
+				}
 			}
 
 			// Normalize

+ 1 - 1
src/anki/core/Config.cpp

@@ -49,7 +49,7 @@ Config::Config()
 	newOption("r.gi.tileResolution", 32);
 	newOption("r.gi.shadowMapResolution", 128);
 	newOption("r.gi.maxCachedProbes", 16);
-	newOption("r.gi.maxVisibleProbes", 4);
+	newOption("r.gi.maxVisibleProbes", 8);
 	newOption("r.gi.firstClipmapLevelCellSize", 1.0);
 	newOption("r.gi.secondClipmapLevelCellSize", 8.0);
 	newOption("r.gi.firstClipmapMaxDistance", 20.0);

+ 1 - 0
src/anki/renderer/ClusterBin.cpp

@@ -803,6 +803,7 @@ void ClusterBin::writeTypedObjectsToGpuBuffers(BinCtx& ctx) const
 			out.m_aabbMax = in.m_aabbMax;
 			out.m_textureIndex = &in - &rqueue.m_giProbes.getFront();
 			out.m_halfTexelSizeU = 1.0f / in.m_cellCounts.x() / 2.0f;
+			out.m_fadeDistance = in.m_fadeDistance;
 		}
 	}
 	else