Ver Fonte

HLSL code for tile deferred with MSAA support (WIP)

BearishSun há 8 anos atrás
pai
commit
9e92fbca04

+ 155 - 79
Data/Raw/Engine/Shaders/TiledDeferredLighting.bsl

@@ -32,13 +32,19 @@ Technique
 			// Arbitrary limit, increase if needed
 			// Arbitrary limit, increase if needed
             #define MAX_LIGHTS 512
             #define MAX_LIGHTS 512
 
 
-			SamplerState 	gGBufferASamp : register(s0);
-			SamplerState 	gGBufferBSamp : register(s1);
-			SamplerState 	gDepthBufferSamp : register(s2);
+			SamplerState gGBufferASamp : register(s0);
+			SamplerState gGBufferBSamp : register(s1);
+			SamplerState gDepthBufferSamp : register(s2);
 	
 	
-			Texture2D 		gGBufferATex : register(t0);
-			Texture2D		gGBufferBTex : register(t1);
-			Texture2D 		gDepthBufferTex : register(t2);
+			#if MSAA_COUNT > 1
+			Texture2DMS<float4, MSAA_COUNT> gGBufferATex : register(t0);
+			Texture2DMS<float4, MSAA_COUNT>	gGBufferBTex : register(t1);
+			Texture2DMS<float4, MSAA_COUNT> gDepthBufferTex : register(t2);
+			#else
+			Texture2D gGBufferATex : register(t0);
+			Texture2D gGBufferBTex : register(t1);
+			Texture2D gDepthBufferTex : register(t2);
+			#endif
 			
 			
 			SurfaceData decodeGBuffer(float4 GBufferAData, float4 GBufferBData, float deviceZ)
 			SurfaceData decodeGBuffer(float4 GBufferAData, float4 GBufferBData, float deviceZ)
 			{
 			{
@@ -52,16 +58,67 @@ Technique
 				
 				
 				return output;
 				return output;
 			}			
 			}			
+						
+			StructuredBuffer<LightData> gLights : register(t3);		
+		
+			cbuffer Params : register(b0)
+			{
+				// Offsets at which specific light types begin in gLights buffer
+				// Assumed directional lights start at 0
+				// x - offset to point lights, y - offset to spot lights, z - total number of lights
+				uint3 gLightOffsets;
+				uint2 gFramebufferSize; // x is used as the row pitch when computing linear per-sample output addresses (MSAA path); presumably the framebuffer size in pixels - TODO confirm
+			}
+		
+			#if MSAA_COUNT > 1
+			RWBuffer<float4> gOutput : register(u0);
+			
+			uint getLinearAddress(uint2 coord, uint sampleIndex) // Maps a pixel coordinate and a sample index to an element index into the flat gOutput buffer
+			{
+				return (coord.y * gFramebufferSize.x + coord.x) * MSAA_COUNT + sampleIndex; // Row-major pixel index, with MSAA_COUNT consecutive entries per pixel
+			}
 			
 			
-			SurfaceData getGBufferData(float2 uv)
+			void writeBufferSample(uint2 coord, uint sampleIndex, float4 color)
 			{
 			{
-				float4 GBufferAData = gGBufferATex.SampleLevel(gGBufferASamp, uv, 0);
-				float4 GBufferBData = gGBufferBTex.SampleLevel(gGBufferBSamp, uv, 0);
-				float deviceZ = gDepthBufferTex.SampleLevel(gDepthBufferSamp, uv, 0).r;
+				uint idx = getLinearAddress(coord, sampleIndex);
+				gOutput[idx] = color;
+			}
+			
+			bool needsPerSampleShading(SurfaceData samples[MSAA_COUNT]) // Returns true if any sample differs noticeably from sample 0, i.e. the pixel cannot be shaded once for all samples
+			{
+				float3 albedo = samples[0].albedo.xyz; // Sample 0 is the reference every other sample is compared against
+				float3 normal = samples[0].worldNormal.xyz;
+				float depth = samples[0].depth;
+
+				[unroll]
+				for(uint i = 1; i < MSAA_COUNT; i++)
+				{
+					float3 otherAlbedo = samples[i].albedo.xyz;
+					float3 otherNormal = samples[i].worldNormal.xyz;
+					float otherDepth = samples[i].depth;
+
+					[branch] // Differences are summed per component as absolute values, so opposite-signed channel differences cannot cancel out
+					if(abs(depth - otherDepth) > 0.1f || dot(abs(normal - otherNormal), float3(1, 1, 1)) > 0.1f || dot(abs(albedo - otherAlbedo), float3(1, 1, 1)) > 0.1f)
+					{
+						return true;
+					}
+				}
+				
+				return false; // All samples are within the 0.1 thresholds of sample 0
+			}
+			
+			SurfaceData getGBufferData(uint2 pixelPos, uint sampleIndex)
+			{
+				float4 GBufferAData = gGBufferATex.Load(int3(pixelPos, 0), sampleIndex);
+				float4 GBufferBData = gGBufferBTex.Load(int3(pixelPos, 0), sampleIndex);
+				float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0), sampleIndex).r;
 				
 				
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
 			}
 			}
 			
 			
+			#else
+			RWTexture2D<float4>	gOutput : register(u0);
+			
 			SurfaceData getGBufferData(uint2 pixelPos)
 			SurfaceData getGBufferData(uint2 pixelPos)
 			{
 			{
 				float4 GBufferAData = gGBufferATex.Load(int3(pixelPos, 0));
 				float4 GBufferAData = gGBufferATex.Load(int3(pixelPos, 0));
@@ -69,20 +126,9 @@ Technique
 				float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
 				float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
 				
 				
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
 				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
-			}	
-			
-			StructuredBuffer<LightData> gLights : register(t3);		
-		
-			RWTexture2D<float4>	gOutput : register(u0);
-		
-			cbuffer Params : register(b0)
-			{
-				// Offsets at which specific light types begin in gLights buffer
-				// Assumed directional lights start at 0
-				// x - offset to point lights, y - offset to spot lights, z - total number of lights
-				uint3 gLightOffsets;
-			}
-			
+			}			
+			#endif
+						
 			groupshared uint sTileMinZ;
 			groupshared uint sTileMinZ;
 			groupshared uint sTileMaxZ;
 			groupshared uint sTileMaxZ;
 
 
@@ -90,6 +136,42 @@ Technique
 			groupshared uint sTotalNumLights;
 			groupshared uint sTotalNumLights;
             groupshared uint sLightIndices[MAX_LIGHTS];
             groupshared uint sLightIndices[MAX_LIGHTS];
 
 
+			float4 getLighting(float2 clipSpacePos, SurfaceData surfaceData) // Accumulates directional, point and spot light contributions for one surface sample; returns (lit color, alpha)
+			{
+				// x, y are now in clip space, z, w are in view space
+				// We multiply them by a special inverse view-projection matrix that has had the projection entries that affect
+				// z, w eliminated (since they are already in view space)
+				// Note: Multiply by depth should be avoided if using orthographic projection
+				float4 mixedSpacePos = float4(clipSpacePos * -surfaceData.depth, surfaceData.depth, 1);
+				float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
+				float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
+				
+				float3 lightAccumulator = 0;
+				float alpha = 0.0f;
+				if(surfaceData.worldNormal.w > 0.0f) // NOTE(review): presumably w > 0 marks a sample that has geometry written to the GBuffer - confirm against decodeGBuffer
+				{
+					for(uint i = 0; i < gLightOffsets[0]; ++i) // Directional lights: entries [0, gLightOffsets[0]) of gLights, read directly rather than through sLightIndices
+						lightAccumulator += getDirLightContibution(surfaceData, gLights[i]);
+					
+                    for (uint i = 0; i < sNumLightsPerType[0]; ++i) // Point lights: the first sNumLightsPerType[0] entries of sLightIndices
+                    {
+                        uint lightIdx = sLightIndices[i];
+                        lightAccumulator += getPointLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
+                    }
+
+					for(uint i = sNumLightsPerType[0]; i < sTotalNumLights; ++i) // Spot lights: the remaining entries of sLightIndices, up to sTotalNumLights
+                    {
+                        uint lightIdx = sLightIndices[i];
+                        lightAccumulator += getSpotLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
+                    }
+
+					alpha = 1.0f; // Alpha stays 0 for samples that fail the check above
+				}
+				
+				float3 diffuse = surfaceData.albedo.xyz / PI; // TODO - Add better lighting model later
+				return float4(lightAccumulator * diffuse, alpha);
+			}			
+			
 			[numthreads(TILE_SIZE, TILE_SIZE, 1)]
 			[numthreads(TILE_SIZE, TILE_SIZE, 1)]
 			void main(
 			void main(
 				uint3 groupId : SV_GroupID,
 				uint3 groupId : SV_GroupID,
@@ -99,8 +181,25 @@ Technique
 				uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
 				uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
 				uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
 				uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
 				
 				
-				float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
-				float depth = convertFromDeviceZ(deviceZ);
+				// Get data for all samples, and determine per-pixel minimum and maximum depth values
+				SurfaceData surfaceData[MSAA_COUNT];
+				uint sampleMinZ = 0x7F7FFFFF;
+				uint sampleMaxZ = 0;
+
+				#if MSAA_COUNT > 1
+				[unroll]
+				for(uint i = 0; i < MSAA_COUNT; ++i)
+				{
+					surfaceData[i] = getGBufferData(pixelPos, i);
+					
+					sampleMinZ = min(sampleMinZ, asuint(-surfaceData[i].depth));
+					sampleMaxZ = max(sampleMaxZ, asuint(-surfaceData[i].depth));
+				}
+				#else
+				surfaceData[0] = getGBufferData(pixelPos);
+				sampleMinZ = asuint(-surfaceData[0].depth);
+				sampleMaxZ = asuint(-surfaceData[0].depth);
+				#endif
 
 
 				// Set initial values
 				// Set initial values
 				if(threadIndex == 0)
 				if(threadIndex == 0)
@@ -114,10 +213,10 @@ Technique
 				
 				
 				GroupMemoryBarrierWithGroupSync();
 				GroupMemoryBarrierWithGroupSync();
 				
 				
-				// Determine minimum and maximum depth values
-				InterlockedMin(sTileMinZ, asuint(-depth));
-				InterlockedMax(sTileMaxZ, asuint(-depth));
-
+				// Determine minimum and maximum depth values for a tile			
+				InterlockedMin(sTileMinZ, sampleMinZ);
+				InterlockedMax(sTileMaxZ, sampleMaxZ);
+				
 				GroupMemoryBarrierWithGroupSync();
 				GroupMemoryBarrierWithGroupSync();
 				
 				
 			    float minTileZ = asfloat(sTileMinZ);
 			    float minTileZ = asfloat(sTileMinZ);
@@ -178,18 +277,6 @@ Technique
 				frustumPlanes[4] = float4(0.0f, 0.0f, -1.0f, -minTileZ); 
 				frustumPlanes[4] = float4(0.0f, 0.0f, -1.0f, -minTileZ); 
 				frustumPlanes[5] = float4(0.0f, 0.0f, 1.0f, maxTileZ);
 				frustumPlanes[5] = float4(0.0f, 0.0f, 1.0f, maxTileZ);
 				
 				
-				// Generate world position
-				float2 screenUv = ((float2)(gViewportRectangle.xy + pixelPos) + 0.5f) / (float2)gViewportRectangle.zw;
-				float2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
-			
-				// x, y are now in clip space, z, w are in view space
-				// We multiply them by a special inverse view-projection matrix, that had the projection entries that effect
-				// z, w eliminated (since they are already in view space)
-				// Note: Multiply by depth should be avoided if using ortographic projection
-				float4 mixedSpacePos = float4(clipSpacePos.xy * -depth, depth, 1);
-				float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
-				float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
-				
                 // Find radial & spot lights overlapping the tile
                 // Find radial & spot lights overlapping the tile
 				for(uint type = 0; type < 2; type++)
 				for(uint type = 0; type < 2; type++)
 				{
 				{
@@ -242,37 +329,35 @@ Technique
 
 
                 GroupMemoryBarrierWithGroupSync();
                 GroupMemoryBarrierWithGroupSync();
 
 
-				// Note: This unnecessarily samples depth again
-				SurfaceData surfaceData = getGBufferData(pixelPos);
-				
-				float3 lightAccumulator = 0;
-				float alpha = 0.0f;
-				if(surfaceData.worldNormal.w > 0.0f)
-				{
-					for(uint i = 0; i < gLightOffsets[0]; ++i)
-						lightAccumulator += getDirLightContibution(surfaceData, gLights[i]);
-					
-                    for (uint i = 0; i < sNumLightsPerType[0]; ++i)
-                    {
-                        uint lightIdx = sLightIndices[i];
-                        lightAccumulator += getPointLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
-                    }
-
-					for(uint i = sNumLightsPerType[0]; i < sTotalNumLights; ++i)
-                    {
-                        uint lightIdx = sLightIndices[i];
-                        lightAccumulator += getSpotLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
-                    }
-
-					alpha = 1.0f;
-				}
-				
-				float3 diffuse = surfaceData.albedo.xyz / PI; // TODO - Add better lighting model later
+				// Generate world position
+				float2 screenUv = ((float2)(gViewportRectangle.xy + pixelPos) + 0.5f) / (float2)gViewportRectangle.zw;
+				float2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
+			
 				uint2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;
 				uint2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;
 
 
 				// Ignore pixels out of valid range
 				// Ignore pixels out of valid range
-				if (all(dispatchThreadId.xy < viewportMax)) 
-					gOutput[pixelPos] = float4(gOutput[pixelPos].xyz + diffuse * lightAccumulator, alpha);
+				if (all(dispatchThreadId.xy < viewportMax))
+				{
+					#if MSAA_COUNT > 1
+					float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]); // Sample 0 is always shaded and written
+					writeBufferSample(pixelPos, 0, lighting);
+
+					bool doPerSampleShading = needsPerSampleShading(surfaceData); // Takes the gathered per-sample surface data; local is named differently so it does not shadow the function
+					if(doPerSampleShading)
+					{
+						[unroll]
+						for(uint i = 1; i < MSAA_COUNT; ++i) // Remaining samples are shaded individually only when they differ from sample 0
+						{
+							lighting = getLighting(clipSpacePos.xy, surfaceData[i]);
+							writeBufferSample(pixelPos, i, lighting);
+						}
+					} // NOTE(review): when per-sample shading is skipped, samples 1..MSAA_COUNT-1 are not written here - confirm the consumer of gOutput accounts for that
+					
+					#else
+					float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
+					gOutput[pixelPos] = float4(gOutput[pixelPos].rgb + lighting.rgb, lighting.a); // Adds lighting on top of the existing output color; alpha is overwritten
+					#endif
+				}
 			}
 			}
 		};
 		};
 	};
 	};
@@ -311,15 +396,6 @@ Technique
 				return surfaceData;
 				return surfaceData;
 			}			
 			}			
 			
 			
-			SurfaceData getGBufferData(vec2 uv)
-			{
-				vec4 GBufferAData = textureLod(gGBufferATex, uv, 0);
-				vec4 GBufferBData = textureLod(gGBufferBTex, uv, 0);
-				float deviceZ = textureLod(gDepthBufferTex, uv, 0).r;
-				
-				return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
-			}	
-			
 			SurfaceData getGBufferData(ivec2 pixelPos)
 			SurfaceData getGBufferData(ivec2 pixelPos)
 			{
 			{
 				vec4 GBufferAData = texelFetch(gGBufferATex, pixelPos, 0);
 				vec4 GBufferAData = texelFetch(gGBufferATex, pixelPos, 0);

+ 1 - 0
Source/RenderBeast/Source/BsLightRendering.cpp

@@ -111,6 +111,7 @@ namespace bs { namespace ct
 	void TiledDeferredLightingMat::_initDefines(ShaderDefines& defines)
 	void TiledDeferredLightingMat::_initDefines(ShaderDefines& defines)
 	{
 	{
 		defines.set("TILE_SIZE", TILE_SIZE);
 		defines.set("TILE_SIZE", TILE_SIZE);
+		defines.set("MSAA_COUNT", 1);
 	}
 	}
 
 
 	void TiledDeferredLightingMat::execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera)
 	void TiledDeferredLightingMat::execute(const SPtr<RenderTargets>& gbuffer, const SPtr<GpuParamBlockBuffer>& perCamera)