|
@@ -32,13 +32,19 @@ Technique
|
|
|
// Arbitrary limit, increase if needed
|
|
// Arbitrary limit, increase if needed
|
|
|
#define MAX_LIGHTS 512
|
|
#define MAX_LIGHTS 512
|
|
|
|
|
|
|
|
- SamplerState gGBufferASamp : register(s0);
|
|
|
|
|
- SamplerState gGBufferBSamp : register(s1);
|
|
|
|
|
- SamplerState gDepthBufferSamp : register(s2);
|
|
|
|
|
|
|
+ SamplerState gGBufferASamp : register(s0);
|
|
|
|
|
+ SamplerState gGBufferBSamp : register(s1);
|
|
|
|
|
+ SamplerState gDepthBufferSamp : register(s2);
|
|
|
|
|
|
|
|
- Texture2D gGBufferATex : register(t0);
|
|
|
|
|
- Texture2D gGBufferBTex : register(t1);
|
|
|
|
|
- Texture2D gDepthBufferTex : register(t2);
|
|
|
|
|
|
|
+ #if MSAA_COUNT > 1
|
|
|
|
|
+ Texture2DMS<float4, MSAA_COUNT> gGBufferATex : register(t0);
|
|
|
|
|
+ Texture2DMS<float4, MSAA_COUNT> gGBufferBTex : register(t1);
|
|
|
|
|
+ Texture2DMS<float4, MSAA_COUNT> gDepthBufferTex : register(t2);
|
|
|
|
|
+ #else
|
|
|
|
|
+ Texture2D gGBufferATex : register(t0);
|
|
|
|
|
+ Texture2D gGBufferBTex : register(t1);
|
|
|
|
|
+ Texture2D gDepthBufferTex : register(t2);
|
|
|
|
|
+ #endif
|
|
|
|
|
|
|
|
SurfaceData decodeGBuffer(float4 GBufferAData, float4 GBufferBData, float deviceZ)
|
|
SurfaceData decodeGBuffer(float4 GBufferAData, float4 GBufferBData, float deviceZ)
|
|
|
{
|
|
{
|
|
@@ -52,16 +58,67 @@ Technique
|
|
|
|
|
|
|
|
return output;
|
|
return output;
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ StructuredBuffer<LightData> gLights : register(t3);
|
|
|
|
|
+
|
|
|
|
|
+ // Per-dispatch parameters for the tiled lighting pass.
+ cbuffer Params : register(b0)
|
|
|
|
|
+ {
|
|
|
|
|
+ // Offsets at which specific light types begin in gLights buffer
|
|
|
|
|
+ // Assumed directional lights start at 0
|
|
|
|
|
+ // x - offset to point lights, y - offset to spot lights, z - total number of lights
|
|
|
|
|
+ uint3 gLightOffsets;
|
|
|
|
|
+ // x is used as the row stride (pixels per row) when computing linear addresses into the MSAA output buffer.
+ // NOTE(review): presumably the full framebuffer width/height in pixels - confirm against the code that binds it.
+ uint2 gFramebufferSize;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ #if MSAA_COUNT > 1
|
|
|
|
|
+ RWBuffer<float4> gOutput : register(u0);
|
|
|
|
|
+
|
|
|
|
|
+ // Maps a pixel coordinate and a sample index to a flat element index. Pixels are laid out row by row
+ // (gFramebufferSize.x pixels per row), and each pixel owns MSAA_COUNT consecutive entries.
+ uint getLinearAddress(uint2 coord, uint sampleIndex)
+ {
+     uint pixelIndex = coord.x + gFramebufferSize.x * coord.y;
+     return sampleIndex + MSAA_COUNT * pixelIndex;
+ }
|
|
|
|
|
|
|
|
- SurfaceData getGBufferData(float2 uv)
|
|
|
|
|
|
|
+ // Stores the color of a single sample of a pixel into the flat gOutput buffer.
+ //  coord       - pixel coordinate, passed on to getLinearAddress()
+ //  sampleIndex - index of the sample within the pixel
+ //  color       - value written to the buffer
+ void writeBufferSample(uint2 coord, uint sampleIndex, float4 color)
|
|
|
{
|
|
{
|
|
|
- float4 GBufferAData = gGBufferATex.SampleLevel(gGBufferASamp, uv, 0);
|
|
|
|
|
- float4 GBufferBData = gGBufferBTex.SampleLevel(gGBufferBSamp, uv, 0);
|
|
|
|
|
- float deviceZ = gDepthBufferTex.SampleLevel(gDepthBufferSamp, uv, 0).r;
|
|
|
|
|
|
|
+ uint idx = getLinearAddress(coord, sampleIndex);
|
|
|
|
|
+ // Plain overwrite - any value previously stored for this sample is replaced
+ gOutput[idx] = color;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Determines whether the samples of a pixel differ enough that each of them must be shaded separately.
+ //  samples - decoded surface data for every sample of the pixel
+ // Returns true if any sample differs from sample 0 in depth, world normal or albedo by more than a fixed
+ // threshold of 0.1, false if all samples are similar to sample 0.
+ bool needsPerSampleShading(SurfaceData samples[MSAA_COUNT])
+ {
+     float3 albedo = samples[0].albedo.xyz;
+     float3 normal = samples[0].worldNormal.xyz;
+     float depth = samples[0].depth;
+
+     [unroll]
+     for(int i = 1; i < MSAA_COUNT; i++)
+     {
+         // Sum of absolute per-component differences. abs() has to be applied before the components are summed,
+         // otherwise differences of opposite sign cancel out (e.g. red vs. green albedo) and the edge is missed.
+         float depthDiff = abs(depth - samples[i].depth);
+         float normalDiff = dot(abs(normal - samples[i].worldNormal.xyz), float3(1, 1, 1));
+         float albedoDiff = dot(abs(albedo - samples[i].albedo.xyz), float3(1, 1, 1));
+
+         [branch]
+         if(depthDiff > 0.1f || normalDiff > 0.1f || albedoDiff > 0.1f)
+         {
+             return true;
+         }
+     }
+
+     return false;
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Reads one MSAA sample of the GBuffer and depth textures at the given pixel and decodes it.
+ //  pixelPos    - texel coordinate to read
+ //  sampleIndex - index of the sample to read
+ // Multisampled textures have no mip chain, so Load() takes a two-component location (no mip level
+ // component) followed by the sample index.
+ SurfaceData getGBufferData(uint2 pixelPos, uint sampleIndex)
+ {
+     int2 location = int2(pixelPos);
+
+     float4 GBufferAData = gGBufferATex.Load(location, sampleIndex);
+     float4 GBufferBData = gGBufferBTex.Load(location, sampleIndex);
+     float deviceZ = gDepthBufferTex.Load(location, sampleIndex).r;
+
      return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
  }
|
|
|
|
|
|
|
|
|
|
+ #else
|
|
|
|
|
+ RWTexture2D<float4> gOutput : register(u0);
|
|
|
|
|
+
|
|
|
SurfaceData getGBufferData(uint2 pixelPos)
|
|
SurfaceData getGBufferData(uint2 pixelPos)
|
|
|
{
|
|
{
|
|
|
float4 GBufferAData = gGBufferATex.Load(int3(pixelPos, 0));
|
|
float4 GBufferAData = gGBufferATex.Load(int3(pixelPos, 0));
|
|
@@ -69,20 +126,9 @@ Technique
|
|
|
float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
|
|
float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
|
|
|
|
|
|
|
|
return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
|
|
return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- StructuredBuffer<LightData> gLights : register(t3);
|
|
|
|
|
-
|
|
|
|
|
- RWTexture2D<float4> gOutput : register(u0);
|
|
|
|
|
-
|
|
|
|
|
- cbuffer Params : register(b0)
|
|
|
|
|
- {
|
|
|
|
|
- // Offsets at which specific light types begin in gLights buffer
|
|
|
|
|
- // Assumed directional lights start at 0
|
|
|
|
|
- // x - offset to point lights, y - offset to spot lights, z - total number of lights
|
|
|
|
|
- uint3 gLightOffsets;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
|
|
+ }
|
|
|
|
|
+ #endif
|
|
|
|
|
+
|
|
|
groupshared uint sTileMinZ;
|
|
groupshared uint sTileMinZ;
|
|
|
groupshared uint sTileMaxZ;
|
|
groupshared uint sTileMaxZ;
|
|
|
|
|
|
|
@@ -90,6 +136,42 @@ Technique
|
|
|
groupshared uint sTotalNumLights;
|
|
groupshared uint sTotalNumLights;
|
|
|
groupshared uint sLightIndices[MAX_LIGHTS];
|
|
groupshared uint sLightIndices[MAX_LIGHTS];
|
|
|
|
|
|
|
|
|
|
+ // Computes the lit color of a single surface sample.
+ //  clipSpacePos - xy position of the pixel in clip space
+ //  surfaceData  - decoded GBuffer data of the sample
+ // Returns the accumulated light multiplied by the diffuse term in rgb. Alpha is 1 when the sample was lit
+ // (surfaceData.worldNormal.w > 0) and 0 otherwise, in which case rgb is zero.
+ float4 getLighting(float2 clipSpacePos, SurfaceData surfaceData)
+ {
+     // x, y are in clip space, z, w are in view space
+     // We multiply them by a special inverse view-projection matrix, that had the projection entries that affect
+     // z, w eliminated (since they are already in view space)
+     // Note: Multiply by depth should be avoided if using orthographic projection
+     float4 mixedSpacePos = float4(clipSpacePos * -surfaceData.depth, surfaceData.depth, 1);
+     float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
+     float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
+
+     float3 lightAccumulator = 0;
+     float alpha = 0.0f;
+     if(surfaceData.worldNormal.w > 0.0f)
+     {
+         // Each loop uses its own counter name: with legacy HLSL loop scoping a repeated "uint i" in sibling
+         // loops is reported as a redeclaration.
+
+         // Directional lights are read straight from the start of gLights
+         for(uint dirIdx = 0; dirIdx < gLightOffsets[0]; ++dirIdx)
+             lightAccumulator += getDirLightContibution(surfaceData, gLights[dirIdx]);
+
+         // The first sNumLightsPerType[0] entries of sLightIndices are evaluated as point lights
+         for(uint pointIdx = 0; pointIdx < sNumLightsPerType[0]; ++pointIdx)
+         {
+             uint lightIdx = sLightIndices[pointIdx];
+             lightAccumulator += getPointLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
+         }
+
+         // The remaining entries, up to sTotalNumLights, are evaluated as spot lights
+         for(uint spotIdx = sNumLightsPerType[0]; spotIdx < sTotalNumLights; ++spotIdx)
+         {
+             uint lightIdx = sLightIndices[spotIdx];
+             lightAccumulator += getSpotLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
+         }
+
+         alpha = 1.0f;
+     }
+
+     float3 diffuse = surfaceData.albedo.xyz / PI; // TODO - Add better lighting model later
+     return float4(lightAccumulator * diffuse, alpha);
+ }
|
|
|
|
|
+
|
|
|
[numthreads(TILE_SIZE, TILE_SIZE, 1)]
|
|
[numthreads(TILE_SIZE, TILE_SIZE, 1)]
|
|
|
void main(
|
|
void main(
|
|
|
uint3 groupId : SV_GroupID,
|
|
uint3 groupId : SV_GroupID,
|
|
@@ -99,8 +181,25 @@ Technique
|
|
|
uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
|
|
uint threadIndex = groupThreadId.y * TILE_SIZE + groupThreadId.x;
|
|
|
uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
|
|
uint2 pixelPos = dispatchThreadId.xy + gViewportRectangle.xy;
|
|
|
|
|
|
|
|
- float deviceZ = gDepthBufferTex.Load(int3(pixelPos, 0)).r;
|
|
|
|
|
- float depth = convertFromDeviceZ(deviceZ);
|
|
|
|
|
|
|
+ // Get data for all samples, and determine per-pixel minimum and maximum depth values
|
|
|
|
|
+ SurfaceData surfaceData[MSAA_COUNT];
|
|
|
|
|
+ uint sampleMinZ = 0x7F7FFFFF;
|
|
|
|
|
+ uint sampleMaxZ = 0;
|
|
|
|
|
+
|
|
|
|
|
+ #if MSAA_COUNT > 1
|
|
|
|
|
+ [unroll]
|
|
|
|
|
+ for(uint i = 0; i < MSAA_COUNT; ++i)
|
|
|
|
|
+ {
|
|
|
|
|
+ surfaceData[i] = getGBufferData(pixelPos, i);
|
|
|
|
|
+
|
|
|
|
|
+ sampleMinZ = min(sampleMinZ, asuint(-surfaceData[i].depth));
|
|
|
|
|
+ sampleMaxZ = max(sampleMaxZ, asuint(-surfaceData[i].depth));
|
|
|
|
|
+ }
|
|
|
|
|
+ #else
|
|
|
|
|
+ surfaceData[0] = getGBufferData(pixelPos);
|
|
|
|
|
+ sampleMinZ = asuint(-surfaceData[0].depth);
|
|
|
|
|
+ sampleMaxZ = asuint(-surfaceData[0].depth);
|
|
|
|
|
+ #endif
|
|
|
|
|
|
|
|
// Set initial values
|
|
// Set initial values
|
|
|
if(threadIndex == 0)
|
|
if(threadIndex == 0)
|
|
@@ -114,10 +213,10 @@ Technique
|
|
|
|
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
GroupMemoryBarrierWithGroupSync();
|
|
|
|
|
|
|
|
- // Determine minimum and maximum depth values
|
|
|
|
|
- InterlockedMin(sTileMinZ, asuint(-depth));
|
|
|
|
|
- InterlockedMax(sTileMaxZ, asuint(-depth));
|
|
|
|
|
-
|
|
|
|
|
|
|
+ // Determine minimum and maximum depth values for a tile
|
|
|
|
|
+ InterlockedMin(sTileMinZ, sampleMinZ);
|
|
|
|
|
+ InterlockedMax(sTileMaxZ, sampleMaxZ);
|
|
|
|
|
+
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
GroupMemoryBarrierWithGroupSync();
|
|
|
|
|
|
|
|
float minTileZ = asfloat(sTileMinZ);
|
|
float minTileZ = asfloat(sTileMinZ);
|
|
@@ -178,18 +277,6 @@ Technique
|
|
|
frustumPlanes[4] = float4(0.0f, 0.0f, -1.0f, -minTileZ);
|
|
frustumPlanes[4] = float4(0.0f, 0.0f, -1.0f, -minTileZ);
|
|
|
frustumPlanes[5] = float4(0.0f, 0.0f, 1.0f, maxTileZ);
|
|
frustumPlanes[5] = float4(0.0f, 0.0f, 1.0f, maxTileZ);
|
|
|
|
|
|
|
|
- // Generate world position
|
|
|
|
|
- float2 screenUv = ((float2)(gViewportRectangle.xy + pixelPos) + 0.5f) / (float2)gViewportRectangle.zw;
|
|
|
|
|
- float2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
|
|
|
|
|
-
|
|
|
|
|
- // x, y are now in clip space, z, w are in view space
|
|
|
|
|
- // We multiply them by a special inverse view-projection matrix, that had the projection entries that effect
|
|
|
|
|
- // z, w eliminated (since they are already in view space)
|
|
|
|
|
- // Note: Multiply by depth should be avoided if using ortographic projection
|
|
|
|
|
- float4 mixedSpacePos = float4(clipSpacePos.xy * -depth, depth, 1);
|
|
|
|
|
- float4 worldPosition4D = mul(gMatScreenToWorld, mixedSpacePos);
|
|
|
|
|
- float3 worldPosition = worldPosition4D.xyz / worldPosition4D.w;
|
|
|
|
|
-
|
|
|
|
|
// Find radial & spot lights overlapping the tile
|
|
// Find radial & spot lights overlapping the tile
|
|
|
for(uint type = 0; type < 2; type++)
|
|
for(uint type = 0; type < 2; type++)
|
|
|
{
|
|
{
|
|
@@ -242,37 +329,35 @@ Technique
|
|
|
|
|
|
|
|
GroupMemoryBarrierWithGroupSync();
|
|
GroupMemoryBarrierWithGroupSync();
|
|
|
|
|
|
|
|
- // Note: This unnecessarily samples depth again
|
|
|
|
|
- SurfaceData surfaceData = getGBufferData(pixelPos);
|
|
|
|
|
-
|
|
|
|
|
- float3 lightAccumulator = 0;
|
|
|
|
|
- float alpha = 0.0f;
|
|
|
|
|
- if(surfaceData.worldNormal.w > 0.0f)
|
|
|
|
|
- {
|
|
|
|
|
- for(uint i = 0; i < gLightOffsets[0]; ++i)
|
|
|
|
|
- lightAccumulator += getDirLightContibution(surfaceData, gLights[i]);
|
|
|
|
|
-
|
|
|
|
|
- for (uint i = 0; i < sNumLightsPerType[0]; ++i)
|
|
|
|
|
- {
|
|
|
|
|
- uint lightIdx = sLightIndices[i];
|
|
|
|
|
- lightAccumulator += getPointLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- for(uint i = sNumLightsPerType[0]; i < sTotalNumLights; ++i)
|
|
|
|
|
- {
|
|
|
|
|
- uint lightIdx = sLightIndices[i];
|
|
|
|
|
- lightAccumulator += getSpotLightContribution(worldPosition, surfaceData, gLights[lightIdx]);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- alpha = 1.0f;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- float3 diffuse = surfaceData.albedo.xyz / PI; // TODO - Add better lighting model later
|
|
|
|
|
|
|
+ // Generate world position
|
|
|
|
|
+ float2 screenUv = ((float2)(gViewportRectangle.xy + pixelPos) + 0.5f) / (float2)gViewportRectangle.zw;
|
|
|
|
|
+ float2 clipSpacePos = (screenUv - gClipToUVScaleOffset.zw) / gClipToUVScaleOffset.xy;
|
|
|
|
|
+
|
|
|
uint2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;
|
|
uint2 viewportMax = gViewportRectangle.xy + gViewportRectangle.zw;
|
|
|
|
|
|
|
|
// Ignore pixels out of valid range
|
|
// Ignore pixels out of valid range
|
|
|
- if (all(dispatchThreadId.xy < viewportMax))
|
|
|
|
|
- gOutput[pixelPos] = float4(gOutput[pixelPos].xyz + diffuse * lightAccumulator, alpha);
|
|
|
|
|
|
|
+ if (all(dispatchThreadId.xy < viewportMax))
|
|
|
|
|
+ {
|
|
|
|
|
+ #if MSAA_COUNT > 1
+     // The first sample is always shaded
+     float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
+     writeBufferSample(pixelPos, 0, lighting);
+
+     // The remaining samples are shaded only when they differ noticeably from the first one.
+     // needsPerSampleShading() expects the per-sample surface data array (not the pixel position), and the
+     // local must not share the function's name or it hides the function being called.
+     // NOTE(review): when this is false, entries for samples 1..MSAA_COUNT-1 are not written here - presumably
+     // the pass consuming gOutput accounts for that; confirm.
+     bool doPerSampleShading = needsPerSampleShading(surfaceData);
+     if(doPerSampleShading)
+     {
+         [unroll]
+         for(uint sampleIdx = 1; sampleIdx < MSAA_COUNT; ++sampleIdx)
+         {
+             lighting = getLighting(clipSpacePos.xy, surfaceData[sampleIdx]);
+             writeBufferSample(pixelPos, sampleIdx, lighting);
+         }
+     }
+
+ #else
+     // Non-MSAA path: add the lighting on top of the color already stored in the output texture
+     float4 lighting = getLighting(clipSpacePos.xy, surfaceData[0]);
+     gOutput[pixelPos] = float4(gOutput[pixelPos].rgb + lighting.rgb, lighting.a);
+ #endif
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
};
|
|
};
|
|
@@ -311,15 +396,6 @@ Technique
|
|
|
return surfaceData;
|
|
return surfaceData;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- SurfaceData getGBufferData(vec2 uv)
|
|
|
|
|
- {
|
|
|
|
|
- vec4 GBufferAData = textureLod(gGBufferATex, uv, 0);
|
|
|
|
|
- vec4 GBufferBData = textureLod(gGBufferBTex, uv, 0);
|
|
|
|
|
- float deviceZ = textureLod(gDepthBufferTex, uv, 0).r;
|
|
|
|
|
-
|
|
|
|
|
- return decodeGBuffer(GBufferAData, GBufferBData, deviceZ);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
SurfaceData getGBufferData(ivec2 pixelPos)
|
|
SurfaceData getGBufferData(ivec2 pixelPos)
|
|
|
{
|
|
{
|
|
|
vec4 GBufferAData = texelFetch(gGBufferATex, pixelPos, 0);
|
|
vec4 GBufferAData = texelFetch(gGBufferATex, pixelPos, 0);
|