#include "$ENGINE$\PPBase.bslinc" #include "$ENGINE$\PerCameraData.bslinc" technique PPSSAO { mixin PPBase; mixin PerCameraData; code { [internal] cbuffer Input { float gSampleRadius; float gWorldSpaceRadiusMask; float2 gTanHalfFOV; // x - horz FOV, y - vert FOV float2 gRandomTileScale; float gCotHalfFOV; float gBias; float2 gDownsampledPixelSize; float2 gFadeMultiplyAdd; float gPower; float gIntensity; } SamplerState gInputSamp; Texture2D gDepthTex; Texture2D gNormalsTex; Texture2D gDownsampledAO; Texture2D gSetupAO; SamplerState gRandomSamp; Texture2D gRandomTex; #if QUALITY < 3 #define SAMPLE_STEPS 1 #else #define SAMPLE_STEPS 3 #endif #if QUALITY < 4 #define SAMPLE_SET 0 #else #define SAMPLE_SET 1 #endif // Points within a disc, at equally separated angles from 0 to 2PI. // Each point is also placed further away from the disc center, up to unit disc radius. // f[x_, s_] := {((x + 1)/(s + 1))*Cos[(x/s)*2 Pi], (x + 1)/(s + 1)*Sin[(x/s)*2 Pi]} #if SAMPLE_SET == 0 #define SAMPLE_COUNT 3 static const float2 SAMPLES[3] = { float2( 0.250f, 0.000f), float2(-0.250f, 0.433f), float2(-0.375f, -0.649f) }; #else #define SAMPLE_COUNT 6 static const float2 SAMPLES[6] = { float2( 0.142f, 0.000f), float2( 0.142f, 0.247f), float2(-0.214f, 0.371f), float2(-0.571f, 0.000f), float2(-0.357f, -0.618f), float2( 0.428f, -0.742f) }; #endif float2 ndcToDepthUV(float2 ndc) { return NDCToUV(ndc); } float3 getViewSpacePos(float2 ndc, float depth) { float2 clipSpace = ndc * -depth; // Use the tan(FOV/2) & aspect to move from clip to view space (basically just scaling). // This is the equivalent of multiplying by mixedToView matrix that's used in most // depth -> world space calculations, but if we make some assumptions we can avoid the // matrix multiply and get the same result. We can also avoid division by .w since we know // the depth is in view space and the mixedToView matrix wouldn't affect it. // The only entries that effect the coordinate are 0,0 and 1,1 entries in the matrix // (if the matrix is symmetric, which we assume is true), which are just the cotangent // of the half of the two aspect ratios. return float3(clipSpace * gTanHalfFOV, depth); } float getUpsampledAO(float2 uv, float depth, float3 normal) { float2 uvs[9]; uvs[0] = uv + float2(-1, -1) * gDownsampledPixelSize; uvs[1] = uv + float2( 0, -1) * gDownsampledPixelSize; uvs[2] = uv + float2( 1, -1) * gDownsampledPixelSize; uvs[3] = uv + float2(-1, 0) * gDownsampledPixelSize; uvs[4] = uv + float2( 0, 0) * gDownsampledPixelSize; uvs[5] = uv + float2( 1, 0) * gDownsampledPixelSize; uvs[6] = uv + float2(-1, 1) * gDownsampledPixelSize; uvs[7] = uv + float2( 0, 1) * gDownsampledPixelSize; uvs[8] = uv + float2( 1, 1) * gDownsampledPixelSize; float weightedSum = 0.00001f; float weightSum = 0.00001f; [unroll] for(int i = 0; i < 9; ++i) { // Get AO from previous step (half-resolution buffer) float sampleAO = gDownsampledAO.Sample(gInputSamp, uvs[i]).r; // Get filtered normal/depth float4 sampleNormalAndDepth = gSetupAO.Sample(gInputSamp, uvs[i]); float3 sampleNormal = sampleNormalAndDepth.xyz * 2.0f - 1.0f; float sampleDepth = sampleNormalAndDepth.w; // Compute sample contribution depending on how close it is to current // depth and normal float weight = saturate(1.0f - abs(sampleDepth - depth) * 0.3f); weight *= saturate(dot(sampleNormal, normal)); weightedSum += sampleAO * weight; weightSum += weight; } return weightedSum / weightSum; } float fsmain(VStoFS input, float4 pixelPos : SV_Position) : SV_Target0 { #if FINAL_AO // Final uses gbuffer input float sceneDepth = convertFromDeviceZ(gDepthTex.Sample(gInputSamp, input.uv0).r); float3 worldNormal = gNormalsTex.Sample(gInputSamp, input.uv0).xyz * 2.0f - 1.0f; #else // Input from AO setup pass float4 aoSetup = gSetupAO.Sample(gInputSamp, input.uv0); float sceneDepth = aoSetup.w; float3 worldNormal = aoSetup.xyz * 2.0f - 1.0f; #endif float3 viewNormal = normalize(mul((float3x3)gMatView, worldNormal)); float3 viewPos = getViewSpacePos(input.screenPos, sceneDepth); // Apply bias to avoid false occlusion due to depth quantization or other precision issues viewPos += viewNormal * gBias * -sceneDepth; // Note: Do I want to recalculate screen position from this new view position? // Project sample radius to screen space (approximately), using the formula: // screenRadius = worldRadius * 1/tan(fov/2) / z // The formula approximates sphere projection and is more accurate the closer to the screen center // the sphere origin is. float sampleRadius = gSampleRadius * lerp(-sceneDepth, 1, gWorldSpaceRadiusMask) * gCotHalfFOV / -sceneDepth; // Get random rotation #if QUALITY == 0 float2 rotateDir = float2(0, 1); // No random rotation #else float2 rotateDir = gRandomTex.Sample(gRandomSamp, input.uv0 * gRandomTileScale) * 2 - 1; #endif // Scale by screen space sample radius rotateDir *= sampleRadius; // Construct rotation matrix float2 rotateDir90 = float2(-rotateDir.y, rotateDir.x); // Rotate 90 degrees float2x2 rotateTfrm = float2x2( rotateDir.x, rotateDir90.x, rotateDir.y, rotateDir90.y ); float invRange = 1.0f / gSampleRadius; // For every sample, find the highest horizon angle in the direction of the sample float2 accumulator = 0.00001f; [unroll] for(int i = 0; i < SAMPLE_COUNT; ++i) { float2 sampleOffset = mul(rotateTfrm, SAMPLES[i]); // Step along the direction of the sample offset, looking for the maximum angle in two directions // (positive dir of the sample offset, and negative). Steps are weighted so that those that are // further away from the origin contribute less. float3 stepAccum = 0; [unroll] for(int j = 1; j <= SAMPLE_STEPS; ++j) { float scale = j / (float)SAMPLE_STEPS; float2 screenPosL = input.screenPos + sampleOffset * scale; float2 screenPosR = input.screenPos - sampleOffset * scale; // TODO - Sample HiZ here to minimize cache trashing (depending on quality) #if FINAL_AO // Final uses gbuffer input float depthL = gDepthTex.Sample(gInputSamp, ndcToDepthUV(screenPosL)).r; float depthR = gDepthTex.Sample(gInputSamp, ndcToDepthUV(screenPosR)).r; depthL = convertFromDeviceZ(depthL); depthR = convertFromDeviceZ(depthR); #else float depthL = gSetupAO.Sample(gInputSamp, ndcToDepthUV(screenPosL)).w; float depthR = gSetupAO.Sample(gInputSamp, ndcToDepthUV(screenPosR)).w; #endif float3 viewPosL = getViewSpacePos(screenPosL, depthL); float3 viewPosR = getViewSpacePos(screenPosR, depthR); float3 diffL = viewPosL - viewPos; float3 diffR = viewPosR - viewPos; float angleL = saturate(dot(diffL, viewNormal) * rsqrt(dot(diffL, diffL))); float angleR = saturate(dot(diffR, viewNormal) * rsqrt(dot(diffR, diffR))); // Avoid blending if depths are too different to avoid leaking float weight = saturate(1.0f - length(diffL) * invRange); weight *= saturate(1.0f - length(diffR) * invRange); float2 angles = float2(angleL, angleR); stepAccum = lerp(stepAccum, float3(max(angles, stepAccum.xy), 1), weight); } // Negate since higher angle means more occlusion float2 weightedValue = 1.0f - stepAccum.xy; // Square to reduce impact on areas with low AO, and increase impact on areas with high AO weightedValue *= weightedValue; // Multiply by weight since we calculate the weighted average weightedValue *= stepAccum.z; // Accumulate sum total and weight total accumulator += float2(weightedValue.x + weightedValue.y, 2.0f * stepAccum.z); } float output = 0; // Divide by total weight to get the weighted average output = accumulator.x / accumulator.y; #if MIX_WITH_UPSAMPLED float upsampledAO = getUpsampledAO(input.uv0, sceneDepth, worldNormal); // Note: 0.6f just an arbitrary constant that looks good. Make this adjustable externally? output = lerp(output, upsampledAO, 0.6f); #endif #if FINAL_AO // Fade out far away AO // Reference: 1 - saturate((depth - fadeDistance) / fadeRange) output = lerp(output, 1.0f, saturate(-sceneDepth * gFadeMultiplyAdd.x + gFadeMultiplyAdd.y)); // Adjust power and intensity output = 1.0f - saturate((1.0f - pow(output, gPower)) * gIntensity); #endif // On quality 0 we don't blur at all. At qualities higher than 1 we use a proper bilateral blur. #if QUALITY == 1 // Perform a 2x2 ad-hoc blur to hide the dither pattern // Note: Ideally the blur would be 4x4 since the pattern is 4x4 float4 myVal = float4(output, viewNormal); float4 dX = ddx_fine(myVal); float4 dY = ddy_fine(myVal); int2 mod = (int2)(pixelPos.xy) % 2; float4 horzVal = myVal - dX * (mod.x * 2 - 1); float4 vertVal = myVal - dY * (mod.y * 2 - 1); // Do weighted average depending on how similar the normals are float weightHorz = saturate(pow(saturate(dot(viewNormal, horzVal.yzw)), 4.0f)); float weightVert = saturate(pow(saturate(dot(viewNormal, vertVal.yzw)), 4.0f)); float myWeight = 1.0f; float invWeight = 1.0f / (myWeight + weightHorz + weightVert); myWeight *= invWeight; weightHorz *= invWeight; weightVert *= invWeight; output = output * myWeight + horzVal.r * weightHorz + vertVal.r * weightVert; #endif return output; } }; };