| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288 |
- #include "$ENGINE$\PPBase.bslinc"
- #include "$ENGINE$\PerCameraData.bslinc"
// Screen-space ambient occlusion post-process.
//
// The same shader body is compiled in several variations, selected through preprocessor switches:
//  - QUALITY            - Controls the sample count, the number of steps per sample, random rotation
//                         and the cheap in-shader blur (see the individual #if blocks below).
//  - FINAL_AO           - When set, depth/normals are read from the gbuffer and the distance fade,
//                         power and intensity adjustments are applied. Otherwise depth/normals are
//                         read from the output of the AO setup pass.
//  - MIX_WITH_UPSAMPLED - When set, the result is blended with AO upsampled from gDownsampledAO.
technique PPSSAO
{
	mixin PPBase;
	mixin PerCameraData;

	code
	{
		[internal]
		cbuffer Input
		{
			// Radius in which occluders are searched for. Also determines the range used for
			// rejecting samples that are too far from the shaded point.
			float gSampleRadius;
			// Blend factor used when projecting the radius: at 1 the radius is divided by depth when
			// projected (constant size in the scene), at 0 the depth cancels out (constant size on
			// screen).
			float gWorldSpaceRadiusMask;
			float2 gTanHalfFOV; // x - horz FOV, y - vert FOV
			// Scale applied to screen UV before sampling gRandomTex.
			float2 gRandomTileScale;
			// Used for projecting the sample radius to screen space.
			float gCotHalfFOV;
			// Amount the shaded position is pushed along its normal, proportional to depth.
			float gBias;
			// UV offset between neighboring samples read by getUpsampledAO().
			float2 gDownsampledPixelSize;
			// x - multiplier, y - offset; applied to positive view distance to get the fade-out factor.
			float2 gFadeMultiplyAdd;
			// Exponent applied to the final AO value (FINAL_AO only).
			float gPower;
			// Multiplier applied to the final occlusion amount (FINAL_AO only).
			float gIntensity;
		}

		SamplerState gInputSamp;
		Texture2D gDepthTex;		// Device Z (FINAL_AO only)
		Texture2D gNormalsTex;		// Normals encoded to [0, 1] range in xyz (FINAL_AO only)
		Texture2D gDownsampledAO;	// AO from the previous pass, in the red channel (MIX_WITH_UPSAMPLED only)
		Texture2D gSetupAO;		// xyz - normal encoded to [0, 1] range, w - depth

		SamplerState gRandomSamp;
		Texture2D gRandomTex;		// Per-pixel rotation direction, encoded to [0, 1] range in xy

		// Number of steps taken along each sample direction.
		#if QUALITY < 3
			#define SAMPLE_STEPS 1
		#else
			#define SAMPLE_STEPS 3
		#endif

		// Selects which of the sample tables below is used.
		#if QUALITY < 4
			#define SAMPLE_SET 0
		#else
			#define SAMPLE_SET 1
		#endif

		// Points within a disc, at equally separated angles from 0 to 2PI.
		// Each point is also placed further away from the disc center, up to unit disc radius.
		// f[x_, s_] := {((x + 1)/(s + 1))*Cos[(x/s)*2 Pi], (x + 1)/(s + 1)*Sin[(x/s)*2 Pi]}
		#if SAMPLE_SET == 0
			#define SAMPLE_COUNT 3
			static const float2 SAMPLES[3] =
			{
				float2( 0.250f,  0.000f),
				float2(-0.250f,  0.433f),
				float2(-0.375f, -0.649f)
			};
		#else
			#define SAMPLE_COUNT 6
			static const float2 SAMPLES[6] =
			{
				float2( 0.142f,  0.000f),
				float2( 0.142f,  0.247f),
				float2(-0.214f,  0.371f),
				float2(-0.571f,  0.000f),
				float2(-0.357f, -0.618f),
				float2( 0.428f, -0.742f)
			};
		#endif

		// Converts NDC coordinates into UV coordinates usable for sampling the depth input.
		float2 ndcToDepthUV(float2 ndc)
		{
			return NDCToUV(ndc);
		}

		// Reconstructs a view-space position from NDC coordinates and depth. The depth is returned
		// unchanged as the Z coordinate, and is negated before scaling X/Y (callers also negate it
		// whenever they need a positive distance).
		float3 getViewSpacePos(float2 ndc, float depth)
		{
			float2 clipSpace = ndc * -depth;

			// Use the tan(FOV/2) & aspect to move from clip to view space (basically just scaling).
			// This is the equivalent of multiplying by the mixedToView matrix that's used in most
			// depth -> world space calculations, but if we make some assumptions we can avoid the
			// matrix multiply and get the same result. We can also avoid the division by .w since we
			// know the depth is in view space and the mixedToView matrix wouldn't affect it.
			// The only entries that affect the coordinate are the 0,0 and 1,1 entries of the matrix
			// (if the projection is symmetric, which we assume is true). In the projection matrix
			// those are the cotangents of the half FOV angles, so going back to view space we
			// multiply by the tangents instead.

			return float3(clipSpace * gTanHalfFOV, depth);
		}

		// Upsamples AO calculated by the previous pass, using a 3x3 neighborhood centered on @p uv.
		// Each neighbor is weighted by how closely its depth and normal (read from gSetupAO) match the
		// provided @p depth and @p normal, so AO doesn't bleed over geometric discontinuities.
		float getUpsampledAO(float2 uv, float depth, float3 normal)
		{
			float2 uvs[9];
			uvs[0] = uv + float2(-1, -1) * gDownsampledPixelSize;
			uvs[1] = uv + float2( 0, -1) * gDownsampledPixelSize;
			uvs[2] = uv + float2( 1, -1) * gDownsampledPixelSize;
			uvs[3] = uv + float2(-1,  0) * gDownsampledPixelSize;
			uvs[4] = uv + float2( 0,  0) * gDownsampledPixelSize;
			uvs[5] = uv + float2( 1,  0) * gDownsampledPixelSize;
			uvs[6] = uv + float2(-1,  1) * gDownsampledPixelSize;
			uvs[7] = uv + float2( 0,  1) * gDownsampledPixelSize;
			uvs[8] = uv + float2( 1,  1) * gDownsampledPixelSize;

			// Both sums start at the same small value, which avoids a division by zero and makes the
			// result evaluate to 1 when every neighbor gets rejected
			float weightedSum = 0.00001f;
			float weightSum = 0.00001f;

			[unroll]
			for(int i = 0; i < 9; ++i)
			{
				// Get AO from previous step (half-resolution buffer)
				float sampleAO = gDownsampledAO.Sample(gInputSamp, uvs[i]).r;

				// Get filtered normal/depth
				float4 sampleNormalAndDepth = gSetupAO.Sample(gInputSamp, uvs[i]);
				float3 sampleNormal = sampleNormalAndDepth.xyz * 2.0f - 1.0f;
				float sampleDepth = sampleNormalAndDepth.w;

				// Compute sample contribution depending on how close it is to current
				// depth and normal
				float weight = saturate(1.0f - abs(sampleDepth - depth) * 0.3f);
				weight *= saturate(dot(sampleNormal, normal));

				weightedSum += sampleAO * weight;
				weightSum += weight;
			}

			return weightedSum / weightSum;
		}

		// Fragment program entry point. Outputs a single AO value, where higher horizon angles found
		// around the pixel result in a lower output value.
		float fsmain(VStoFS input, float4 pixelPos : SV_Position) : SV_Target0
		{
			#if FINAL_AO // Final uses gbuffer input
				float sceneDepth = convertFromDeviceZ(gDepthTex.Sample(gInputSamp, input.uv0).r);
				float3 worldNormal = gNormalsTex.Sample(gInputSamp, input.uv0).xyz * 2.0f - 1.0f;
			#else // Input from AO setup pass
				float4 aoSetup = gSetupAO.Sample(gInputSamp, input.uv0);
				float sceneDepth = aoSetup.w;
				float3 worldNormal = aoSetup.xyz * 2.0f - 1.0f;
			#endif

			float3 viewNormal = normalize(mul((float3x3)gMatView, worldNormal));
			float3 viewPos = getViewSpacePos(input.screenPos, sceneDepth);

			// Apply bias to avoid false occlusion due to depth quantization or other precision issues
			viewPos += viewNormal * gBias * -sceneDepth;
			// Note: Do I want to recalculate screen position from this new view position?

			// Project sample radius to screen space (approximately), using the formula:
			// screenRadius = worldRadius * 1/tan(fov/2) / z
			// The formula approximates sphere projection and is more accurate the closer to the screen
			// center the sphere origin is.
			float sampleRadius = gSampleRadius * lerp(-sceneDepth, 1, gWorldSpaceRadiusMask) * gCotHalfFOV / -sceneDepth;

			// Get random rotation
			#if QUALITY == 0
				float2 rotateDir = float2(0, 1); // No random rotation
			#else
				// Explicitly pick the two channels we need, rather than relying on implicit truncation
				// of the four-component sample
				float2 rotateDir = gRandomTex.Sample(gRandomSamp, input.uv0 * gRandomTileScale).xy * 2 - 1;
			#endif

			// Scale by screen space sample radius
			rotateDir *= sampleRadius;

			// Construct rotation matrix. Its columns are the (scaled) rotation direction and the same
			// direction rotated by 90 degrees, so a sample (x, y) maps to x * rotateDir + y * rotateDir90.
			float2 rotateDir90 = float2(-rotateDir.y, rotateDir.x); // Rotate 90 degrees
			float2x2 rotateTfrm = float2x2(
				rotateDir.x, rotateDir90.x,
				rotateDir.y, rotateDir90.y
			);

			float invRange = 1.0f / gSampleRadius;

			// For every sample, find the highest horizon angle in the direction of the sample.
			// x - sum of weighted values, y - sum of weights. Both start at the same small value, which
			// avoids a division by zero and yields 1 if every sample gets rejected.
			float2 accumulator = 0.00001f;
			[unroll]
			for(int i = 0; i < SAMPLE_COUNT; ++i)
			{
				float2 sampleOffset = mul(rotateTfrm, SAMPLES[i]);

				// Step along the direction of the sample offset, looking for the maximum angle in two
				// directions (positive dir of the sample offset, and negative). Steps are weighted so
				// that those that are further away from the origin contribute less.
				// xy - angles in the two directions, z - accumulated weight
				float3 stepAccum = 0;

				[unroll]
				for(int j = 1; j <= SAMPLE_STEPS; ++j)
				{
					float scale = j / (float)SAMPLE_STEPS;

					float2 screenPosL = input.screenPos + sampleOffset * scale;
					float2 screenPosR = input.screenPos - sampleOffset * scale;

					// TODO - Sample HiZ here to minimize cache trashing (depending on quality)
					#if FINAL_AO // Final uses gbuffer input
						float depthL = gDepthTex.Sample(gInputSamp, ndcToDepthUV(screenPosL)).r;
						float depthR = gDepthTex.Sample(gInputSamp, ndcToDepthUV(screenPosR)).r;

						depthL = convertFromDeviceZ(depthL);
						depthR = convertFromDeviceZ(depthR);
					#else
						float depthL = gSetupAO.Sample(gInputSamp, ndcToDepthUV(screenPosL)).w;
						float depthR = gSetupAO.Sample(gInputSamp, ndcToDepthUV(screenPosR)).w;
					#endif

					float3 viewPosL = getViewSpacePos(screenPosL, depthL);
					float3 viewPosR = getViewSpacePos(screenPosR, depthR);

					float3 diffL = viewPosL - viewPos;
					float3 diffR = viewPosR - viewPos;

					// Cosine of the angle between the normal and the direction towards the sample,
					// clamped so samples below the surface plane contribute nothing
					float angleL = saturate(dot(diffL, viewNormal) * rsqrt(dot(diffL, diffL)));
					float angleR = saturate(dot(diffR, viewNormal) * rsqrt(dot(diffR, diffR)));

					// Avoid blending if depths are too different to avoid leaking
					float weight = saturate(1.0f - length(diffL) * invRange);
					weight *= saturate(1.0f - length(diffR) * invRange);

					float2 angles = float2(angleL, angleR);
					stepAccum = lerp(stepAccum, float3(max(angles, stepAccum.xy), 1), weight);
				}

				// Negate since higher angle means more occlusion
				float2 weightedValue = 1.0f - stepAccum.xy;

				// Square to reduce impact on areas with low AO, and increase impact on areas with high AO
				weightedValue *= weightedValue;

				// Multiply by weight since we calculate the weighted average
				weightedValue *= stepAccum.z;

				// Accumulate sum total and weight total
				accumulator += float2(weightedValue.x + weightedValue.y, 2.0f * stepAccum.z);
			}

			// Divide by total weight to get the weighted average
			float output = accumulator.x / accumulator.y;

			#if MIX_WITH_UPSAMPLED
				float upsampledAO = getUpsampledAO(input.uv0, sceneDepth, worldNormal);

				// Note: 0.6f just an arbitrary constant that looks good. Make this adjustable externally?
				output = lerp(output, upsampledAO, 0.6f);
			#endif

			#if FINAL_AO
				// Fade out far away AO
				// Reference: 1 - saturate((depth - fadeDistance) / fadeRange)
				output = lerp(output, 1.0f, saturate(-sceneDepth * gFadeMultiplyAdd.x + gFadeMultiplyAdd.y));

				// Adjust power and intensity
				output = 1.0f - saturate((1.0f - pow(output, gPower)) * gIntensity);
			#endif

			// On quality 0 we don't blur at all. At qualities higher than 1 we use a proper bilateral blur.
			#if QUALITY == 1
				// Perform a 2x2 ad-hoc blur to hide the dither pattern
				// Note: Ideally the blur would be 4x4 since the pattern is 4x4

				float4 myVal = float4(output, viewNormal);
				float4 dX = ddx_fine(myVal);
				float4 dY = ddy_fine(myVal);

				// Position of this pixel within its 2x2 block (0 or 1 on each axis). Depending on it, the
				// horizontal/vertical neighbor's value is reconstructed by either adding or subtracting
				// the derivative.
				int2 quadPos = (int2)(pixelPos.xy) % 2;
				float4 horzVal = myVal - dX * (quadPos.x * 2 - 1);
				float4 vertVal = myVal - dY * (quadPos.y * 2 - 1);

				// Do weighted average depending on how similar the normals are
				float weightHorz = saturate(pow(saturate(dot(viewNormal, horzVal.yzw)), 4.0f));
				float weightVert = saturate(pow(saturate(dot(viewNormal, vertVal.yzw)), 4.0f));

				float myWeight = 1.0f;
				float invWeight = 1.0f / (myWeight + weightHorz + weightVert);

				myWeight *= invWeight;
				weightHorz *= invWeight;
				weightVert *= invWeight;

				output = output * myWeight + horzVal.r * weightHorz + vertVal.r * weightVert;
			#endif

			return output;
		}
	};
};
|