// PPSSAO.bsl
  1. #include "$ENGINE$\PPBase.bslinc"
  2. #include "$ENGINE$\PerCameraData.bslinc"
  3. technique PPSSAO
  4. {
  5. mixin PPBase;
  6. mixin PerCameraData;
  7. code
  8. {
  // Per-pass parameters for the SSAO shader.
  [internal]
  cbuffer Input
  {
      // Sampling radius. Projected to screen space in fsmain, and its inverse is used as the
      // distance range over which sample weights fall off.
      float gSampleRadius;

      // Blend factor used in lerp(-sceneDepth, 1, mask) when computing the screen-space radius.
      // At 0 the -sceneDepth term cancels the later division by -sceneDepth; at 1 the radius
      // stays divided by -sceneDepth.
      float gWorldSpaceRadiusMask;

      // x - horz FOV, y - vert FOV
      float2 gTanHalfFOV;

      // Multiplied with the pixel UV to produce the lookup coordinate into gRandomTex.
      float2 gRandomTileScale;

      // Used when projecting the sample radius to screen space (radius * cot(fov/2) / z).
      float gCotHalfFOV;

      // Scales the offset applied along the view-space normal (also scaled by -sceneDepth)
      // to avoid false occlusion.
      float gBias;

      // UV offset between neighboring taps when upsampling the lower resolution AO.
      float2 gDownsampledPixelSize;

      // x is multiplied with -sceneDepth, y is added; the saturated result fades AO towards 1.
      float2 gFadeMultiplyAdd;

      // Exponent applied to the AO value in the final pass.
      float gPower;

      // Multiplier applied to the occlusion amount (1 - AO^gPower) in the final pass.
      float gIntensity;
  }
  // Sampler shared by the depth, normal, setup and downsampled AO texture reads.
  SamplerState gInputSamp;

  // Device-Z depth; read only when FINAL_AO is enabled (converted via convertFromDeviceZ).
  Texture2D gDepthTex;

  // Normals encoded in xyz as [0, 1]; read only when FINAL_AO is enabled.
  Texture2D gNormalsTex;

  // AO from the previous step, read from the red channel when MIX_WITH_UPSAMPLED is enabled.
  Texture2D gDownsampledAO;

  // Output of the AO setup pass: xyz holds the encoded normal, w holds the depth.
  Texture2D gSetupAO;

  // Sampler and texture providing the per-pixel random rotation direction (QUALITY > 0).
  SamplerState gRandomSamp;
  Texture2D gRandomTex;
  // Number of steps taken along each sample direction: 3 at QUALITY 3 and above, otherwise 1.
  #if QUALITY >= 3
      #define SAMPLE_STEPS 3
  #else
      #define SAMPLE_STEPS 1
  #endif

  // Which sample table to use: the larger set (1) at QUALITY 4 and above, otherwise set 0.
  #if QUALITY >= 4
      #define SAMPLE_SET 1
  #else
      #define SAMPLE_SET 0
  #endif
  // Sample offsets inside the unit disc. Sample x of s lies at angle (x / s) * 2PI and at
  // distance (x + 1) / (s + 1) from the center, so later samples sit further out.
  // Generated with:
  //   f[x_, s_] := {((x + 1)/(s + 1))*Cos[(x/s)*2 Pi], (x + 1)/(s + 1)*Sin[(x/s)*2 Pi]}
  #if SAMPLE_SET != 0
      #define SAMPLE_COUNT 6
      static const float2 SAMPLES[SAMPLE_COUNT] =
      {
          float2( 0.142f,  0.000f),
          float2( 0.142f,  0.247f),
          float2(-0.214f,  0.371f),
          float2(-0.571f,  0.000f),
          float2(-0.357f, -0.618f),
          float2( 0.428f, -0.742f)
      };
  #else
      #define SAMPLE_COUNT 3
      static const float2 SAMPLES[SAMPLE_COUNT] =
      {
          float2( 0.250f,  0.000f),
          float2(-0.250f,  0.433f),
          float2(-0.375f, -0.649f)
      };
  #endif
  // Maps an NDC position to the UV coordinate used for depth lookups.
  // Forwards directly to NDCToUV.
  float2 ndcToDepthUV(float2 ndc)
  {
      float2 depthUV = NDCToUV(ndc);
      return depthUV;
  }
  // Reconstructs a view-space position from an NDC position and a view-space depth.
  //
  // ndc   - Position in normalized device coordinates.
  // depth - View-space depth of the point; returned unchanged as the Z component.
  //
  // Instead of multiplying by a full inverse projection matrix, this relies on the projection
  // being symmetric, in which case X and Y only need to be scaled: first by -depth to undo the
  // perspective divide, then by tan(FOV / 2) for the respective axis. No division by W is
  // required because the depth is already in view space.
  float3 getViewSpacePos(float2 ndc, float depth)
  {
      // Undo the perspective divide
      float2 clipXY = ndc * -depth;

      // Scale from clip to view space
      float2 viewXY = clipXY * gTanHalfFOV;

      return float3(viewXY, depth);
  }
  // Upsamples the AO from gDownsampledAO using a 3x3 neighborhood, weighting each tap by how
  // closely its depth and normal (read from gSetupAO) match the provided depth and normal.
  //
  // uv     - UV coordinate of the center tap.
  // depth  - Depth of the current pixel, compared against the w channel of gSetupAO.
  // normal - Normal of the current pixel, compared against the decoded xyz of gSetupAO.
  //
  // Returns the weighted average AO. Both sums start at a small epsilon, so the result is 1
  // when every tap is rejected.
  float getUpsampledAO(float2 uv, float depth, float3 normal)
  {
      float aoTotal = 0.00001f;
      float weightTotal = 0.00001f;

      // Walk the neighborhood row by row, from (-1, -1) to (1, 1)
      [unroll]
      for(int y = -1; y <= 1; ++y)
      {
          [unroll]
          for(int x = -1; x <= 1; ++x)
          {
              float2 tapUV = uv + float2(x, y) * gDownsampledPixelSize;

              // AO computed by the previous step
              float tapAO = gDownsampledAO.Sample(gInputSamp, tapUV).r;

              // Filtered normal (xyz, encoded) and depth (w)
              float4 tapSetup = gSetupAO.Sample(gInputSamp, tapUV);
              float3 tapNormal = tapSetup.xyz * 2.0f - 1.0f;
              float tapDepth = tapSetup.w;

              // Taps contribute less the more their depth and normal differ from the current pixel
              float depthWeight = saturate(1.0f - abs(tapDepth - depth) * 0.3f);
              float normalWeight = saturate(dot(tapNormal, normal));
              float tapWeight = depthWeight * normalWeight;

              aoTotal += tapAO * tapWeight;
              weightTotal += tapWeight;
          }
      }

      return aoTotal / weightTotal;
  }
  // Returns the view-space depth at the provided NDC position. Reads the device-Z depth buffer
  // when FINAL_AO is enabled, otherwise the depth stored in the w channel of the AO setup texture.
  float sampleViewDepth(float2 ndc)
  {
      float2 depthUV = ndcToDepthUV(ndc);

  #if FINAL_AO // Final uses gbuffer input
      return convertFromDeviceZ(gDepthTex.Sample(gInputSamp, depthUV).r);
  #else // Input from AO setup pass
      return gSetupAO.Sample(gInputSamp, depthUV).w;
  #endif
  }

  // Fragment program computing screen-space ambient occlusion for a single pixel.
  //
  // input    - Interpolated vertex output; uv0 and screenPos (NDC) are read.
  // pixelPos - Pixel position, used only by the QUALITY == 1 blur.
  //
  // Returns the AO term, where 1 means no occlusion. Depending on the defines the result is also:
  //  - blended with the upsampled lower resolution AO (MIX_WITH_UPSAMPLED),
  //  - faded with distance and adjusted by gPower/gIntensity (FINAL_AO),
  //  - blurred within its 2x2 pixel quad (QUALITY == 1).
  float fsmain(VStoFS input, float4 pixelPos : SV_Position) : SV_Target0
  {
  #if FINAL_AO // Final uses gbuffer input
      float sceneDepth = convertFromDeviceZ(gDepthTex.Sample(gInputSamp, input.uv0).r);
      float3 worldNormal = gNormalsTex.Sample(gInputSamp, input.uv0).xyz * 2.0f - 1.0f;
  #else // Input from AO setup pass
      float4 aoSetup = gSetupAO.Sample(gInputSamp, input.uv0);
      float sceneDepth = aoSetup.w;
      float3 worldNormal = aoSetup.xyz * 2.0f - 1.0f;
  #endif

      float3 viewNormal = normalize(mul((float3x3)gMatView, worldNormal));
      float3 viewPos = getViewSpacePos(input.screenPos, sceneDepth);

      // Apply bias to avoid false occlusion due to depth quantization or other precision issues
      viewPos += viewNormal * gBias * -sceneDepth;
      // Note: Do I want to recalculate screen position from this new view position?

      // Project sample radius to screen space (approximately), using the formula:
      //   screenRadius = worldRadius * 1/tan(fov/2) / z
      // The formula approximates sphere projection and is more accurate the closer to the screen
      // center the sphere origin is.
      float sampleRadius = gSampleRadius * lerp(-sceneDepth, 1, gWorldSpaceRadiusMask) * gCotHalfFOV / -sceneDepth;

      // Get random rotation
  #if QUALITY == 0
      float2 rotateDir = float2(0, 1); // No random rotation
  #else
      // Sample() yields a four component value, explicitly pick the two components we need
      float2 rotateDir = gRandomTex.Sample(gRandomSamp, input.uv0 * gRandomTileScale).xy * 2 - 1;
  #endif

      // Scale by screen space sample radius
      rotateDir *= sampleRadius;

      // Construct rotation matrix
      float2 rotateDir90 = float2(-rotateDir.y, rotateDir.x); // Rotate 90 degrees
      float2x2 rotateTfrm = float2x2(
          rotateDir.x, rotateDir90.x,
          rotateDir.y, rotateDir90.y
      );

      float invRange = 1.0f / gSampleRadius;

      // For every sample, find the highest horizon angle in the direction of the sample.
      // x holds the weighted sum, y the total weight. Starting at a small epsilon makes the
      // result 1 (no occlusion) when all samples end up with zero weight.
      float2 accumulator = 0.00001f;

      [unroll]
      for(int i = 0; i < SAMPLE_COUNT; ++i)
      {
          float2 sampleOffset = mul(rotateTfrm, SAMPLES[i]);

          // Step along the direction of the sample offset, looking for the maximum angle in two
          // directions (positive dir of the sample offset, and negative). Steps are weighted so
          // that those that are further away from the origin contribute less.
          // xy holds the angles for the two directions, z the accumulated weight.
          float3 stepAccum = 0;

          [unroll]
          for(int j = 1; j <= SAMPLE_STEPS; ++j)
          {
              float scale = j / (float)SAMPLE_STEPS;

              float2 screenPosL = input.screenPos + sampleOffset * scale;
              float2 screenPosR = input.screenPos - sampleOffset * scale;

              // TODO - Sample HiZ here to minimize cache trashing (depending on quality)
              float depthL = sampleViewDepth(screenPosL);
              float depthR = sampleViewDepth(screenPosR);

              float3 viewPosL = getViewSpacePos(screenPosL, depthL);
              float3 viewPosR = getViewSpacePos(screenPosR, depthR);

              float3 diffL = viewPosL - viewPos;
              float3 diffR = viewPosR - viewPos;

              // Cosine of the angle between the normal and the direction towards each sample
              float angleL = saturate(dot(diffL, viewNormal) * rsqrt(dot(diffL, diffL)));
              float angleR = saturate(dot(diffR, viewNormal) * rsqrt(dot(diffR, diffR)));

              // Avoid blending if depths are too different to avoid leaking
              float weight = saturate(1.0f - length(diffL) * invRange);
              weight *= saturate(1.0f - length(diffR) * invRange);

              float2 angles = float2(angleL, angleR);
              stepAccum = lerp(stepAccum, float3(max(angles, stepAccum.xy), 1), weight);
          }

          // Negate since higher angle means more occlusion
          float2 weightedValue = 1.0f - stepAccum.xy;

          // Square to reduce impact on areas with low AO, and increase impact on areas with high AO
          weightedValue *= weightedValue;

          // Multiply by weight since we calculate the weighted average
          weightedValue *= stepAccum.z;

          // Accumulate sum total and weight total
          accumulator += float2(weightedValue.x + weightedValue.y, 2.0f * stepAccum.z);
      }

      // Divide by total weight to get the weighted average
      float output = accumulator.x / accumulator.y;

  #if MIX_WITH_UPSAMPLED
      float upsampledAO = getUpsampledAO(input.uv0, sceneDepth, worldNormal);

      // Note: 0.6f just an arbitrary constant that looks good. Make this adjustable externally?
      output = lerp(output, upsampledAO, 0.6f);
  #endif

  #if FINAL_AO
      // Fade out far away AO
      // Reference: 1 - saturate((depth - fadeDistance) / fadeRange)
      output = lerp(output, 1.0f, saturate(-sceneDepth * gFadeMultiplyAdd.x + gFadeMultiplyAdd.y));

      // Adjust power and intensity
      output = 1.0f - saturate((1.0f - pow(output, gPower)) * gIntensity);
  #endif

      // On quality 0 we don't blur at all. At qualities higher than 1 we use a proper bilateral blur.
  #if QUALITY == 1
      // Perform a 2x2 ad-hoc blur to hide the dither pattern
      // Note: Ideally the blur would be 4x4 since the pattern is 4x4
      float4 myVal = float4(output, viewNormal);
      float4 dX = ddx_fine(myVal);
      float4 dY = ddy_fine(myVal);

      // Position of this pixel within its 2x2 quad. Subtracting or adding the derivative,
      // depending on that position, reconstructs the value of the horizontal/vertical neighbor.
      int2 quadPos = (int2)(pixelPos.xy) % 2;
      float4 horzVal = myVal - dX * (quadPos.x * 2 - 1);
      float4 vertVal = myVal - dY * (quadPos.y * 2 - 1);

      // Do weighted average depending on how similar the normals are
      float weightHorz = saturate(pow(saturate(dot(viewNormal, horzVal.yzw)), 4.0f));
      float weightVert = saturate(pow(saturate(dot(viewNormal, vertVal.yzw)), 4.0f));

      float myWeight = 1.0f;
      float invWeight = 1.0f / (myWeight + weightHorz + weightVert);

      myWeight *= invWeight;
      weightHorz *= invWeight;
      weightVert *= invWeight;

      output = output * myWeight + horzVal.r * weightHorz + vertVal.r * weightVert;
  #endif

      return output;
  }
  231. };
  232. };