// PPSSAO.bsl - screen-space ambient occlusion (SSAO) post-processing shader
#include "$ENGINE$\PPBase.bslinc"
#include "$ENGINE$\PerCameraData.bslinc"

// Screen-space ambient occlusion post-process. Computes an occlusion factor
// per pixel by probing nearby depth/normal samples in screen space.
technique PPSSAO
{
    mixin PPBase;
    mixin PerCameraData;

    variations
    {
        // true - blend the result with AO from the lower-resolution pass (gDownsampledAO)
        MIX_WITH_UPSAMPLED = { true, false };
        // true - read depth/normals from the GBuffer and apply fade/power/intensity;
        // false - read packed normal+depth from the AO setup texture (gSetupAO)
        FINAL_AO = { true, false };
        // Higher quality enables more sample steps, a larger sample set,
        // random rotation and (at quality 1) a small blur
        QUALITY = { 0, 1, 2, 3, 4 };
    };
    code
    {
        [internal]
        cbuffer Input
        {
            float gSampleRadius; // AO sample radius; also the range used for depth-leak rejection
            float gWorldSpaceRadiusMask; // Lerp factor: 0 - radius fixed in view space, 1 - in world space
            float2 gTanHalfFOV; // x - horz FOV, y - vert FOV
            float2 gRandomTileScale; // UV scale for tiling gRandomTex across the screen
            float gCotHalfFOV; // cot(FOV/2), used to project the sample radius to screen space
            float gBias; // Normal-direction offset avoiding self-occlusion from depth precision issues
            float2 gDownsampledPixelSize; // Pixel size of the downsampled AO buffer (for upsampling taps)
            float2 gFadeMultiplyAdd; // Distance fade as multiply-add: saturate(depth * x + y)
            float gPower; // Exponent applied to the final AO value (FINAL_AO only)
            float gIntensity; // Strength multiplier for the final AO value (FINAL_AO only)
        }

        SamplerState gInputSamp;
        Texture2D gDepthTex; // Scene depth (device Z); sampled when FINAL_AO
        Texture2D gNormalsTex; // GBuffer normals (packed 0..1); sampled when FINAL_AO
        Texture2D gDownsampledAO; // AO result from the previous, half-resolution step
        Texture2D gSetupAO; // AO setup pass output: packed normal in .xyz, view-space depth in .w

        SamplerState gRandomSamp;
        Texture2D gRandomTex; // Tiling texture of random rotation vectors (packed 0..1)

        // Number of steps taken along each sample direction (in both directions)
        #if QUALITY < 3
            #define SAMPLE_STEPS 1
        #else
            #define SAMPLE_STEPS 3
        #endif

        // Selects which sample-disc point set to use (see SAMPLES below)
        #if QUALITY < 4
            #define SAMPLE_SET 0
        #else
            #define SAMPLE_SET 1
        #endif
        // Points within a disc, at equally separated angles from 0 to 2PI.
        // Each point is also placed further away from the disc center, up to unit disc radius.
        // Generated with (Mathematica):
        // f[x_, s_] := {((x + 1)/(s + 1))*Cos[(x/s)*2 Pi], (x + 1)/(s + 1)*Sin[(x/s)*2 Pi]}
        #if SAMPLE_SET == 0
            // Small set - 3 sample directions (lower quality levels)
            #define SAMPLE_COUNT 3
            static const float2 SAMPLES[3] =
            {
                float2( 0.250f, 0.000f),
                float2(-0.250f, 0.433f),
                float2(-0.375f, -0.649f)
            };
        #else
            // Large set - 6 sample directions (highest quality level)
            #define SAMPLE_COUNT 6
            static const float2 SAMPLES[6] =
            {
                float2( 0.142f, 0.000f),
                float2( 0.142f, 0.247f),
                float2(-0.214f, 0.371f),
                float2(-0.571f, 0.000f),
                float2(-0.357f, -0.618f),
                float2( 0.428f, -0.742f)
            };
        #endif
  69. float2 ndcToDepthUV(float2 ndc)
  70. {
  71. return NDCToUV(ndc);
  72. }
  73. float3 getViewSpacePos(float2 ndc, float depth)
  74. {
  75. float2 clipSpace = ndc * -depth;
  76. // Use the tan(FOV/2) & aspect to move from clip to view space (basically just scaling).
  77. // This is the equivalent of multiplying by mixedToView matrix that's used in most
  78. // depth -> world space calculations, but if we make some assumptions we can avoid the
  79. // matrix multiply and get the same result. We can also avoid division by .w since we know
  80. // the depth is in view space and the mixedToView matrix wouldn't affect it.
  81. // The only entries that effect the coordinate are 0,0 and 1,1 entries in the matrix
  82. // (if the matrix is symmetric, which we assume is true), which are just the cotangent
  83. // of the half of the two aspect ratios.
  84. return float3(clipSpace * gTanHalfFOV, depth);
  85. }
  86. float getUpsampledAO(float2 uv, float depth, float3 normal)
  87. {
  88. float2 uvs[9];
  89. uvs[0] = uv + float2(-1, -1) * gDownsampledPixelSize;
  90. uvs[1] = uv + float2( 0, -1) * gDownsampledPixelSize;
  91. uvs[2] = uv + float2( 1, -1) * gDownsampledPixelSize;
  92. uvs[3] = uv + float2(-1, 0) * gDownsampledPixelSize;
  93. uvs[4] = uv + float2( 0, 0) * gDownsampledPixelSize;
  94. uvs[5] = uv + float2( 1, 0) * gDownsampledPixelSize;
  95. uvs[6] = uv + float2(-1, 1) * gDownsampledPixelSize;
  96. uvs[7] = uv + float2( 0, 1) * gDownsampledPixelSize;
  97. uvs[8] = uv + float2( 1, 1) * gDownsampledPixelSize;
  98. float weightedSum = 0.00001f;
  99. float weightSum = 0.00001f;
  100. [unroll]
  101. for(int i = 0; i < 9; ++i)
  102. {
  103. // Get AO from previous step (half-resolution buffer)
  104. float sampleAO = gDownsampledAO.Sample(gInputSamp, uvs[i]).r;
  105. // Get filtered normal/depth
  106. float4 sampleNormalAndDepth = gSetupAO.Sample(gInputSamp, uvs[i]);
  107. float3 sampleNormal = sampleNormalAndDepth.xyz * 2.0f - 1.0f;
  108. float sampleDepth = sampleNormalAndDepth.w;
  109. // Compute sample contribution depending on how close it is to current
  110. // depth and normal
  111. float weight = saturate(1.0f - abs(sampleDepth - depth) * 0.3f);
  112. weight *= saturate(dot(sampleNormal, normal));
  113. weightedSum += sampleAO * weight;
  114. weightSum += weight;
  115. }
  116. return weightedSum / weightSum;
  117. }
        /**
         * SSAO fragment shader. Estimates occlusion by searching for horizon
         * angles along several rotated screen-space directions, then optionally
         * blends with lower-resolution AO and applies fade/power/intensity.
         *
         * @param input		Interpolated vertex outputs (uv0, screenPos).
         * @param pixelPos	Pixel coordinates, used for the 2x2 dither blur.
         * @return			Occlusion value (1 = unoccluded).
         */
        float fsmain(VStoFS input, float4 pixelPos : SV_Position) : SV_Target0
        {
            // Fetch depth & world-space normal for the pixel being shaded
            #if FINAL_AO // Final uses gbuffer input
            float sceneDepth = convertFromDeviceZ(gDepthTex.Sample(gInputSamp, input.uv0).r);
            float3 worldNormal = gNormalsTex.Sample(gInputSamp, input.uv0).xyz * 2.0f - 1.0f;
            #else // Input from AO setup pass (normal in .xyz, view-space depth in .w)
            float4 aoSetup = gSetupAO.Sample(gInputSamp, input.uv0);
            float sceneDepth = aoSetup.w;
            float3 worldNormal = aoSetup.xyz * 2.0f - 1.0f;
            #endif

            float3 viewNormal = normalize(mul((float3x3)gMatView, worldNormal));
            float3 viewPos = getViewSpacePos(input.screenPos, sceneDepth);

            // Apply bias to avoid false occlusion due to depth quantization or other precision issues.
            // Note: view-space depth is negated wherever a positive distance is needed, which implies
            // a negative-Z-forward convention - confirm against PerCameraData.
            viewPos += viewNormal * gBias * -sceneDepth;

            // Note: Do I want to recalculate screen position from this new view position?

            // Project sample radius to screen space (approximately), using the formula:
            //   screenRadius = worldRadius * 1/tan(fov/2) / z
            // The formula approximates sphere projection and is more accurate the closer to the screen center
            // the sphere origin is. gWorldSpaceRadiusMask blends between a radius fixed in
            // world units (mask 0) and one fixed relative to the view (mask 1, cancels the depth).
            float sampleRadius = gSampleRadius * lerp(-sceneDepth, 1, gWorldSpaceRadiusMask) * gCotHalfFOV / -sceneDepth;

            // Get random per-pixel rotation to hide banding between the fixed sample directions
            #if QUALITY == 0
            float2 rotateDir = float2(0, 1); // No random rotation
            #else
            float2 rotateDir = gRandomTex.Sample(gRandomSamp, input.uv0 * gRandomTileScale) * 2 - 1;
            #endif

            // Scale by screen space sample radius
            rotateDir *= sampleRadius;

            // Construct rotation (+ scale) matrix applied to every disc sample
            float2 rotateDir90 = float2(-rotateDir.y, rotateDir.x); // Rotate 90 degrees
            float2x2 rotateTfrm = float2x2(
                rotateDir.x, rotateDir90.x,
                rotateDir.y, rotateDir90.y
            );

            // Reciprocal of the rejection range used for the depth-leak weights below
            float invRange = 1.0f / gSampleRadius;

            // For every sample, find the highest horizon angle in the direction of the sample.
            // accumulator.x - weighted occlusion sum, accumulator.y - weight sum
            // (epsilon initial value avoids division by zero)
            float2 accumulator = 0.00001f;

            [unroll]
            for(int i = 0; i < SAMPLE_COUNT; ++i)
            {
                float2 sampleOffset = mul(rotateTfrm, SAMPLES[i]);

                // Step along the direction of the sample offset, looking for the maximum angle in two directions
                // (positive dir of the sample offset, and negative). Steps are weighted so that those that are
                // further away from the origin contribute less.
                // stepAccum.xy - max angles for the two directions, stepAccum.z - blended step weight
                float3 stepAccum = 0;
                [unroll]
                for(int j = 1; j <= SAMPLE_STEPS; ++j)
                {
                    float scale = j / (float)SAMPLE_STEPS;
                    float2 screenPosL = input.screenPos + sampleOffset * scale;
                    float2 screenPosR = input.screenPos - sampleOffset * scale;

                    // TODO - Sample HiZ here to minimize cache trashing (depending on quality)
                    #if FINAL_AO // Final uses gbuffer input
                    float depthL = gDepthTex.Sample(gInputSamp, ndcToDepthUV(screenPosL)).r;
                    float depthR = gDepthTex.Sample(gInputSamp, ndcToDepthUV(screenPosR)).r;

                    depthL = convertFromDeviceZ(depthL);
                    depthR = convertFromDeviceZ(depthR);
                    #else
                    float depthL = gSetupAO.Sample(gInputSamp, ndcToDepthUV(screenPosL)).w;
                    float depthR = gSetupAO.Sample(gInputSamp, ndcToDepthUV(screenPosR)).w;
                    #endif

                    float3 viewPosL = getViewSpacePos(screenPosL, depthL);
                    float3 viewPosR = getViewSpacePos(screenPosR, depthR);

                    // Vectors from the shaded point towards the two samples
                    float3 diffL = viewPosL - viewPos;
                    float3 diffR = viewPosR - viewPos;

                    // Cosine of the angle between the normal and the direction to each
                    // sample (rsqrt of the squared length normalizes the dot product)
                    float angleL = saturate(dot(diffL, viewNormal) * rsqrt(dot(diffL, diffL)));
                    float angleR = saturate(dot(diffR, viewNormal) * rsqrt(dot(diffR, diffR)));

                    // Avoid blending if depths are too different to avoid leaking
                    float weight = saturate(1.0f - length(diffL) * invRange);
                    weight *= saturate(1.0f - length(diffR) * invRange);

                    float2 angles = float2(angleL, angleR);
                    stepAccum = lerp(stepAccum, float3(max(angles, stepAccum.xy), 1), weight);
                }

                // Negate since higher angle means more occlusion
                float2 weightedValue = 1.0f - stepAccum.xy;

                // Square to reduce impact on areas with low AO, and increase impact on areas with high AO
                weightedValue *= weightedValue;

                // Multiply by weight since we calculate the weighted average
                weightedValue *= stepAccum.z;

                // Accumulate sum total and weight total (two directions per sample, hence 2x weight)
                accumulator += float2(weightedValue.x + weightedValue.y, 2.0f * stepAccum.z);
            }

            float output = 0;

            // Divide by total weight to get the weighted average
            output = accumulator.x / accumulator.y;

            #if MIX_WITH_UPSAMPLED
            float upsampledAO = getUpsampledAO(input.uv0, sceneDepth, worldNormal);

            // Note: 0.6f just an arbitrary constant that looks good. Make this adjustable externally?
            output = lerp(output, upsampledAO, 0.6f);
            #endif

            #if FINAL_AO
            // Fade out far away AO
            // Reference: 1 - saturate((depth - fadeDistance) / fadeRange)
            output = lerp(output, 1.0f, saturate(-sceneDepth * gFadeMultiplyAdd.x + gFadeMultiplyAdd.y));

            // Adjust power and intensity
            output = 1.0f - saturate((1.0f - pow(output, gPower)) * gIntensity);
            #endif

            // On quality 0 we don't blur at all. At qualities higher than 1 we use a proper bilateral blur.
            #if QUALITY == 1
            // Perform a 2x2 ad-hoc blur to hide the dither pattern
            // Note: Ideally the blur would be 4x4 since the pattern is 4x4
            // Pack AO + view normal so quad derivatives fetch both at once
            float4 myVal = float4(output, viewNormal);
            float4 dX = ddx_fine(myVal);
            float4 dY = ddy_fine(myVal);

            // Reconstruct the neighboring quad pixels' values from the fine
            // derivatives without any extra texture reads; the sign flips with
            // the pixel's parity so we always step towards the other pixel
            int2 mod = (int2)(pixelPos.xy) % 2;
            float4 horzVal = myVal - dX * (mod.x * 2 - 1);
            float4 vertVal = myVal - dY * (mod.y * 2 - 1);

            // Do weighted average depending on how similar the normals are
            // (horzVal.yzw / vertVal.yzw hold the neighbors' view normals)
            float weightHorz = saturate(pow(saturate(dot(viewNormal, horzVal.yzw)), 4.0f));
            float weightVert = saturate(pow(saturate(dot(viewNormal, vertVal.yzw)), 4.0f));

            float myWeight = 1.0f;
            float invWeight = 1.0f / (myWeight + weightHorz + weightVert);

            myWeight *= invWeight;
            weightHorz *= invWeight;
            weightVert *= invWeight;

            output = output * myWeight + horzVal.r * weightHorz + vertVal.r * weightVert;
            #endif

            return output;
        }
  237. };
  238. };