  // AoBlurAndUpsampleCS.hlsli
  //
  // Copyright (c) Microsoft. All rights reserved.
  // This code is licensed under the MIT License (MIT).
  // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
  // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
  // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
  // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
  //
  // Developed by Minigraph
  //
  // Author: James Stanard
  //
  13. #include "SSAORS.hlsli"
  // Depth inputs. LoResDB is gathered with InvLowResolution-scaled coordinates
  // and HiResDB with InvHighResolution-scaled coordinates (see main).
  Texture2D<float> LoResDB : register(t0);
  Texture2D<float> HiResDB : register(t1);

  // Low-resolution AO that this pass blurs and upsamples.
  Texture2D<float> LoResAO1 : register(t2);
  #ifdef COMBINE_LOWER_RESOLUTIONS
  // Optional second low-resolution AO input. PrefetchData keeps the per-texel
  // minimum of this and LoResAO1.
  Texture2D<float> LoResAO2 : register(t3);
  #endif
  #ifdef BLEND_WITH_HIGHER_RESOLUTION
  // Optional AO gathered with the same coordinates as HiResDB; it is multiplied
  // into the upsampled result. When undefined, main substitutes 1.0.
  Texture2D<float> HiResAO : register(t4);
  #endif

  // Output AO. Each thread of main writes a 2x2 quad.
  RWTexture2D<float> AoResult : register(u0);

  // Sampler handed to every Gather call in this file.
  SamplerState LinearSampler : register(s0);
  // Per-dispatch constants. Member order and types define the buffer layout and
  // must stay in sync with whatever fills register b1.
  cbuffer ConstantBuffer_x : register(b1)
  {
      float2 InvLowResolution;     // scales low-res texel coordinates to UVs for LoResDB / LoResAO*
      float2 InvHighResolution;    // scales high-res texel coordinates to UVs for HiResDB / HiResAO
      float NoiseFilterStrength;   // weight of the extra "no low-res sample matches" term in BilateralUpsample
      float StepSize;              // bias added to the depth-delta products used by the blur edge test
      float kBlurTolerance;        // threshold factor applied in CompareDeltas
      float kUpsampleTolerance;    // added to |depth difference| in the upsample weight denominator
  }
  // Group-shared 16x16 tiles (256 entries, row stride 16).
  groupshared float DepthCache[256];  // reciprocals of the gathered LoResDB values
  groupshared float AOCache1[256];    // prefetched AO; later overwritten with the vertically blurred AO
  groupshared float AOCache2[256];    // horizontally blurred AO
  // Gathers one 2x2 footprint of low-res AO and depth and stores it in the
  // group-shared caches.
  //   index - cache slot that receives the .w gather component; the other three
  //           components go to index+1, index+16 and index+17 (row stride 16)
  //   uv    - texture coordinate passed to Gather on LoResAO1 / LoResAO2 / LoResDB
  void PrefetchData( uint index, float2 uv )
  {
      float4 AO1 = LoResAO1.Gather( LinearSampler, uv );

  #ifdef COMBINE_LOWER_RESOLUTIONS
      // Keep the smaller of the two low-res AO values for each texel.
      AO1 = min(AO1, LoResAO2.Gather( LinearSampler, uv ));
  #endif

      // Gather components w,z land on the current cache row, x,y on the next one.
      AOCache1[index     ] = AO1.w;
      AOCache1[index +  1] = AO1.z;
      AOCache1[index + 16] = AO1.x;
      AOCache1[index + 17] = AO1.y;

      // Depth is cached as its reciprocal, using the same component-to-slot mapping.
      float4 ID = 1.0 / LoResDB.Gather( LinearSampler, uv );
      DepthCache[index     ] = ID.w;
      DepthCache[index +  1] = ID.z;
      DepthCache[index + 16] = ID.x;
      DepthCache[index + 17] = ID.y;
  }
  // Five-tap blur centred on c with weights (0.5, 1, 1, 1, 0.5) / 4.
  //   a..e   - the five samples, in order; c is the centre tap
  //   Left   - when false, a is replaced by the (possibly replaced) b
  //   Middle - when true, b and d are always kept
  //   Right  - when false, e is replaced by the (possibly replaced) d
  // b is kept only if Left or Middle is set, d only if Right or Middle is set;
  // otherwise each falls back to c. With all three flags false the result is c.
  float SmartBlur( float a, float b, float c, float d, float e, bool Left, bool Middle, bool Right )
  {
      // Inner taps first: the outer taps may inherit their replaced values.
      b = (Left | Middle) ? b : c;
      a = Left ? a : b;
      d = (Right | Middle) ? d : c;
      e = Right ? e : d;
      return ((a + e) / 2.0 + b + c + d) / 4.0;
  }
  // Edge test used by the blur passes.
  //   d1, d2 - two adjacent depth deltas
  //   l1, l2 - callers pass d1*d1 + StepSize and d2*d2 + StepSize
  // Returns true when (d1*d2 + StepSize)^2 > l1 * l2 * kBlurTolerance. With the
  // l values above this is the squared cosine of the angle between the vectors
  // (sqrt(StepSize), d1) and (sqrt(StepSize), d2) compared against kBlurTolerance,
  // written without a division or square root.
  bool CompareDeltas( float d1, float d2, float l1, float l2 )
  {
      float temp = d1 * d2 + StepSize;
      return temp * temp > l1 * l2 * kBlurTolerance;
  }
  // Horizontal blur pass: reads seven consecutive entries of AOCache1 and
  // DepthCache starting at leftMostIndex and writes three blurred values to
  // AOCache2[leftMostIndex .. leftMostIndex+2]. Output k is centred on input
  // k+2, so the blurred row is shifted two columns left relative to its source.
  void BlurHorizontally( uint leftMostIndex )
  {
      float a0 = AOCache1[leftMostIndex    ];
      float a1 = AOCache1[leftMostIndex + 1];
      float a2 = AOCache1[leftMostIndex + 2];
      float a3 = AOCache1[leftMostIndex + 3];
      float a4 = AOCache1[leftMostIndex + 4];
      float a5 = AOCache1[leftMostIndex + 5];
      float a6 = AOCache1[leftMostIndex + 6];

      float d0 = DepthCache[leftMostIndex    ];
      float d1 = DepthCache[leftMostIndex + 1];
      float d2 = DepthCache[leftMostIndex + 2];
      float d3 = DepthCache[leftMostIndex + 3];
      float d4 = DepthCache[leftMostIndex + 4];
      float d5 = DepthCache[leftMostIndex + 5];
      float d6 = DepthCache[leftMostIndex + 6];

      // Differences between neighbouring cached depths.
      float d01 = d1 - d0;
      float d12 = d2 - d1;
      float d23 = d3 - d2;
      float d34 = d4 - d3;
      float d45 = d5 - d4;
      float d56 = d6 - d5;

      // Squared deltas biased by StepSize; passed to CompareDeltas as l1/l2.
      float l01 = d01 * d01 + StepSize;
      float l12 = d12 * d12 + StepSize;
      float l23 = d23 * d23 + StepSize;
      float l34 = d34 * d34 + StepSize;
      float l45 = d45 * d45 + StepSize;
      float l56 = d56 * d56 + StepSize;

      // cXY compares the two deltas spanning samples X..Y.
      bool c02 = CompareDeltas( d01, d12, l01, l12 );
      bool c13 = CompareDeltas( d12, d23, l12, l23 );
      bool c24 = CompareDeltas( d23, d34, l23, l34 );
      bool c35 = CompareDeltas( d34, d45, l34, l45 );
      bool c46 = CompareDeltas( d45, d56, l45, l56 );

      // Three overlapping five-tap windows share the flags computed above.
      AOCache2[leftMostIndex    ] = SmartBlur( a0, a1, a2, a3, a4, c02, c13, c24 );
      AOCache2[leftMostIndex + 1] = SmartBlur( a1, a2, a3, a4, a5, c13, c24, c35 );
      AOCache2[leftMostIndex + 2] = SmartBlur( a2, a3, a4, a5, a6, c24, c35, c46 );
  }
  // Vertical blur pass: reads six rows of AOCache2 (row stride 16) starting at
  // topMostIndex and writes two blurred values to AOCache1[topMostIndex] and
  // AOCache1[topMostIndex+16].
  void BlurVertically( uint topMostIndex )
  {
      float a0 = AOCache2[topMostIndex     ];
      float a1 = AOCache2[topMostIndex + 16];
      float a2 = AOCache2[topMostIndex + 32];
      float a3 = AOCache2[topMostIndex + 48];
      float a4 = AOCache2[topMostIndex + 64];
      float a5 = AOCache2[topMostIndex + 80];

      // Depth is read two columns to the right: BlurHorizontally stores the value
      // centred on source column k+2 at column k, while DepthCache is unshifted.
      float d0 = DepthCache[topMostIndex +  2];
      float d1 = DepthCache[topMostIndex + 18];
      float d2 = DepthCache[topMostIndex + 34];
      float d3 = DepthCache[topMostIndex + 50];
      float d4 = DepthCache[topMostIndex + 66];
      float d5 = DepthCache[topMostIndex + 82];

      // Differences between vertically neighbouring cached depths.
      float d01 = d1 - d0;
      float d12 = d2 - d1;
      float d23 = d3 - d2;
      float d34 = d4 - d3;
      float d45 = d5 - d4;

      // Squared deltas biased by StepSize; passed to CompareDeltas as l1/l2.
      float l01 = d01 * d01 + StepSize;
      float l12 = d12 * d12 + StepSize;
      float l23 = d23 * d23 + StepSize;
      float l34 = d34 * d34 + StepSize;
      float l45 = d45 * d45 + StepSize;

      bool c02 = CompareDeltas( d01, d12, l01, l12 );
      bool c13 = CompareDeltas( d12, d23, l12, l23 );
      bool c24 = CompareDeltas( d23, d34, l23, l34 );
      bool c35 = CompareDeltas( d34, d45, l34, l45 );

      // Two overlapping five-tap windows give two vertically adjacent outputs.
      float aoResult1 = SmartBlur( a0, a1, a2, a3, a4, c02, c13, c24 );
      float aoResult2 = SmartBlur( a1, a2, a3, a4, a5, c13, c24, c35 );

      AOCache1[topMostIndex     ] = aoResult1;
      AOCache1[topMostIndex + 16] = aoResult2;
  }
  // We essentially want 5 weights: 4 for each low-res pixel and 1 to blend in when none of the 4 really
  // match. The filter strength is 1 / DeltaZTolerance. So a tolerance of 0.01 would yield a strength of 100.
  // Note that a perfect match of low to high depths would yield a weight of 10^6, completely superseding any
  // noise filtering. The noise filter is intended to soften the effects of shimmering when the high-res depth
  // buffer has a lot of small holes in it causing the low-res depth buffer to inaccurately represent it.
  //
  //   HiDepth   - depth of the output pixel
  //   HiAO      - AO factor multiplied into the final result
  //   LowDepths - depths of the four low-res neighbours, ordered to match the (9, 3, 1, 3) base weights
  //   LowAO     - blurred AO of the same four neighbours, in the same order
  // Returns HiAO times the weighted average of LowAO, where the extra noise-filter term contributes
  // NoiseFilterStrength to both the sum and the total weight (i.e. it blends toward 1.0 and is not
  // itself scaled by HiAO).
  float BilateralUpsample( float HiDepth, float HiAO, float4 LowDepths, float4 LowAO )
  {
      // Base weights divided by the depth mismatch; kUpsampleTolerance offsets the denominator.
      float4 weights = float4(9, 3, 1, 3) / ( abs(HiDepth - LowDepths) + kUpsampleTolerance );
      float TotalWeight = dot(weights, 1) + NoiseFilterStrength;
      float WeightedSum = dot(LowAO, weights) + NoiseFilterStrength;
      return HiAO * WeightedSum / TotalWeight;
  }
  // Compute entry point. Each 8x8 thread group prefetches a 16x16 tile of low-res
  // AO and depth, blurs it horizontally then vertically in group-shared memory,
  // and finally each thread upsamples one 2x2 quad into AoResult.
  [RootSignature(SSAO_RootSig)]
  [numthreads( 8, 8, 1 )]
  void main( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID )
  {
      //
      // Load 4 pixels per thread into LDS to fill the 16x16 LDS cache with depth and AO
      //
      // The cache index is 2*GTid.x + 32*GTid.y, i.e. column 2*x of row 2*y (row stride 16).
      PrefetchData( GTid.x << 1 | GTid.y << 5, int2(DTid.xy + GTid.xy - 2) * InvLowResolution );
      GroupMemoryBarrierWithGroupSync();

      // Goal: End up with a 9x9 patch that is blurred so we can upsample. Blur radius is 2 pixels, so start with 13x13 area.

      //
      // Horizontally blur the pixels. 13x13 -> 9x13
      //
      // 39 threads: 13 rows (GI / 3) times three column groups (GI % 3), three outputs each.
      if (GI < 39)
          BlurHorizontally((GI / 3) * 16 + (GI % 3) * 3);
      GroupMemoryBarrierWithGroupSync();

      //
      // Vertically blur the pixels. 9x13 -> 9x9
      //
      // 45 threads: 9 columns (GI % 9) times five row pairs (GI / 9); each call writes two rows.
      if (GI < 45)
          BlurVertically((GI / 9) * 32 + GI % 9);
      GroupMemoryBarrierWithGroupSync();

      //
      // Bilateral upsample
      //
      uint Idx0 = GTid.x + GTid.y * 16;

      // Blurred low-res AO for this thread's 2x2 neighbourhood, packed as
      // x = next row / this column, y = next row / next column,
      // z = this row / next column, w = this row / this column.
      float4 LoSSAOs = float4( AOCache1[Idx0+16], AOCache1[Idx0+17], AOCache1[Idx0+1], AOCache1[Idx0] );

      // We work on a quad of pixels at once because then we can gather 4 each of high and low-res depth values
      float2 UV0 = DTid.xy * InvLowResolution;
      float2 UV1 = DTid.xy * 2 * InvHighResolution;

  #ifdef BLEND_WITH_HIGHER_RESOLUTION
      float4 HiSSAOs = HiResAO.Gather(LinearSampler, UV1);
  #else
      // No higher-resolution AO to blend with: use a neutral factor.
      float4 HiSSAOs = 1.0;
  #endif
      float4 LoDepths = LoResDB.Gather(LinearSampler, UV0);
      float4 HiDepths = HiResDB.Gather(LinearSampler, UV1);

      // Each output pixel gets the low-res samples rotated so that the first
      // component (base weight 9) is the one paired with that pixel.
      int2 OutST = DTid.xy << 1;
      AoResult[OutST + int2(-1,  0)] = BilateralUpsample( HiDepths.x, HiSSAOs.x, LoDepths.xyzw, LoSSAOs.xyzw );
      AoResult[OutST + int2( 0,  0)] = BilateralUpsample( HiDepths.y, HiSSAOs.y, LoDepths.yzwx, LoSSAOs.yzwx );
      AoResult[OutST + int2( 0, -1)] = BilateralUpsample( HiDepths.z, HiSSAOs.z, LoDepths.zwxy, LoSSAOs.zwxy );
      AoResult[OutST + int2(-1, -1)] = BilateralUpsample( HiDepths.w, HiSSAOs.w, LoDepths.wxyz, LoSSAOs.wxyz );
  }