ShadowProject.bsl 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. #include "$ENGINE$/GBufferInput.bslinc"
  2. #include "$ENGINE$/ShadowProjectionCommon.bslinc"
  3. technique ShadowProject
  4. {
  5. mixin GBufferInput;
  6. mixin ShadowProjectionCommon;
  // Shader permutations compiled for this technique. Each entry lists the values
  // its preprocessor symbol can take.
  variations
  {
      // Picks the filter used by fsmain: 1 -> PCF1x1, 2 -> PCF2x2, 3 -> PCF4x4, 4 -> PCF6x6.
      SHADOW_QUALITY = { 1, 2, 3, 4 };
      // Switches the shadow map between Texture2DArray and Texture2D, selects the
      // blend mode, and enables the distance-based alpha fade in fsmain.
      CASCADING = { true, false };
      // Not referenced in this file; presumably consumed by one of the mixins - TODO confirm.
      NEEDS_TRANSFORM = { true, false };
      // Values above 1 make fsmain load scene depth per sample.
      MSAA_COUNT = { 1, 2 };
  };
  // Depth state: both depth reads and depth writes are turned off for this technique.
  depth
  {
      read = false;
      write = false;
  };
  // Stencil state. Only the low seven bits (0x7F) are read and written.
  stencil
  {
      enabled = true;

      // The stencil is cleared as part of performing the test itself.
      // Note: the cost of clearing this way versus clearing in a separate step still
      // needs to be measured, because this approach disables the HiStencil optimization.
      front = { zero, zero, zero, neq };
      readmask = 0x7F;
      writemask = 0x7F;
  };
  // Blend state. Both permutations enable blending and restrict writes to the red
  // channel; they differ only in how the color is combined.
  #if CASCADING
  // Cascaded shadows: combine using the source alpha / inverse source alpha factors.
  // fsmain outputs the cascade fade factor in alpha for this permutation.
  blend
  {
      target
      {
          enabled = true;
          writemask = R;
          color = { srcA, srcIA, add };
      };
  };
  #else
  // Non-cascaded shadows: unit factors on both sides, combined with the max operation.
  blend
  {
      target
      {
          enabled = true;
          writemask = R;
          color = { one, one, max };
      };
  };
  #endif
  50. code
  51. {
  // Shadow map being projected. The cascaded permutation binds a texture array
  // (the slice is chosen with gFace); otherwise a plain 2D texture is used.
  #if CASCADING
  Texture2DArray gShadowTex;
  #else
  Texture2D gShadowTex;
  #endif

  // Sampler used for every Sample/GatherRed call on gShadowTex.
  SamplerState gShadowSampler;
  // Per-draw parameters for shadow projection.
  [internal]
  cbuffer Params
  {
      // Transforms a point given in mixed space (xy in clip space, z in view space)
      // into shadow space.
      float4x4 gMixedToShadowSpace;

      // Scale applied to UV coordinates to obtain shadow map texel positions.
      float2 gShadowMapSize;
      // Scale applied to texel positions to get back to UV coordinates.
      float2 gShadowMapSizeInv;

      // Multiplier on the (shadow depth - scene depth) difference in getOcclusion;
      // controls how quickly occlusion ramps between 0 and 1.
      float gSoftTransitionScale;
      // Multiplier applied to the final occlusion value in fsmain.
      float gFadePercent;

      // Used only by the CASCADING permutation to compute the output alpha:
      // alpha = 1 - saturate((-depth - gFadePlaneDepth) * gInvFadePlaneRange).
      float gFadePlaneDepth;
      float gInvFadePlaneRange;

      // Third texture coordinate (array slice) used when sampling gShadowTex in the
      // CASCADING permutation.
      float gFace;
  };
  // Turns up to four shadow map depths into occlusion factors for a receiver at
  // sceneDepth. A result of 1 means the receiver is occluded, 0 means it is not, and
  // in-between values give a soft transition for receivers close to their casters.
  float4 getOcclusion(float4 shadowDepth, float sceneDepth)
  {
      // Signed distance from the receiver to each stored shadow depth.
      float4 depthDelta = shadowDepth - sceneDepth;

      // The +1 offset pushes the shadow back slightly to reduce acne, while the scale
      // sets the width of the soft transition.
      // Visualization (Mathematica): Plot[1.0 - Clip[(500 - x)*0.5 + 1, {0, 1}], {x, 480, 520}]
      float4 unoccluded = saturate(depthDelta * gSoftTransitionScale + 1);

      return 1.0f - unoccluded;
  }
  // Computes where to gather shadow map samples for the given UV, plus the
  // sub-texel fraction used to interpolate between the gathered samples.
  //
  // uv       - shadow map coordinates of the point being shaded.
  // fraction - (out) fractional part of the texel-space position.
  // returns  - gather location in UV space: the floored texel position plus 0.5,
  //            scaled by gShadowMapSizeInv.
  // NOTE(review): the previous comment described the returned location as lying on a
  // border between texels - confirm against the sampling convention in use.
  float2 getFilteringInfo(float2 uv, out float2 fraction)
  {
      // Convert to texel units, then shift by half a texel because UV (0, 0)
      // corresponds to texel position (-0.5, -0.5).
      float2 texelPos = uv * gShadowMapSize;
      texelPos -= 0.5f;

      // Sub-texel remainder, used by callers as interpolation weights.
      fraction = frac(texelPos);

      // Snap to the containing texel and convert back to UV.
      float2 snappedTexel = floor(texelPos);
      return (snappedTexel + 0.5f) * gShadowMapSizeInv;
  }
  // Single-tap shadow lookup: samples the shadow map once at uv and converts the
  // stored depth to an occlusion factor for a receiver at sceneDepth.
  float PCF1x1(float2 uv, float sceneDepth)
  {
      // The cascaded permutation addresses a texture array; gFace selects the slice.
  #if CASCADING
      float3 sampleCoord = float3(uv, gFace);
  #else
      float2 sampleCoord = uv;
  #endif

      float storedDepth = gShadowTex.Sample(gShadowSampler, sampleCoord).r;

      // getOcclusion works on four depths at once, so replicate the single sample
      // and keep just one lane of the result.
      float4 occlusion = getOcclusion(storedDepth.rrrr, sceneDepth);
      return occlusion.r;
  }
  // 2x2 percentage-closer filter: gathers four shadow depths around uv, converts
  // each to occlusion and bilinearly blends them using the sub-texel fraction.
  float PCF2x2(float2 uv, float sceneDepth)
  {
      float2 weights;

      // The cascaded permutation addresses a texture array; gFace selects the slice.
  #if CASCADING
      float3 gatherCoord = float3(getFilteringInfo(uv, weights), gFace);
  #else
      float2 gatherCoord = getFilteringInfo(uv, weights);
  #endif

      // One gather yields four samples in counter-clockwise order, starting with lower left:
      // x = lower left, y = lower right, z = upper right, w = upper left.
      float4 depths = gShadowTex.GatherRed(gShadowSampler, gatherCoord);
      float4 occ = getOcclusion(depths, sceneDepth);

      // Blend left-to-right within each row, then blend the two rows.
      float upperRow = lerp(occ.w, occ.z, weights.x);
      float lowerRow = lerp(occ.x, occ.y, weights.x);
      return lerp(upperRow, lowerRow, weights.y);
  }
  // 4x4 percentage-closer filter: gathers a 4x4 block of shadow depths around uv,
  // converts them to occlusion and returns the weighted average. The outermost
  // rows/columns are weighted by the sub-texel fraction, so the effective filter
  // footprint is 3x3 texels.
  //
  // uv         - shadow map coordinates of the point being shaded.
  // sceneDepth - receiver depth to compare the stored shadow depths against.
  // returns    - average occlusion (1 = occluded, 0 = not occluded).
  float PCF4x4(float2 uv, float sceneDepth)
  {
      float2 fraction;

      // The cascaded permutation addresses a texture array; gFace selects the slice.
  #if CASCADING
      float3 sampleCenter = float3(getFilteringInfo(uv, fraction), gFace);
  #else
      float2 sampleCenter = getFilteringInfo(uv, fraction);
  #endif

      // Gather 16 samples in four 2x2 gathers. Samples are returned in counter-clockwise order,
      // starting with lower left: x = lower left, y = lower right, z = upper right, w = upper left.
      // Gathers are performed in clockwise order, starting with top left block.
      float4 topLeftSamples = gShadowTex.GatherRed(gShadowSampler, sampleCenter, int2(-1, -1));
      float4 topRightSamples = gShadowTex.GatherRed(gShadowSampler, sampleCenter, int2(1, -1));
      float4 botLeftSamples = gShadowTex.GatherRed(gShadowSampler, sampleCenter, int2(-1, 1));
      float4 botRightSamples = gShadowTex.GatherRed(gShadowSampler, sampleCenter, int2(1, 1));

      // Convert samples to occlusion
      float4 topLeftOcclusion = getOcclusion(topLeftSamples, sceneDepth);
      float4 topRightOcclusion = getOcclusion(topRightSamples, sceneDepth);
      float4 botLeftOcclusion = getOcclusion(botLeftSamples, sceneDepth);
      float4 botRightOcclusion = getOcclusion(botRightSamples, sceneDepth);

      // Get the average occlusion value. Fraction only needs to be applied to edge samples.
      //// Accumulate occlusion per row (x = top row ... w = bottom row)
      float4 rowOcclusion;

      //// Add column 1, top to bottom (left column of the left blocks)
      rowOcclusion.x = topLeftOcclusion.w * (1.0f - fraction.x);
      rowOcclusion.y = topLeftOcclusion.x * (1.0f - fraction.x);
      rowOcclusion.z = botLeftOcclusion.w * (1.0f - fraction.x);
      rowOcclusion.w = botLeftOcclusion.x * (1.0f - fraction.x);

      //// Add column 2 & 3, top to bottom (right column of the left blocks, left column of the right blocks)
      rowOcclusion.x += topLeftOcclusion.z + topRightOcclusion.w;
      rowOcclusion.y += topLeftOcclusion.y + topRightOcclusion.x;
      rowOcclusion.z += botLeftOcclusion.z + botRightOcclusion.w;
      rowOcclusion.w += botLeftOcclusion.y + botRightOcclusion.x;

      //// Add column 4, top to bottom (right column of the right blocks)
      rowOcclusion.x += topRightOcclusion.z * fraction.x;
      rowOcclusion.y += topRightOcclusion.y * fraction.x;
      rowOcclusion.z += botRightOcclusion.z * fraction.x;
      // The bottom row must use the lower-right sample (y). The upper-left sample (w)
      // belongs to column 3 of the row above and is already accounted for there.
      rowOcclusion.w += botRightOcclusion.y * fraction.x;

      //// Accumulate occlusion per columns; dot() yields a scalar
      float occlusionAccumulator = dot(rowOcclusion, float4(1.0f - fraction.y, 1.0f, 1.0f, fraction.y));

      // Calc average occlusion using a 3x3 area and return
      return occlusionAccumulator * (1.0f / 9.0f);
  }
  // Sums occlusion across all six columns for two texel rows. Input is three
  // side-by-side 2x2 blocks (left, mid, right), each holding its samples in
  // counter-clockwise order starting with lower left (x = lower left, y = lower right,
  // z = upper right, w = upper left).
  //
  // fraction - horizontal sub-texel fraction; the first column is weighted by
  //            (1 - fraction) and the last column by fraction.
  // returns  - x = accumulated upper row, y = accumulated lower row.
  float2 accumulateRows6x2(float fraction, float4 left, float4 mid, float4 right)
  {
      // Swizzles pick (upper, lower) pairs so both rows are processed together.
      // Column 1: left column of the left block, partially weighted.
      float2 rowSums = left.wx * (1.0f - fraction);

      // Columns 2-5: fully weighted interior columns.
      rowSums += left.zy + mid.wx + mid.zy + right.wx;

      // Column 6: right column of the right block, partially weighted.
      rowSums += right.zy * fraction;

      return rowSums;
  }
  // 6x6 percentage-closer filter: gathers a 6x6 block of shadow depths around uv,
  // converts them to occlusion and returns the weighted average. The outermost
  // rows/columns are weighted by the sub-texel fraction, so the effective filter
  // footprint is 5x5 texels.
  float PCF6x6(float2 uv, float sceneDepth)
  {
      float2 weights;

      // The cascaded permutation addresses a texture array; gFace selects the slice.
  #if CASCADING
      float3 gatherCoord = float3(getFilteringInfo(uv, weights), gFace);
  #else
      float2 gatherCoord = getFilteringInfo(uv, weights);
  #endif

      // 36 samples are fetched as nine 2x2 gathers, processed one band (three gathers,
      // two texel rows) at a time from top to bottom. Each band is reduced immediately
      // to a pair of per-row sums. Individual gathers return their samples in
      // counter-clockwise order, starting with lower left.
      float2 bandSums[3];

      [unroll]
      for(int band = 0; band < 3; band++)
      {
          // Vertical texel offsets of the bands: -2, 0, 2.
          int offsetY = -2 + band * 2;

          float4 leftOcc = getOcclusion(gShadowTex.GatherRed(gShadowSampler, gatherCoord, int2(-2, offsetY)), sceneDepth);
          float4 midOcc = getOcclusion(gShadowTex.GatherRed(gShadowSampler, gatherCoord, int2(0, offsetY)), sceneDepth);
          float4 rightOcc = getOcclusion(gShadowTex.GatherRed(gShadowSampler, gatherCoord, int2(2, offsetY)), sceneDepth);

          bandSums[band] = accumulateRows6x2(weights.x, leftOcc, midOcc, rightOcc);
      }

      // Combine the six row sums: first and last rows are partially weighted, the
      // four interior rows count fully.
      float total = bandSums[0].x * (1.0f - weights.y);
      total += bandSums[0].y + bandSums[1].x + bandSums[1].y + bandSums[2].x;
      total += bandSums[2].y * weights.y;

      // Normalize by the 5x5 effective area.
      return total * (1.0f / 25.0f);
  }
  // Fragment entry point. Reconstructs the shaded pixel's position from the depth
  // buffer, projects it into shadow space, evaluates the shadow filter selected by
  // SHADOW_QUALITY and writes the encoded occlusion to the red channel. Alpha carries
  // the cascade fade factor (always 1 when CASCADING is off).
  float4 fsmain(VStoFS input, uint sampleIdx : SV_SampleIndex) : SV_Target0
  {
      float2 ndc = input.clipSpacePos.xy / input.clipSpacePos.w;
      uint2 screenPos = NDCToScreen(ndc);

      // Fetch the scene depth for this pixel (per sample when multisampled).
  #if MSAA_COUNT > 1
      float rawDepth = gDepthBufferTex.Load(screenPos, sampleIdx).r;
  #else
      float rawDepth = gDepthBufferTex.Load(int3(screenPos, 0)).r;
  #endif

      // convertFromDeviceZ is defined outside this file; its result is used as the
      // z component of the mixed-space position expected by gMixedToShadowSpace.
      float depth = convertFromDeviceZ(rawDepth);

      // Build the mixed-space position and project it into shadow space.
      float4 mixedPos = float4(ndc * -depth, depth, 1);
      float4 shadowPos = mul(gMixedToShadowSpace, mixedPos);
      shadowPos.xy /= shadowPos.w;

      // Shadow map texels that were never rendered to hold a depth of 1. Clamping
      // just below that keeps such areas unshadowed.
      float receiverDepth = min(shadowPos.z, 0.999999f);

      // Evaluate the filter kernel chosen at compile time.
  #if SHADOW_QUALITY <= 1
      float occlusion = PCF1x1(shadowPos.xy, receiverDepth);
  #elif SHADOW_QUALITY == 2
      float occlusion = PCF2x2(shadowPos.xy, receiverDepth);
  #elif SHADOW_QUALITY == 3
      float occlusion = PCF4x4(shadowPos.xy, receiverDepth);
  #else
      float occlusion = PCF6x6(shadowPos.xy, receiverDepth);
  #endif

      occlusion *= gFadePercent;

      // Cascades fade out with distance past the fade plane; the blend state uses
      // this alpha to combine cascades.
  #if CASCADING
      float alpha = 1.0f - saturate((-depth - gFadePlaneDepth) * gInvFadePlaneRange);
  #else
      float alpha = 1.0f;
  #endif

      // Square-root encoding gives more precision in the dark range; similar in
      // effect to gamma correction but cheaper to evaluate.
      return float4(sqrt(occlusion), 0.0f, 0.0f, alpha);
  }
  243. };
  244. };