ssao_downsample.glsl 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  2. // Copyright (c) 2016, Intel Corporation
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
  4. // documentation files (the "Software"), to deal in the Software without restriction, including without limitation
  5. // the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
  6. // permit persons to whom the Software is furnished to do so, subject to the following conditions:
  7. // The above copyright notice and this permission notice shall be included in all copies or substantial portions of
  8. // the Software.
  9. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  10. // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  11. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  12. // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  13. // SOFTWARE.
  14. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  15. // File changes (yyyy-mm-dd)
  16. // 2016-09-07: [email protected]: first commit
  17. // 2020-12-05: clayjohn: convert to Vulkan and Godot
  18. ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  19. #[compute]
  20. #version 450
  21. VERSION_DEFINES
  // Every workgroup runs 8 x 8 x 1 invocations.
  layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

  // Push-constant parameters consumed by this shader.
  layout(push_constant, binding = 1, std430) uniform Params {
      // Scale applied to (texel coordinate + 0.5) to obtain sampling UVs.
      vec2 pixel_size;
      // Orthogonal mode: combined with z_near to scale and offset the depth.
      // Perspective mode: used directly as the additive term of the depth linearisation.
      float z_far;
      // Orthogonal mode: combined with z_far to scale and offset the depth.
      // Perspective mode: used directly as the numerator of the depth linearisation.
      float z_near;
      // Selects the orthogonal branch of the depth conversion.
      bool orthogonal;
      // Divisor of the squared-distance falloff used when averaging depths for the mip levels.
      float radius_sq;
      // Not read anywhere in the visible shader code.
      uvec2 pad;
  } params;

  // Depth texture that is sampled and converted.
  layout(set = 0, binding = 0) uniform sampler2D source_depth;

  // Receives the converted depths; each array layer holds one texel of the sampled 2x2 footprint.
  layout(r16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_image0; // TODO: rename
#ifdef GENERATE_MIPS
  // Receive the first, second and third averaged levels produced by prepare_depths_and_mips().
  layout(r16f, set = 2, binding = 0) uniform restrict writeonly image2DArray dest_image1;
  layout(r16f, set = 2, binding = 1) uniform restrict writeonly image2DArray dest_image2;
  layout(r16f, set = 2, binding = 2) uniform restrict writeonly image2DArray dest_image3;
#endif
  // Converts four raw depth-buffer samples at once.
  //
  // Perspective mode evaluates depth_linearize_mul / (depth_linearize_add - depth), an optimised version of
  // "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar" with:
  //   depth_linearize_mul = (cameraClipFar * cameraClipNear) / (cameraClipFar - cameraClipNear)
  //   depth_linearize_add = cameraClipFar / (cameraClipFar - cameraClipNear)
  // The two terms are read from params.z_near and params.z_far respectively.
  // NOTE(review): this implies the host uploads the precomputed terms in those fields when the projection is
  // not orthogonal - confirm against the calling code.
  //
  // Orthogonal mode remaps the samples from [0, 1] to [-1, 1], then offsets and scales them with z_near / z_far.
  vec4 screen_space_to_view_space_depth(vec4 p_depth) {
      if (!params.orthogonal) {
          float linearize_mul = params.z_near;
          float linearize_add = params.z_far;
          return linearize_mul / (linearize_add - p_depth);
      }

      float z_sum = params.z_far + params.z_near;
      float z_range = params.z_far - params.z_near;
      vec4 signed_depth = p_depth * 2.0 - 1.0;
      return ((signed_depth + z_sum / z_range) * z_range) / 2.0;
  }
  // Scalar counterpart of the vec4 overload: converts one raw depth-buffer sample.
  //
  // Perspective mode evaluates params.z_near / (params.z_far - p_depth); the two fields act as the multiplicative
  // and additive linearisation terms.
  //
  // Orthogonal mode remaps the sample from [0, 1] to [-1, 1], then offsets and scales it with z_near / z_far.
  // The result is divided by 2.0 exactly like the vec4 overload. The previous extra division by params.z_far made
  // this overload return a normalised value, so the half-buffer path stored depths in a different unit than every
  // other path writing to the same image.
  float screen_space_to_view_space_depth(float p_depth) {
      if (params.orthogonal) {
          float depth = p_depth * 2.0 - 1.0;
          return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
      }

      float depth_linearize_mul = params.z_near;
      float depth_linearize_add = params.z_far;
      return depth_linearize_mul / (depth_linearize_add - p_depth);
  }
  61. #ifdef GENERATE_MIPS
  // Workgroup-shared scratch space, indexed as [layer][local x][local y].
  shared float depth_buffer[4][8][8];

  // Weighted average of four depths that favours the smallest one.
  //
  // A sample's weight is 1 - (distance to the smallest depth)^2 / radius_sq, clamped to [0, 1]; the smallest
  // sample itself therefore always has weight 1.
  float mip_smart_average(vec4 p_depths) {
      float nearest = min(min(p_depths.x, p_depths.y), min(p_depths.z, p_depths.w));
      float neg_inv_radius_sq = -1.0 / params.radius_sq;

      vec4 delta = p_depths - vec4(nearest);
      vec4 weights = clamp(delta * delta * neg_inv_radius_sq + 1.0, 0.0, 1.0);

      float weight_sum = dot(weights, vec4(1.0));
      return dot(weights, p_depths) / weight_sum;
  }
  // Converts one 2x2 depth footprint, stores it, and builds three further averaged levels through shared memory.
  //
  // p_samples      - raw depth samples; components w, z, x, y go to layers 0, 1, 2, 3 respectively.
  // p_output_coord - texel written in dest_image0; halved for each subsequent level.
  // p_gtid         - invocation position inside the 8x8 workgroup, used to index depth_buffer.
  //
  // Must be called by every invocation of the workgroup because it executes barrier().
  void prepare_depths_and_mips(vec4 p_samples, uvec2 p_output_coord, uvec2 p_gtid) {
      p_samples = screen_space_to_view_space_depth(p_samples);

      depth_buffer[0][p_gtid.x][p_gtid.y] = p_samples.w;
      depth_buffer[1][p_gtid.x][p_gtid.y] = p_samples.z;
      depth_buffer[2][p_gtid.x][p_gtid.y] = p_samples.x;
      depth_buffer[3][p_gtid.x][p_gtid.y] = p_samples.y;

      imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 0), vec4(p_samples.w));
      imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 1), vec4(p_samples.z));
      imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 2), vec4(p_samples.x));
      imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 3), vec4(p_samples.y));

      // The four invocations of a 2x2 quad share one buffer_coord but each reduces a different layer, so no two
      // invocations ever write the same depth_buffer element.
      uvec2 depth_array_offset = p_gtid % 2u;
      uint depth_array_index = 2u * depth_array_offset.y + depth_array_offset.x;
      ivec2 buffer_coord = ivec2(p_gtid) - ivec2(depth_array_offset);

      // First averaged level: all invocations take part.
      p_output_coord /= 2;
      groupMemoryBarrier();
      barrier();

      {
          float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0];
          float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 1];
          float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 0];
          float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 1];

          float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11));
          imageStore(dest_image1, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg));
          depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg;
      }

      // Second averaged level: only the first quad of every 4x4 tile continues (buffer_coord is a multiple of 4).
      bool still_alive = p_gtid.x % 4 == depth_array_offset.x && p_gtid.y % 4 == depth_array_offset.y;

      p_output_coord /= 2;
      groupMemoryBarrier();
      barrier();

      if (still_alive) {
          float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0];
          float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 2];
          float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 0];
          float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 2];

          float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11));
          imageStore(dest_image2, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg));
          depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg;
      }

      // Third averaged level: only the first quad of the workgroup continues (buffer_coord is (0, 0)).
      // The y test must use p_gtid.y. Comparing depth_array_offset.y with itself is always true, which kept
      // invocations with buffer_coord.y > 3 alive: they read depth_buffer past its [8] extent at
      // buffer_coord.y + 4 and issued conflicting stores to the same dest_image3 texel.
      still_alive = p_gtid.x % 8 == depth_array_offset.x && p_gtid.y % 8 == depth_array_offset.y;

      p_output_coord /= 2;
      groupMemoryBarrier();
      barrier();

      if (still_alive) {
          float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0];
          float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 4];
          float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 0];
          float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 4];

          float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11));
          imageStore(dest_image3, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg));
      }
  }
  122. #else
  123. #ifndef USE_HALF_BUFFERS
  // Converts one 2x2 depth footprint and writes it to dest_image0 without producing any further levels.
  //
  // p_samples - raw depth samples; components w, z, x, y go to layers 0, 1, 2, 3 respectively.
  // p_tid     - texel written in each of the four layers.
  void prepare_depths(vec4 p_samples, uvec2 p_tid) {
      vec4 view_depths = screen_space_to_view_space_depth(p_samples);
      ivec2 texel = ivec2(p_tid);

      imageStore(dest_image0, ivec3(texel, 0), vec4(view_depths.w));
      imageStore(dest_image0, ivec3(texel, 1), vec4(view_depths.z));
      imageStore(dest_image0, ivec3(texel, 2), vec4(view_depths.x));
      imageStore(dest_image0, ivec3(texel, 3), vec4(view_depths.y));
  }
  131. #endif
  132. #endif
  // Entry point: each invocation reads one 2x2 footprint of source_depth (taps 2 texels apart and footprints
  // 4 texels apart when USE_HALF_SIZE is defined, otherwise adjacent taps and a stride of 2) and writes the
  // converted depths to the layers of dest_image0 at its global invocation coordinate.
  void main() {
  #ifdef USE_HALF_BUFFERS
      // Only the (0, 0) and diagonal taps are converted; they land in layers 0 and 3.
      ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
  #ifdef USE_HALF_SIZE
      ivec2 src_texel = 4 * out_texel;
      ivec2 diagonal = ivec2(2, 2);
  #else
      ivec2 src_texel = 2 * out_texel;
      ivec2 diagonal = ivec2(1, 1);
  #endif
      float depth_00 = screen_space_to_view_space_depth(texelFetch(source_depth, src_texel, 0).x);
      float depth_11 = screen_space_to_view_space_depth(texelFetch(source_depth, src_texel + diagonal, 0).x);

      imageStore(dest_image0, ivec3(out_texel, 0), vec4(depth_00));
      imageStore(dest_image0, ivec3(out_texel, 3), vec4(depth_11));
  #else // !USE_HALF_BUFFERS
      ivec2 output_coord = ivec2(gl_GlobalInvocationID.xy);
  #ifdef USE_HALF_SIZE
      // Four explicit taps, packed as x = (0, 2), y = (2, 2), z = (2, 0), w = (0, 0).
      vec2 uv = (vec2(4 * output_coord) + 0.5) * params.pixel_size;
      vec4 samples = vec4(
              textureLodOffset(source_depth, uv, 0.0, ivec2(0, 2)).x,
              textureLodOffset(source_depth, uv, 0.0, ivec2(2, 2)).x,
              textureLodOffset(source_depth, uv, 0.0, ivec2(2, 0)).x,
              textureLodOffset(source_depth, uv, 0.0, ivec2(0, 0)).x);
  #else
      // A single gather fetches the whole footprint.
      vec2 uv = (vec2(2 * output_coord) + 0.5) * params.pixel_size;
      vec4 samples = textureGather(source_depth, uv);
  #endif
  #ifdef GENERATE_MIPS
      prepare_depths_and_mips(samples, output_coord, gl_LocalInvocationID.xy);
  #else
      prepare_depths(samples, gl_GlobalInvocationID.xy);
  #endif
  #endif
  }