copy.glsl 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. #[compute]
  2. #version 450
  3. VERSION_DEFINES
  // Work-group shape: every group is an 8x8x1 tile of invocations.
  layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

  // Bit masks tested against params.flags.

  // MODE_GAUSSIAN_BLUR: take the 7-tap horizontal branch (which reads the source
  // at doubled coordinates) instead of the 5-tap vertical branch.
  #define FLAG_HORIZONTAL (1 << 0)
  // Not tested by main() in this file.
  #define FLAG_USE_BLUR_SECTION (1 << 1)
  // Not tested by main() in this file.
  #define FLAG_USE_ORTHOGONAL_PROJECTION (1 << 2)
  // Not tested by main() in this file.
  #define FLAG_DOF_NEAR_FIRST_TAP (1 << 3)
  // MODE_GAUSSIAN_GLOW: also apply exposure, the HDR threshold and the luminance cap.
  #define FLAG_GLOW_FIRST_PASS (1 << 4)
  // Copy modes: mirror vertically (the UV when sampling the whole source,
  // otherwise the destination row).
  #define FLAG_FLIP_Y (1 << 5)
  // MODE_SIMPLE_COPY: replace rgb with the largest of the three channels.
  #define FLAG_FORCE_LUMINANCE (1 << 6)
  // MODE_SIMPLE_COPY: sample the source with normalized UVs instead of fetching
  // texels at section.xy + position.
  #define FLAG_COPY_ALL_SOURCE (1 << 7)
  // MODE_GAUSSIAN_GLOW: average two source taps for every cached texel.
  #define FLAG_HIGH_QUALITY_GLOW (1 << 8)
  // MODE_SIMPLE_COPY: force the written alpha to 1.0.
  #define FLAG_ALPHA_TO_ONE (1 << 9)
  // Per-dispatch parameters, supplied as a push-constant block.
  // The order and types of the members define the block's memory layout, so
  // they must not be rearranged independently of whatever fills the block.
  layout(push_constant, binding = 1, std430) uniform Params {
      // xy: offset added to the invocation position when fetching source texels.
      // zw: size of the processed region; used for the bounds check and as the
      //     divisor when turning positions into UVs.
      ivec4 section;
      // Offset added to the invocation position when writing to dest_buffer.
      ivec2 target;
      // Combination of FLAG_* bits.
      uint flags;
      // Never read by the shader; presumably layout padding.
      uint pad;

      // Glow.
      float glow_strength; // Multiplier applied to the blurred glow color.
      float glow_bloom; // Lower bound of the first-pass feedback factor.
      float glow_hdr_threshold; // Lower smoothstep edge for the first-pass feedback.
      float glow_hdr_scale; // Added to the threshold to form the upper smoothstep edge.
      float glow_exposure; // First-pass multiplier applied before thresholding.
      float glow_white; // Not read by main() in this file.
      float glow_luminance_cap; // Upper clamp applied to the first-pass result.
      float glow_auto_exposure_grey; // Divides the sampled auto-exposure value (GLOW_USE_AUTO_EXPOSURE).

      // DOF.
      // Far/near plane distances used by MODE_LINEARIZE_DEPTH_COPY. The panorama
      // modes additionally pass camera_z_far to textureLod().
      float camera_z_far;
      float camera_z_near;

      // Never read by the shader; presumably layout padding.
      uint pad2[2];
      // Value written to every texel in MODE_SET_COLOR.
      vec4 set_color;
  } params;
  // Source texture (set 0, binding 0). The sampler type depends on the
  // compile-time mode; MODE_SET_COLOR declares no source at all.
  #if defined(MODE_CUBEMAP_ARRAY_TO_PANORAMA)
  layout(set = 0, binding = 0) uniform samplerCubeArray source_color;
  #elif defined(MODE_CUBEMAP_TO_PANORAMA)
  layout(set = 0, binding = 0) uniform samplerCube source_color;
  #elif !defined(MODE_SET_COLOR)
  layout(set = 0, binding = 0) uniform sampler2D source_color;
  #endif

  // Optional auto-exposure texture (set 1); main() fetches texel (0, 0) from it.
  #if defined(GLOW_USE_AUTO_EXPOSURE)
  layout(set = 1, binding = 0) uniform sampler2D source_auto_exposure;
  #endif

  // Destination image (set 3, binding 0), write-only. The depth modes use a
  // single 32-bit float channel; color modes use rgba8 when DST_IMAGE_8BIT is
  // defined and rgba32f otherwise.
  #if defined(MODE_LINEARIZE_DEPTH_COPY) || defined(MODE_SIMPLE_COPY_DEPTH)
  layout(r32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer;
  #elif defined(DST_IMAGE_8BIT)
  layout(rgba8, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer;
  #else
  layout(rgba32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer;
  #endif

  // Work-group shared scratch space for the glow blur:
  //  - local_cache: 16x16 source texels (row stride 16) loaded by the 8x8 group.
  //  - temp_cache:  horizontally blurred values, 8 columns of 16 entries each.
  #if defined(MODE_GAUSSIAN_GLOW)
  shared vec4 local_cache[256];
  shared vec4 temp_cache[128];
  #endif
  // Entry point. Each invocation handles the destination texel at
  // gl_GlobalInvocationID.xy and writes it to dest_buffer at pos + params.target.
  // The MODE_* sections below are selected at compile time; several of them
  // declare the same local names, so they cannot be combined in one build.
  void main() {
      // Pixel being shaded.
      ivec2 pos = ivec2(gl_GlobalInvocationID.xy);

  #ifndef MODE_GAUSSIAN_GLOW
      // Invocations outside the processed region have nothing to do. The glow
      // path cannot exit here: every invocation of the group has to fill the
      // shared caches and reach both barrier() calls first.
      if (any(greaterThanEqual(pos, params.section.zw))) {
          return;
      }
  #endif

  #ifdef MODE_MIPMAP
      // 2x2 box filter: average the four source texels covered by this
      // destination texel.
      ivec2 base_pos = (pos + params.section.xy) << 1;
      vec4 color = texelFetch(source_color, base_pos, 0);
      color += texelFetch(source_color, base_pos + ivec2(0, 1), 0);
      color += texelFetch(source_color, base_pos + ivec2(1, 0), 0);
      color += texelFetch(source_color, base_pos + ivec2(1, 1), 0);
      color /= 4.0;

      imageStore(dest_buffer, pos + params.target, color);
  #endif

  #ifdef MODE_GAUSSIAN_BLUR
      // Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect.
      if (bool(params.flags & FLAG_HORIZONTAL)) {
          // Horizontal branch: 7 taps, reading the source at doubled coordinates.
          ivec2 base_pos = (pos + params.section.xy) << 1;
          vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.214607;
          color += texelFetch(source_color, base_pos + ivec2(1, 0), 0) * 0.189879;
          color += texelFetch(source_color, base_pos + ivec2(2, 0), 0) * 0.131514;
          color += texelFetch(source_color, base_pos + ivec2(3, 0), 0) * 0.071303;
          color += texelFetch(source_color, base_pos + ivec2(-1, 0), 0) * 0.189879;
          color += texelFetch(source_color, base_pos + ivec2(-2, 0), 0) * 0.131514;
          color += texelFetch(source_color, base_pos + ivec2(-3, 0), 0) * 0.071303;
          imageStore(dest_buffer, pos + params.target, color);
      } else {
          // Vertical branch: 5 taps at the source's own resolution.
          ivec2 base_pos = (pos + params.section.xy);
          vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.38774;
          color += texelFetch(source_color, base_pos + ivec2(0, 1), 0) * 0.24477;
          color += texelFetch(source_color, base_pos + ivec2(0, 2), 0) * 0.06136;
          color += texelFetch(source_color, base_pos + ivec2(0, -1), 0) * 0.24477;
          color += texelFetch(source_color, base_pos + ivec2(0, -2), 0) * 0.06136;
          imageStore(dest_buffer, pos + params.target, color);
      }
  #endif

  #ifdef MODE_GAUSSIAN_GLOW
      // First pass: copy the texture into 16x16 local memory for every 8x8
      // thread block. Each invocation loads a 2x2 quad; the UVs are clamped so
      // the taps stay inside the region described by section.zw.
      vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw));
      uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * 16;

      if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) {
          // High quality: average a second tap, offset by half a texel, into
          // every cache entry.
          vec2 quad_offset_uv = clamp((vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.0)) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw));

          local_cache[dest_index] = (textureLod(source_color, quad_center_uv, 0) + textureLod(source_color, quad_offset_uv, 0)) * 0.5;
          local_cache[dest_index + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.z, 0.0), 0)) * 0.5;
          local_cache[dest_index + 16] = (textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0) + textureLod(source_color, quad_offset_uv + vec2(0.0, 1.0 / params.section.w), 0)) * 0.5;
          local_cache[dest_index + 16 + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.zw), 0)) * 0.5;
      } else {
          local_cache[dest_index] = textureLod(source_color, quad_center_uv, 0);
          local_cache[dest_index + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0);
          local_cache[dest_index + 16] = textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0);
          local_cache[dest_index + 16 + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0);
      }

      // Make the whole 16x16 cache visible to the group before anyone reads it.
      memoryBarrierShared();
      barrier();

      // Horizontal pass. Needs to copy into an 8x16 chunk of local memory so the
      // vertical pass has full resolution. Each invocation filters two adjacent
      // cache rows (y * 32 skips two rows of 16); + 4 skips the left border.
      uint read_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 32 + 4;
      vec4 color_top = vec4(0.0);
      color_top += local_cache[read_index] * 0.174938;
      color_top += local_cache[read_index + 1] * 0.165569;
      color_top += local_cache[read_index + 2] * 0.140367;
      color_top += local_cache[read_index + 3] * 0.106595;
      color_top += local_cache[read_index - 1] * 0.165569;
      color_top += local_cache[read_index - 2] * 0.140367;
      color_top += local_cache[read_index - 3] * 0.106595;

      vec4 color_bottom = vec4(0.0);
      color_bottom += local_cache[read_index + 16] * 0.174938;
      color_bottom += local_cache[read_index + 1 + 16] * 0.165569;
      color_bottom += local_cache[read_index + 2 + 16] * 0.140367;
      color_bottom += local_cache[read_index + 3 + 16] * 0.106595;
      color_bottom += local_cache[read_index - 1 + 16] * 0.165569;
      color_bottom += local_cache[read_index - 2 + 16] * 0.140367;
      color_bottom += local_cache[read_index - 3 + 16] * 0.106595;

      // Rotate samples to take advantage of cache coherency: the results are
      // stored transposed, so each column becomes 16 consecutive entries.
      uint write_index = gl_LocalInvocationID.y * 2 + gl_LocalInvocationID.x * 16;
      temp_cache[write_index] = color_top;
      temp_cache[write_index + 1] = color_bottom;

      memoryBarrierShared();
      barrier();

      // Both barriers have been passed and this invocation's shared-memory
      // writes are done, so invocations that only existed to fill the caches
      // can stop here instead of storing outside the processed region.
      if (any(greaterThanEqual(pos, params.section.zw))) {
          return;
      }

      // Vertical pass.
      uint index = gl_LocalInvocationID.y + gl_LocalInvocationID.x * 16 + 4;
      vec4 color = vec4(0.0);
      color += temp_cache[index] * 0.174938;
      color += temp_cache[index + 1] * 0.165569;
      color += temp_cache[index + 2] * 0.140367;
      color += temp_cache[index + 3] * 0.106595;
      color += temp_cache[index - 1] * 0.165569;
      color += temp_cache[index - 2] * 0.140367;
      color += temp_cache[index - 3] * 0.106595;

      color *= params.glow_strength;

      if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) {
  #ifdef GLOW_USE_AUTO_EXPOSURE
          color /= texelFetch(source_auto_exposure, ivec2(0, 0), 0).r / params.glow_auto_exposure_grey;
  #endif
          color *= params.glow_exposure;

          // Keep only what exceeds the HDR threshold (never less than
          // glow_bloom), then clamp to the luminance cap.
          float luminance = max(color.r, max(color.g, color.b));
          float feedback = max(smoothstep(params.glow_hdr_threshold, params.glow_hdr_threshold + params.glow_hdr_scale, luminance), params.glow_bloom);

          color = min(color * feedback, vec4(params.glow_luminance_cap));
      }

      imageStore(dest_buffer, pos + params.target, color);
  #endif

  #ifdef MODE_SIMPLE_COPY
      vec4 color;
      if (bool(params.flags & FLAG_COPY_ALL_SOURCE)) {
          // Sample the source with normalized UVs; the flip is applied to the UV.
          vec2 uv = vec2(pos) / vec2(params.section.zw);
          if (bool(params.flags & FLAG_FLIP_Y)) {
              uv.y = 1.0 - uv.y;
          }
          color = textureLod(source_color, uv, 0.0);
      } else {
          // Fetch first, then mirror the destination row.
          color = texelFetch(source_color, pos + params.section.xy, 0);
          if (bool(params.flags & FLAG_FLIP_Y)) {
              pos.y = params.section.w - pos.y - 1;
          }
      }

      if (bool(params.flags & FLAG_FORCE_LUMINANCE)) {
          color.rgb = vec3(max(max(color.r, color.g), color.b));
      }

      if (bool(params.flags & FLAG_ALPHA_TO_ONE)) {
          color.a = 1.0;
      }

      imageStore(dest_buffer, pos + params.target, color);
  #endif

  #ifdef MODE_SIMPLE_COPY_DEPTH
      // Copy the red channel only; the flip mirrors the destination row.
      vec4 color = texelFetch(source_color, pos + params.section.xy, 0);
      if (bool(params.flags & FLAG_FLIP_Y)) {
          pos.y = params.section.w - pos.y - 1;
      }

      imageStore(dest_buffer, pos + params.target, vec4(color.r));
  #endif

  #ifdef MODE_LINEARIZE_DEPTH_COPY
      // Remap the stored depth to [-1, 1], convert it to a linear distance using
      // the near/far planes, then normalize by the far plane.
      float depth = texelFetch(source_color, pos + params.section.xy, 0).r;
      depth = depth * 2.0 - 1.0;
      depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near));
      vec4 color = vec4(depth / params.camera_z_far);

      if (bool(params.flags & FLAG_FLIP_Y)) {
          pos.y = params.section.w - pos.y - 1;
      }

      imageStore(dest_buffer, pos + params.target, color);
  #endif

  #if defined(MODE_CUBEMAP_TO_PANORAMA) || defined(MODE_CUBEMAP_ARRAY_TO_PANORAMA)
      // Turn the texel position into spherical angles and then into a lookup
      // direction for the cubemap.
      const float PI = 3.14159265359;
      vec2 uv = vec2(pos) / vec2(params.section.zw);
      uv.y = 1.0 - uv.y;
      float phi = uv.x * 2.0 * PI;
      float theta = uv.y * PI;

      vec3 normal;
      normal.x = sin(phi) * sin(theta) * -1.0;
      normal.y = cos(theta);
      normal.z = cos(phi) * sin(theta) * -1.0;

  #ifdef MODE_CUBEMAP_TO_PANORAMA
      // camera_z_far is reused as the LOD here: the bigger the LOD, the less acne.
      vec4 color = textureLod(source_color, normal, params.camera_z_far);
  #else
      // NOTE(review): in the array variant camera_z_far ends up in the fourth
      // coordinate component and the LOD argument is fixed at 0.0, unlike the
      // non-array path above. Confirm this is intended.
      vec4 color = textureLod(source_color, vec4(normal, params.camera_z_far), 0.0);
  #endif
      imageStore(dest_buffer, pos + params.target, color);
  #endif

  #ifdef MODE_SET_COLOR
      // Fill: write the constant color to this texel.
      imageStore(dest_buffer, pos + params.target, params.set_color);
  #endif
  }