IndirectDiffuseVrsSriGeneration.ankiprog 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. // Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  // Program permutations. SRI_TEXEL_DIMENSION selects the REGION_SIZE/WORKGROUP_SIZE below, SHARED_MEMORY enables the
  // cross-subgroup reduction through s_maxDerivative and LIMIT_RATE_TO_2X2 clamps the final rate to 2 per axis.
  // NOTE(review): presumably one shader variant is built per listed mutator value - confirm with the AnKi compiler.
  5. #pragma anki mutator SRI_TEXEL_DIMENSION 8 16
  6. #pragma anki mutator SHARED_MEMORY 0 1
  7. #pragma anki mutator LIMIT_RATE_TO_2X2 0 1
  8. #pragma anki start comp
  9. #include <AnKi/Shaders/Functions.glsl>
  // Source texture. Only its .x channel is read, and it is used as the depth fed to the unprojection.
  10. layout(set = 0, binding = 0) uniform texture2D u_inputTex;
  // Sampler combined with u_inputTex in sampleViewPositionZ().
  11. layout(set = 0, binding = 1) uniform sampler u_nearestClampSampler;
  // Footprint, in input texels, handled by a single invocation: main() reads 2x2 samples in the 8 variant and 2x4
  // samples in the other one.
  12. #if SRI_TEXEL_DIMENSION == 8
  13. const UVec2 REGION_SIZE = UVec2(2u, 2u);
  14. #else
  15. const UVec2 REGION_SIZE = UVec2(2u, 4u);
  16. #endif
  // Invocations per workgroup so that one workgroup spans SRI_TEXEL_DIMENSION x SRI_TEXEL_DIMENSION input texels:
  // 4x4 invocations when the dimension is 8, 8x4 when it is 16.
  17. const UVec2 WORKGROUP_SIZE = UVec2(SRI_TEXEL_DIMENSION) / REGION_SIZE;
  18. layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
  // Output image. main() stores exactly one texel per workgroup, at gl_WorkGroupID.xy.
  19. layout(set = 0, binding = 2) uniform writeonly uimage2D u_sriImg;
  // Push constants consumed by this program.
  20. layout(push_constant, std140, row_major) uniform b_pc
  21. {
  // Scale applied to texel coordinates and texel offsets to turn them into UVs.
  22. Vec2 u_oneOverViewportSize;
  // Derivative threshold: a max derivative above it selects rate 1, above 0.4 * threshold selects rate 2, otherwise 4.
  // NOTE(review): the unit is taken from the name only - confirm it matches what the inverse projection produces.
  23. F32 u_thresholdMeters;
  // Never read in this file. NOTE(review): presumably explicit padding so the matrix starts on a 16 byte boundary.
  24. F32 u_padding0;
  // Multiplied with Vec4(ndc, depth, 1.0) in sampleViewPositionZ(); the result's z / w is what gets differentiated.
  25. Mat4 u_invertedProjectionJitter;
  26. };
  // Scratch storage for the cross-subgroup reduction. Only compiled in when the SHARED_MEMORY mutator is 1.
  27. #if SHARED_MEMORY
  28. // Ideally, we'd be able to calculate the min/max/average using subgroup operations, but there's no guarantee
  29. // subgroupSize is large enough so we need shared memory as a fallback. We need gl_NumSubgroups entries, but it is not a
  30. // constant, so estimate it assuming a subgroupSize of at least 8.
  // Resulting size: 16 invocations / 8 = 2 entries for the 8 variant, 32 invocations / 8 = 4 entries for the 16 one.
  // NOTE(review): main() indexes the array with gl_SubgroupID and loops up to gl_NumSubgroups without a bounds check,
  // so a device with a subgroup size below 8 would access it out of range - confirm such devices never select
  // this variant.
  31. const U32 SHARED_MEMORY_ENTRIES = WORKGROUP_SIZE.x * WORKGROUP_SIZE.y / 8u;
  // One entry per subgroup: .x holds the subgroup's max X derivative, .y its max Y derivative.
  32. shared Vec2 s_maxDerivative[SHARED_MEMORY_ENTRIES];
  33. #endif
  // Samples u_inputTex at uv displaced by (offsetX, offsetY) * u_oneOverViewportSize, treats the .x channel as depth,
  // transforms (ndc, depth, 1) by u_invertedProjectionJitter and returns the Z component after the divide by W.
  F32 sampleViewPositionZ(Vec2 uv, I32 offsetX, I32 offsetY)
  {
      // The offsets are expressed in texels, so scale them to UV space before applying them.
      const Vec2 texelOffset = Vec2(offsetX, offsetY) * u_oneOverViewportSize;
      const Vec2 sampleUv = uv + texelOffset;

      // Always read the top mip; only the first channel is used.
      const F32 sampledDepth = textureLod(sampler2D(u_inputTex, u_nearestClampSampler), sampleUv, 0.0).x;

      // Unproject the sample and keep only Z, after the homogeneous divide.
      const Vec4 unprojected = u_invertedProjectionJitter * Vec4(UV_TO_NDC(sampleUv), sampledDepth, 1.0);
      return unprojected.z / unprojected.w;
  }
  // Entry point. Every invocation measures how much the Z returned by sampleViewPositionZ() changes across its
  // REGION_SIZE footprint, the workgroup reduces those values to one maximum per axis, and invocation 0 converts the
  // maxima into a shading rate and stores it in u_sriImg at gl_WorkGroupID.xy.
  void main()
  {
      // UV of the center of the first texel of this invocation's footprint.
      const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) * Vec2(REGION_SIZE) + 0.5) * u_oneOverViewportSize;

  #if SRI_TEXEL_DIMENSION == 8
      // Get positions
      // l0.z  l0.w
      // l0.x  l0.y
      Vec4 l0;
      l0.x = sampleViewPositionZ(uv, 0, 0);
      l0.y = sampleViewPositionZ(uv, 1, 0);
      l0.z = sampleViewPositionZ(uv, 0, 1);
      l0.w = sampleViewPositionZ(uv, 1, 1);

      // Calculate derivatives. dx holds the horizontal difference of each row, dy the vertical difference of each column.
      Vec2 a = Vec2(l0.y, l0.z);
      Vec2 b = Vec2(l0.x, l0.w);
      const Vec2 dx = abs(a - b);

      a = Vec2(l0.z, l0.w);
      b = Vec2(l0.x, l0.y);
      const Vec2 dy = abs(a - b);

      F32 maxDerivativeX = max(dx.x, dx.y);
      F32 maxDerivativeY = max(dy.x, dy.y);
  #else
      // Get positions
      // l1.z  l1.w
      // l1.x  l1.y
      // l0.z  l0.w
      // l0.x  l0.y
      Vec4 l0;
      l0.x = sampleViewPositionZ(uv, 0, 0);
      l0.y = sampleViewPositionZ(uv, 1, 0);
      l0.z = sampleViewPositionZ(uv, 0, 1);
      l0.w = sampleViewPositionZ(uv, 1, 1);

      Vec4 l1;
      l1.x = sampleViewPositionZ(uv, 0, 2);
      l1.y = sampleViewPositionZ(uv, 1, 2);
      l1.z = sampleViewPositionZ(uv, 0, 3);
      l1.w = sampleViewPositionZ(uv, 1, 3);

      // Calculate derivatives. dx holds the horizontal difference of each of the four rows.
      Vec4 a = Vec4(l0.y, l0.z, l1.y, l1.z);
      Vec4 b = Vec4(l0.x, l0.w, l1.x, l1.w);
      const Vec4 dx = abs(a - b);

      // dy holds the vertical difference of each column inside the l0 quad and inside the l1 quad. Every lane is
      // written as "upper minus lower"; because of the abs() the operand order does not affect the result.
      // NOTE(review): the vertical step between the two quads (l1.x - l0.z and l1.y - l0.w) is never measured -
      // confirm that this is intended.
      a = Vec4(l0.z, l0.w, l1.z, l1.w);
      b = Vec4(l0.x, l0.y, l1.x, l1.y);
      const Vec4 dy = abs(a - b);

      F32 maxDerivativeX = max(max(dx.x, dx.y), max(dx.z, dx.w));
      F32 maxDerivativeY = max(max(dy.x, dy.y), max(dy.z, dy.w));
  #endif

      // Reduce inside the subgroup first.
      maxDerivativeX = subgroupMax(maxDerivativeX);
      maxDerivativeY = subgroupMax(maxDerivativeY);

  #if SHARED_MEMORY
      // Store results in shared memory, one entry per subgroup.
      ANKI_BRANCH if(subgroupElect())
      {
          s_maxDerivative[gl_SubgroupID] = Vec2(maxDerivativeX, maxDerivativeY);
      }

      // Make every subgroup's entry visible before invocation 0 reads them back.
      memoryBarrierShared();
      barrier();
  #endif

      // Write the result
      ANKI_BRANCH if(gl_LocalInvocationIndex == 0u)
      {
          // Get max across all subgroups.
  #if SHARED_MEMORY
          Vec2 maxDerivative = s_maxDerivative[0];
          for(U32 i = 1u; i < gl_NumSubgroups; ++i)
          {
              maxDerivative = max(maxDerivative, s_maxDerivative[i]);
          }
  #else
          // No cross-subgroup step here: the subgroupMax() result of invocation 0 is used as-is.
          // NOTE(review): this is only the workgroup maximum if the whole workgroup fits in one subgroup - confirm
          // the variant is selected accordingly.
          const Vec2 maxDerivative = Vec2(maxDerivativeX, maxDerivativeY);
  #endif

          // Determine shading rate: 1 above the threshold, 2 above 40% of the threshold, 4 otherwise.
          const F32 threshold1 = u_thresholdMeters;
          const F32 threshold2 = threshold1 * 0.4;

          UVec2 rate;
          rate.x = (maxDerivative.x > threshold1) ? 1u : ((maxDerivative.x > threshold2) ? 2u : 4u);
          rate.y = (maxDerivative.y > threshold1) ? 1u : ((maxDerivative.y > threshold2) ? 2u : 4u);

  #if LIMIT_RATE_TO_2X2
          rate = min(rate, UVec2(2u));
  #endif

          // One output texel per workgroup.
          const UVec2 outTexelCoord = gl_WorkGroupID.xy;
          imageStore(u_sriImg, IVec2(outTexelCoord), UVec4(encodeVrsRate(rate)));
      }
  }
  127. #pragma anki end