TonemappingAverageLuminance.ankiprog 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  // NOTE(review): presumably tells the AnKi shader tooling that this program provides a compute technique - confirm.
  5. #pragma anki technique comp
  6. #include <AnKi/Shaders/TonemappingFunctions.hlsl>
  7. #include <AnKi/Shaders/Functions.hlsl>
  // Workgroup dimensions. They feed numthreads() on main() and size the s_avgLum scratch array.
  8. #define THREAD_COUNT_X 32u
  9. #define THREAD_COUNT_Y 16u
  // The same dimensions packed as a vector, used by main() for the per-component tile math.
  10. #define THREAD_COUNT UVec2(THREAD_COUNT_X, THREAD_COUNT_Y)
  // Input texture whose luminance is averaged by main().
  11. Texture2D<Vec4> g_tex : register(t0);
  // Defined ahead of the include below; presumably selects the register of the resources that header declares - confirm.
  12. #define TONEMAPPING_REGISTER u0
  13. #include <AnKi/Shaders/TonemappingResources.hlsl>
  // One partial luminance sum per thread of the workgroup; reduced in place by main().
  14. groupshared F32 s_avgLum[THREAD_COUNT_X * THREAD_COUNT_Y];
  // Computes the average luminance of g_tex, blends it with the previously stored value and writes the
  // result, together with the exposure derived from it, through writeExposureAndAverageLuminance().
  //
  // Steps:
  //  1. Every thread sums the (pre-weighted) luminance of its own tile of the texture.
  //  2. The per-thread sums are reduced to a single value through the groupshared s_avgLum array.
  //  3. The thread with group index 0 smooths the value over time and stores it.
  //
  // NOTE(review): only group-local IDs are used, so every workgroup walks the whole texture; presumably this
  // is dispatched as a single workgroup - confirm against the caller.
  [numthreads(THREAD_COUNT_X, THREAD_COUNT_Y, 1)] void main(UVec3 svGroupThreadId : SV_GROUPTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
  {
      UVec2 inputTexSize;
      g_tex.GetDimensions(inputTexSize.x, inputTexSize.y);

      // Round the texture size up to a multiple of the workgroup size so that the tiles cover every texel
      const UVec2 alignedInputTexSize = THREAD_COUNT * ((inputTexSize + THREAD_COUNT - 1u) / THREAD_COUNT);
      const UVec2 pixelsPerTile = alignedInputTexSize / THREAD_COUNT;

      // Texel range of this thread's tile, clipped once to the real texture size. Tiles that start past the
      // edge end up with tileEnd <= tileBegin and therefore contribute nothing. Clipping here replaces a
      // per-texel bounds test and keeps the whole computation in unsigned integers
      const UVec2 tileBegin = svGroupThreadId.xy * pixelsPerTile;
      const UVec2 tileEnd = min(tileBegin + pixelsPerTile, inputTexSize);

      // Every texel is pre-weighted by 1/texelCount, so the sum over all threads is the arithmetic mean
      const F32 weight = (1.0 / F32(inputTexSize.x * inputTexSize.y));

      F32 avgLum = 0.0;
      for(U32 y = tileBegin.y; y < tileEnd.y; ++y)
      {
          for(U32 x = tileBegin.x; x < tileEnd.x; ++x)
          {
              const UVec2 texel = UVec2(x, y);
              const Vec3 color = g_tex.Load(IVec3(texel, 0)).rgb;
              const F32 lum = computeLuminance(color);
              avgLum += lum * weight;
          }
      }

      s_avgLum[svGroupIndex] = avgLum;
      GroupMemoryBarrierWithGroupSync();

      // Gather the results into one: halve the active range every step, folding the upper half onto the lower
      [loop] for(U32 s = (THREAD_COUNT_X * THREAD_COUNT_Y) / 2u; s > 0u; s >>= 1u)
      {
          if(svGroupIndex < s)
          {
              s_avgLum[svGroupIndex] += s_avgLum[svGroupIndex + s];
          }

  #if ANKI_PLATFORM_MOBILE
          // The barrier is skipped once s is no larger than the wave size.
          // NOTE(review): this presumably relies on the remaining active threads belonging to one wave that
          // runs in lockstep - confirm for the targeted hardware
          if(s > WaveGetLaneCount())
          {
              GroupMemoryBarrierWithGroupSync();
          }
  #else
          GroupMemoryBarrierWithGroupSync();
  #endif
      }

      // Write the result
      [branch] if(svGroupIndex == 0u)
      {
          const F32 crntLum = s_avgLum[0];

  #if 1
          const F32 prevLum = readExposureAndAverageLuminance<F32>().y;

          // Lerp between previous and new L value: each invocation moves 5% of the way towards the new one
          const F32 interpolationFactor = 0.05;
          F32 finalAvgLum = lerp(prevLum, crntLum, interpolationFactor);
  #else
          F32 finalAvgLum = crntLum;
  #endif

          // This is a workaround because sometimes the avg lum becomes nan
          finalAvgLum = clamp(finalAvgLum, kEpsilonF32, kMaxF32);

          writeExposureAndAverageLuminance(computeExposure(finalAvgLum, 0.0f), finalAvgLum);
      }
  }