BloomDownsampleCS.azsl 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include <Atom/Features/SrgSemantics.azsli>
  9. #include <Atom/Features/ColorManagement/TransformColor.azsli>
  // Per-pass resources and constants used by the bloom downsample compute shader.
  ShaderResourceGroup PassSrg : SRG_PerPass
  {
      // Image that is sampled (read-only) by the shader.
      Texture2D<float4> m_sourceTexture;

      // Writable images. MainCS writes m_targetMipLevel0..4; m_targetTexture is
      // declared here but not referenced by the code in this file.
      RWTexture2D<float4> m_targetTexture;
      RWTexture2D<float4> m_targetMipLevel0;
      RWTexture2D<float4> m_targetMipLevel1;
      RWTexture2D<float4> m_targetMipLevel2;
      RWTexture2D<float4> m_targetMipLevel3;
      RWTexture2D<float4> m_targetMipLevel4;

      // Clamped, linearly filtered sampler used to read m_sourceTexture.
      Sampler LinearSampler
      {
          AddressU = Clamp;
          AddressV = Clamp;
          MinFilter = Linear;
          MagFilter = Linear;
          MipFilter = Linear;
      };

      // 1.0 / source image size
      float2 m_sourceImageTexelSize;

      // Packed threshold parameters:
      //   x -> threshold
      //   y -> (knee - 1) * threshold
      //   z -> 2 * knee * threshold
      //   w -> 1 / (4 * threshold * knee + EPSILON(1e-5))
      float4 m_thresholdConstants;

      // Group-shared scratch buffers used by MainCS to pass partial results between
      // the successive downsampling steps of one thread group.
      groupshared float3 smColor1[256];
      groupshared float3 smColor2[64];

      // Scales `color` by a luminance-dependent weight built from m_thresholdConstants.
      //
      // The weight is the larger of
      //   - a quadratic term: clamp(luminance + y, 0, z)^2 * w, and
      //   - the plain excess over the threshold: luminance - x,
      // divided by the luminance (floored at 1e-5 to avoid dividing by zero).
      //
      // color:   input color; its luminance is evaluated as ACEScg.
      // returns: the input color multiplied by the computed weight.
      float3 Threshold(float3 color)
      {
          const float luma = CalculateLuminance(color, ColorSpaceId::ACEScg);

          // Quadratic contribution around the threshold.
          const float kneeInput = clamp(luma + m_thresholdConstants.y, 0.0, m_thresholdConstants.z);
          const float kneeContribution = kneeInput * kneeInput * m_thresholdConstants.w;

          // Linear contribution: how far the luminance exceeds the threshold.
          const float excess = luma - m_thresholdConstants.x;

          const float scale = max(kneeContribution, excess) / max(luma, 1e-5);
          return scale * color;
      }
  }
  // Maps a 2D texel coordinate to a slot in shared memory such that the four texels
  // of every 2x2 block land in four consecutive slots:
  //
  //  -+---+---+---+---+-
  //   | 0 | 1 | 2 | 3 |          -+---+---+---+---+---+---+---+---+-
  //  -+---+---+---+---+-   ===>   | 0 | 1 | 4 | 5 | 2 | 3 | 6 | 7 |
  //   | 4 | 5 | 6 | 7 |          -+---+---+---+---+---+---+---+---+-
  //  -+---+---+---+---+-
  //
  // xy:       texel coordinate inside the grid.
  // gridSize: width of the grid in texels (one pair of rows occupies 2 * gridSize slots).
  // returns:  the reordered flat index for `xy`.
  uint FlatID(uint2 xy, uint gridSize)
  {
      // Which 2x2 block the texel belongs to, and where it sits inside that block.
      const uint2 blockCoord = xy >> 1;
      const uint2 inBlock = xy & 1;

      // Each pair of rows starts at a multiple of 2 * gridSize, each block within the
      // pair takes 4 slots, and inside a block the order is (0,0), (1,0), (0,1), (1,1).
      const uint pairRowStart = gridSize * (blockCoord.y * 2);
      const uint blockStart = blockCoord.x * 4;
      const uint slotInBlock = inBlock.y * 2 + inBlock.x;

      return pairRowStart + blockStart + slotInBlock;
  }
  // Thresholds the source image and writes five successively halved mip levels.
  //
  // Every thread produces one texel of mip 0. The coarser levels are then averaged
  // from 2x2 neighbourhoods exchanged through group-shared memory, with only the
  // top-left thread of each neighbourhood doing the work at each step.
  //
  // dID:  dispatch thread ID; dID.xy is the texel written in mip 0.
  // gtID: thread ID inside the 16x16 thread group.
  [numthreads(16, 16, 1)]
  void MainCS(uint3 dID : SV_DispatchThreadID, uint3 gtID : SV_GroupThreadID)
  {
      // Group-local coordinates at the resolution of mips 1, 2 and 3
      const uint2 groupPosHalf = gtID.xy >> 1;
      const uint2 groupPosQuarter = gtID.xy >> 2;
      const uint2 groupPosEighth = gtID.xy >> 3;

      // UV of the sampled point in the source image, offset to the center of a 2x2 tap
      const float2 sourceUv = (dID.xy * 2 + 1) * PassSrg::m_sourceImageTexelSize;

      //------------------ Mip 0 (1 / 2 downscale, 16x16 tap for each thread group) -----------------

      // Apply threshold to extract bright pixels
      const float3 sourceColor = PassSrg::m_sourceTexture.SampleLevel(PassSrg::LinearSampler, sourceUv, 0).rgb;
      float3 mip0Color = PassSrg::Threshold(sourceColor);

      // Cache 16x16 tap in each thread group for following downsampling steps
      const uint slot16 = FlatID(gtID.xy, 16);
      PassSrg::smColor1[slot16] = mip0Color;

      PassSrg::m_targetMipLevel0[dID.xy] = float4(mip0Color, 1.0);

      // Wait until every thread of the group has filled its shared memory slot
      GroupMemoryBarrierWithGroupSync();

      //---------------------------------------------------------------------------------------------

      // A thread handles a coarser level only when the matching low bits of both of
      // its coordinates are zero, i.e. it is the top-left thread of that neighbourhood.
      const uint coordBits = dID.x | dID.y;

      //------------------ Mip 1 (1 / 4 downscale, 8x8 tap for each thread group) -------------------

      float3 mip1Color;
      if ((coordBits & 1) == 0)
      {
          // The other three texels of the 2x2 block sit in the next three slots
          mip1Color = (mip0Color + PassSrg::smColor1[slot16 + 1] +
                       PassSrg::smColor1[slot16 + 2] + PassSrg::smColor1[slot16 + 3]) * 0.25;

          PassSrg::smColor2[FlatID(groupPosHalf, 8)] = mip1Color;
          PassSrg::m_targetMipLevel1[dID.xy >> 1] = float4(mip1Color, 1.0);
      }

      GroupMemoryBarrierWithGroupSync();

      //---------------------------------------------------------------------------------------------

      //------------------ Mip 2 (1 / 8 downscale, 4x4 tap for each thread group) -------------------

      float3 mip2Color;
      if ((coordBits & 3) == 0)
      {
          const uint slot8 = FlatID(groupPosHalf, 8);
          mip2Color = (mip1Color + PassSrg::smColor2[slot8 + 1] +
                       PassSrg::smColor2[slot8 + 2] + PassSrg::smColor2[slot8 + 3]) * 0.25;

          // smColor1 is reused here; the reads of the mip 1 step completed before the barrier above
          PassSrg::smColor1[FlatID(groupPosQuarter, 4)] = mip2Color;
          PassSrg::m_targetMipLevel2[dID.xy >> 2] = float4(mip2Color, 1.0);
      }

      GroupMemoryBarrierWithGroupSync();

      //---------------------------------------------------------------------------------------------

      //------------------ Mip 3 (1 / 16 downscale, 2x2 tap for each thread group) ------------------

      float3 mip3Color;
      if ((coordBits & 7) == 0)
      {
          const uint slot4 = FlatID(groupPosQuarter, 4);
          mip3Color = (mip2Color + PassSrg::smColor1[slot4 + 1] +
                       PassSrg::smColor1[slot4 + 2] + PassSrg::smColor1[slot4 + 3]) * 0.25;

          // smColor2 is reused here; the reads of the mip 2 step completed before the barrier above
          PassSrg::smColor2[FlatID(groupPosEighth, 2)] = mip3Color;
          PassSrg::m_targetMipLevel3[dID.xy >> 3] = float4(mip3Color, 1.0);
      }

      GroupMemoryBarrierWithGroupSync();

      //---------------------------------------------------------------------------------------------

      //------------------ Mip 4 (1 / 32 downscale, 1x1 tap for each thread group) ------------------

      // Only the first thread of the group combines the four mip 3 results
      if ((gtID.x | gtID.y) == 0)
      {
          float3 mip4Color = (mip3Color + PassSrg::smColor2[1] +
                              PassSrg::smColor2[2] + PassSrg::smColor2[3]) * 0.25;

          PassSrg::m_targetMipLevel4[dID.xy >> 4] = float4(mip4Color, 1.0);
      }

      //---------------------------------------------------------------------------------------------
  }