// PPEyeAdaptHistogram.bsl
  1. Parameters =
  2. {
  3. Texture2D gSceneColorTex;
  4. RWTexture2D gOutputTex;
  5. };
  6. Blocks =
  7. {
  8. Block Input;
  9. };
  10. #define NUM_BUCKETS (THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y)
  11. Technique =
  12. {
  13. Language = "HLSL11";
  14. Pass =
  15. {
  16. Compute =
  17. {
  18. cbuffer Input
  19. {
  20. // xy - offset, zw - size
  21. uint4 gPixelOffsetAndSize;
  22. // x - histogram scale, y - histogram offset
  23. float2 gHistogramParams;
  24. uint2 gThreadGroupCount;
  25. }
  26. Texture2D gSceneColorTex;
  27. RWTexture2D<float4> gOutputTex;
  28. // Keep elements in this order as it ensures coalesced memory operations for non-random ops
  29. groupshared float sharedData[NUM_BUCKETS][THREADGROUP_SIZE_X][THREADGROUP_SIZE_Y];
  30. float calcHistogramPos(float luminance)
  31. {
  32. return saturate(log2(luminance) * gHistogramParams.x + gHistogramParams.y);
  33. }
  34. [numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)]
  35. void main(
  36. uint3 groupId : SV_GroupID,
  37. uint3 groupThreadId : SV_GroupThreadID,
  38. uint3 dispatchThreadId : SV_DispatchThreadID,
  39. uint threadIndex : SV_GroupIndex)
  40. {
  41. // Clear everything
  42. for(uint i = 0; i < NUM_BUCKETS; i++)
  43. sharedData[i][groupThreadId.x][groupThreadId.y] = 0.0f;
  44. GroupMemoryBarrierWithGroupSync();
  45. // Sort all pixel luminance for the current thread into histogram buckets
  46. uint2 tileSize = uint2(LOOP_COUNT_X, LOOP_COUNT_Y);
  47. uint2 maxExtent = gPixelOffsetAndSize.xy + gPixelOffsetAndSize.zw;
  48. uint2 tileStart = dispatchThreadId.xy * tileSize + gPixelOffsetAndSize.xy;
  49. for(uint y = 0; y < LOOP_COUNT_Y; y++)
  50. {
  51. uint2 texelPos = tileStart + uint2(0, y);
  52. if(texelPos.y > maxExtent.y)
  53. break;
  54. for(uint x = 0; x < LOOP_COUNT_X; x++)
  55. {
  56. if(texelPos.x > maxExtent.x)
  57. break;
  58. float4 hdrColor = gSceneColorTex.Load(int3(texelPos, 0));
  59. float luminance = dot(hdrColor.rgb, float3(0.299f, 0.587f, 0.114f)); // TODO - Perhaps just use max() of all values?
  60. float histogramPos = calcHistogramPos(luminance);
  61. float bucket = histogramPos * (NUM_BUCKETS - 1) * 0.9999f;
  62. uint bucketAIdx = (uint)bucket;
  63. uint bucketBIdx = bucketAIdx + 1;
  64. float weightB = frac(bucket);
  65. float weightA = 1.0f - weightB;
  66. if(bucketAIdx != 0)
  67. sharedData[bucketAIdx][groupThreadId.x][groupThreadId.y] += weightA;
  68. sharedData[bucketBIdx][groupThreadId.x][groupThreadId.y] += weightB;
  69. texelPos.x++;
  70. }
  71. }
  72. GroupMemoryBarrierWithGroupSync();
  73. // Accumulate bucketed values from all threads in the group
  74. if(threadIndex < (NUM_BUCKETS / 4))
  75. {
  76. float4 sum = 0.0f;
  77. for(uint y = 0; y < THREADGROUP_SIZE_Y; y++)
  78. {
  79. for(uint x = 0; x < THREADGROUP_SIZE_X; x++)
  80. {
  81. sum += float4(
  82. sharedData[threadIndex * 4 + 0][x][y],
  83. sharedData[threadIndex * 4 + 1][x][y],
  84. sharedData[threadIndex * 4 + 2][x][y],
  85. sharedData[threadIndex * 4 + 3][x][y]
  86. );
  87. }
  88. }
  89. // Normalize and output histogram for the group (single line per group)
  90. float groupArea = THREADGROUP_SIZE_X * LOOP_COUNT_X * THREADGROUP_SIZE_Y * LOOP_COUNT_Y;
  91. gOutputTex[uint2(threadIndex, groupId.x + groupId.y * gThreadGroupCount.x)] = sum / groupArea;
  92. }
  93. }
  94. };
  95. };
  96. };
  97. Technique =
  98. {
  99. Language = "GLSL";
  100. Pass =
  101. {
  102. Compute =
  103. {
  104. layout (local_size_x = THREADGROUP_SIZE_X, local_size_y = THREADGROUP_SIZE_Y) in;
  105. uniform Input
  106. {
  107. // xy - offset, zw - size
  108. uvec4 gPixelOffsetAndSize;
  109. // x - histogram scale, y - histogram offset
  110. vec2 gHistogramParams;
  111. uvec2 gThreadGroupCount;
  112. };
  113. uniform sampler2D gSceneColorTex;
  114. layout (rgba16f) uniform image2D gOutputTex;
  115. // Keep elements in this order as it ensures coalesced memory operations for non-random ops
  116. shared float sharedData[NUM_BUCKETS][THREADGROUP_SIZE_X][THREADGROUP_SIZE_Y];
  117. void calcHistogramPos(float luminance, out float result)
  118. {
  119. result = clamp(log2(luminance) * gHistogramParams.x + gHistogramParams.y, 0.0f, 1.0f);
  120. }
  121. void main()
  122. {
  123. // Clear everything
  124. for(uint i = 0; i < NUM_BUCKETS; i++)
  125. sharedData[i][gl_LocalInvocationID.x][gl_LocalInvocationID.y] = 0.0f;
  126. groupMemoryBarrier();
  127. barrier();
  128. // Sort all pixel luminance for the current thread into histogram buckets
  129. uvec2 tileSize = uvec2(LOOP_COUNT_X, LOOP_COUNT_Y);
  130. uvec2 maxExtent = gPixelOffsetAndSize.xy + gPixelOffsetAndSize.zw;
  131. uvec2 tileStart = gl_GlobalInvocationID.xy * tileSize + gPixelOffsetAndSize.xy;
  132. for(uint y = 0; y < LOOP_COUNT_Y; y++)
  133. {
  134. uvec2 texelPos = tileStart + uvec2(0, y);
  135. if(texelPos.y > maxExtent.y)
  136. break;
  137. for(uint x = 0; x < LOOP_COUNT_X; x++)
  138. {
  139. if(texelPos.x > maxExtent.x)
  140. break;
  141. vec4 hdrColor = texelFetch(gSceneColorTex, ivec2(texelPos), 0);
  142. float luminance = dot(hdrColor.rgb, vec3(0.299f, 0.587f, 0.114f)); // TODO - Perhaps just use max() of all values?
  143. float histogramPos;
  144. calcHistogramPos(luminance, histogramPos);
  145. float bucket = histogramPos * (NUM_BUCKETS - 1) * 0.9999f;
  146. uint bucketAIdx = uint(bucket);
  147. uint bucketBIdx = bucketAIdx + 1;
  148. float weightB = fract(bucket);
  149. float weightA = 1.0f - weightB;
  150. if(bucketAIdx != 0)
  151. sharedData[bucketAIdx][gl_LocalInvocationID.x][gl_LocalInvocationID.y] += weightA;
  152. sharedData[bucketBIdx][gl_LocalInvocationID.x][gl_LocalInvocationID.y] += weightB;
  153. texelPos.x++;
  154. }
  155. }
  156. groupMemoryBarrier();
  157. barrier();
  158. // Accumulate bucketed values from all threads in the group
  159. if(gl_LocalInvocationIndex < (NUM_BUCKETS / 4))
  160. {
  161. vec4 sum = vec4(0.0f);
  162. for(uint y = 0; y < THREADGROUP_SIZE_Y; y++)
  163. {
  164. for(uint x = 0; x < THREADGROUP_SIZE_X; x++)
  165. {
  166. sum += vec4(
  167. sharedData[gl_LocalInvocationIndex * 4 + 0][x][y],
  168. sharedData[gl_LocalInvocationIndex * 4 + 1][x][y],
  169. sharedData[gl_LocalInvocationIndex * 4 + 2][x][y],
  170. sharedData[gl_LocalInvocationIndex * 4 + 3][x][y]
  171. );
  172. }
  173. }
  174. // Normalize and output histogram for the group (single line per group)
  175. float groupArea = THREADGROUP_SIZE_X * LOOP_COUNT_X * THREADGROUP_SIZE_Y * LOOP_COUNT_Y;
  176. ivec2 outCoords = ivec2(gl_LocalInvocationIndex, gl_WorkGroupID.x + gl_WorkGroupID.y * gThreadGroupCount.x);
  177. imageStore(gOutputTex, outCoords, sum / groupArea);
  178. }
  179. }
  180. };
  181. };
  182. };