// PPEyeAdaptHistogram.bsl
  1. #define NUM_BUCKETS (THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y)
  2. technique PPEyeAdaptHistogram
  3. {
  4. featureset = HighEnd;
  5. code
  6. {
  7. [internal]
  8. cbuffer Input
  9. {
  10. // xy - offset, zw - size
  11. uint4 gPixelOffsetAndSize;
  12. // x - histogram scale, y - histogram offset
  13. float2 gHistogramParams;
  14. uint2 gThreadGroupCount;
  15. }
  16. Texture2D gSceneColorTex;
  17. RWTexture2D<float4> gOutputTex;
  18. // Keep elements in this order as it ensures coalesced memory operations for non-random ops
  19. groupshared float sharedData[NUM_BUCKETS][THREADGROUP_SIZE_X][THREADGROUP_SIZE_Y];
  20. float calcHistogramPos(float luminance)
  21. {
  22. return saturate(log2(luminance) * gHistogramParams.x + gHistogramParams.y);
  23. }
  24. [numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)]
  25. void csmain(
  26. uint3 groupId : SV_GroupID,
  27. uint3 groupThreadId : SV_GroupThreadID,
  28. uint3 dispatchThreadId : SV_DispatchThreadID,
  29. uint threadIndex : SV_GroupIndex)
  30. {
  31. // Clear everything
  32. for(uint i = 0; i < NUM_BUCKETS; i++)
  33. sharedData[i][groupThreadId.x][groupThreadId.y] = 0.0f;
  34. GroupMemoryBarrierWithGroupSync();
  35. // Sort all pixel luminance for the current thread into histogram buckets
  36. uint2 tileSize = uint2(LOOP_COUNT_X, LOOP_COUNT_Y);
  37. uint2 maxExtent = gPixelOffsetAndSize.xy + gPixelOffsetAndSize.zw;
  38. uint2 tileStart = dispatchThreadId.xy * tileSize + gPixelOffsetAndSize.xy;
  39. for(uint y = 0; y < LOOP_COUNT_Y; y++)
  40. {
  41. uint2 texelPos = tileStart + uint2(0, y);
  42. if(texelPos.y > maxExtent.y)
  43. break;
  44. for(uint x = 0; x < LOOP_COUNT_X; x++)
  45. {
  46. if(texelPos.x > maxExtent.x)
  47. break;
  48. float4 hdrColor = gSceneColorTex.Load(int3(texelPos, 0));
  49. float luminance = dot(hdrColor.rgb, float3(0.299f, 0.587f, 0.114f)); // TODO - Perhaps just use max() of all values?
  50. float histogramPos = calcHistogramPos(luminance);
  51. float bucket = histogramPos * (NUM_BUCKETS - 1) * 0.9999f;
  52. uint bucketAIdx = (uint)bucket;
  53. uint bucketBIdx = bucketAIdx + 1;
  54. float weightB = frac(bucket);
  55. float weightA = 1.0f - weightB;
  56. if(bucketAIdx != 0)
  57. sharedData[bucketAIdx][groupThreadId.x][groupThreadId.y] += weightA;
  58. sharedData[bucketBIdx][groupThreadId.x][groupThreadId.y] += weightB;
  59. texelPos.x++;
  60. }
  61. }
  62. GroupMemoryBarrierWithGroupSync();
  63. // Accumulate bucketed values from all threads in the group
  64. if(threadIndex < (NUM_BUCKETS / 4))
  65. {
  66. float4 sum = 0.0f;
  67. for(uint y = 0; y < THREADGROUP_SIZE_Y; y++)
  68. {
  69. for(uint x = 0; x < THREADGROUP_SIZE_X; x++)
  70. {
  71. sum += float4(
  72. sharedData[threadIndex * 4 + 0][x][y],
  73. sharedData[threadIndex * 4 + 1][x][y],
  74. sharedData[threadIndex * 4 + 2][x][y],
  75. sharedData[threadIndex * 4 + 3][x][y]
  76. );
  77. }
  78. }
  79. // Normalize and output histogram for the group (single line per group)
  80. float groupArea = THREADGROUP_SIZE_X * LOOP_COUNT_X * THREADGROUP_SIZE_Y * LOOP_COUNT_Y;
  81. gOutputTex[uint2(threadIndex, groupId.x + groupId.y * gThreadGroupCount.x)] = sum / groupArea;
  82. }
  83. }
  84. };
  85. };