| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- #define NUM_BUCKETS (THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y)
- technique PPEyeAdaptHistogram
- {
- code
- {
- [internal]
- cbuffer Input
- {
- // xy - offset, zw - size
- uint4 gPixelOffsetAndSize;
-
- // x - histogram scale, y - histogram offset
- float2 gHistogramParams;
- uint2 gThreadGroupCount;
- }
-
- Texture2D gSceneColorTex;
- RWTexture2D<float4> gOutputTex;
-
- // Keep elements in this order as it ensures coalesced memory operations for non-random ops
- groupshared float sharedData[NUM_BUCKETS][THREADGROUP_SIZE_X][THREADGROUP_SIZE_Y];
-
- float calcHistogramPos(float luminance)
- {
- return saturate(log2(luminance) * gHistogramParams.x + gHistogramParams.y);
- }
-
- [numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)]
- void csmain(
- uint3 groupId : SV_GroupID,
- uint3 groupThreadId : SV_GroupThreadID,
- uint3 dispatchThreadId : SV_DispatchThreadID,
- uint threadIndex : SV_GroupIndex)
- {
- // Clear everything
- for(uint i = 0; i < NUM_BUCKETS; i++)
- sharedData[i][groupThreadId.x][groupThreadId.y] = 0.0f;
-
- GroupMemoryBarrierWithGroupSync();
-
- // Sort all pixel luminance for the current thread into histogram buckets
- uint2 tileSize = uint2(LOOP_COUNT_X, LOOP_COUNT_Y);
- uint2 maxExtent = gPixelOffsetAndSize.xy + gPixelOffsetAndSize.zw;
-
- uint2 tileStart = dispatchThreadId.xy * tileSize + gPixelOffsetAndSize.xy;
- for(uint y = 0; y < LOOP_COUNT_Y; y++)
- {
- uint2 texelPos = tileStart + uint2(0, y);
- if(texelPos.y > maxExtent.y)
- break;
-
- for(uint x = 0; x < LOOP_COUNT_X; x++)
- {
- if(texelPos.x > maxExtent.x)
- break;
-
- float4 hdrColor = gSceneColorTex.Load(int3(texelPos, 0));
- float luminance = dot(hdrColor.rgb, float3(0.299f, 0.587f, 0.114f)); // TODO - Perhaps just use max() of all values?
-
- float histogramPos = calcHistogramPos(luminance);
- float bucket = histogramPos * (NUM_BUCKETS - 1) * 0.9999f;
-
- uint bucketAIdx = (uint)bucket;
- uint bucketBIdx = bucketAIdx + 1;
-
- float weightB = frac(bucket);
- float weightA = 1.0f - weightB;
-
- if(bucketAIdx != 0)
- sharedData[bucketAIdx][groupThreadId.x][groupThreadId.y] += weightA;
-
- sharedData[bucketBIdx][groupThreadId.x][groupThreadId.y] += weightB;
-
- texelPos.x++;
- }
- }
-
- GroupMemoryBarrierWithGroupSync();
-
- // Accumulate bucketed values from all threads in the group
- if(threadIndex < (NUM_BUCKETS / 4))
- {
- float4 sum = 0.0f;
- for(uint y = 0; y < THREADGROUP_SIZE_Y; y++)
- {
- for(uint x = 0; x < THREADGROUP_SIZE_X; x++)
- {
- sum += float4(
- sharedData[threadIndex * 4 + 0][x][y],
- sharedData[threadIndex * 4 + 1][x][y],
- sharedData[threadIndex * 4 + 2][x][y],
- sharedData[threadIndex * 4 + 3][x][y]
- );
- }
- }
-
- // Normalize and output histogram for the group (single line per group)
- float groupArea = THREADGROUP_SIZE_X * LOOP_COUNT_X * THREADGROUP_SIZE_Y * LOOP_COUNT_Y;
- gOutputTex[uint2(threadIndex, groupId.x + groupId.y * gThreadGroupCount.x)] = sum / groupArea;
-
- }
- }
- };
- };
|