GenerateHistogramCS.hlsl 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. // RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
  2. // CHECK: flattenedThreadIdInGroup
  3. // CHECK: threadId
  4. // CHECK: barrier
  5. // CHECK: textureLoad
  6. // CHECK: AtomicAdd
  7. //
  8. // Copyright (c) Microsoft. All rights reserved.
  9. // This code is licensed under the MIT License (MIT).
  10. // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
  11. // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
  12. // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
  13. // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
  14. //
  15. // Developed by Minigraph
  16. //
  17. // Author: James Stanard
  18. //
  19. // The group size is 16x16, but one group iterates over an entire 16-wide column of pixels (384 pixels tall)
  20. // Assuming the total workspace is 640x384, there will be 40 thread groups computing the histogram in parallel.
  21. // The histogram measures logarithmic luminance ranging from 2^-12 up to 2^4. This should provide a nice window
  22. // where the exposure would range from 2^-4 up to 2^4.
  23. #include "PostEffectsRS.hlsli"
  // Source texture of unsigned integer texels; main() uses each texel value
  // directly as a bin index into g_TileHistogram.
  Texture2D<uint> LumaBuf : register(t0);

  // Destination histogram, addressed in bytes (one 32-bit counter per bin).
  RWByteAddressBuffer Histogram : register(u0);

  // Thread-group-local histogram; 256 bins, accumulated with atomics and then
  // merged into Histogram at the end of main().
  groupshared uint g_TileHistogram[256];
  // Compute entry point: accumulates a histogram of LumaBuf texel values for one
  // column of the image in group-shared memory, then merges it into Histogram.
  //
  //   GI   - flattened index of this thread inside its 16x16 group (0..255);
  //          doubles as the histogram bin this thread clears and flushes.
  //   DTid - dispatch-wide thread ID; its xy is the first texel this thread reads.
  [RootSignature(PostEffects_RootSig)]
  [numthreads( 16, 16, 1 )]
  void main( uint GI : SV_GroupIndex, uint3 DTid : SV_DispatchThreadID )
  {
      // 16 * 16 threads == 256 bins, so every thread zeroes exactly one bin.
      g_TileHistogram[GI] = 0;

      // All bins must be cleared before any thread starts counting.
      GroupMemoryBarrierWithGroupSync();

      // Walk down the column one 16-row band at a time: 24 bands * 16 rows = 384 rows.
      uint2 texel = DTid.xy;
      for (uint band = 0; band < 24; ++band)
      {
          // NOTE(review): the texel value is used unclamped as a bin index, so this
          // assumes LumaBuf only yields values below 256 -- confirm the bound format.
          const uint bin = LumaBuf[texel];
          InterlockedAdd( g_TileHistogram[bin], 1 );

          texel.y += 16;
      }

      // Wait until every thread has finished counting before reading the bins back.
      GroupMemoryBarrierWithGroupSync();

      // Each thread merges its own bin; bins are 4 bytes apart in the byte-address buffer.
      Histogram.InterlockedAdd( GI * 4, g_TileHistogram[GI] );
  }