BlurCS.hlsl 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. // RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
  2. // RUN: %dxc -E main -T cs_6_0 %s | %D3DReflect %s | FileCheck -check-prefix=REFL %s
  3. // CHECK: groupId
  4. // CHECK: threadIdInGroup
  5. // CHECK: threadId
  6. // CHECK: barrier
  7. // CHECK: addrspace(3)
  8. //
  9. // Copyright (c) Microsoft. All rights reserved.
  10. // This code is licensed under the MIT License (MIT).
  11. // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
  12. // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
  13. // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
  14. // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
  15. //
  16. // Developed by Minigraph
  17. //
  18. // Author: James Stanard
  19. //
  20. // The CS for Gaussian blurring a single RGB buffer.
  21. //
  22. // For the intended bloom blurring algorithm, this shader is expected to be used only on
  23. // the lowest resolution bloom buffer before starting the series of upsample-and-blur
  24. // passes.
  25. #include "PostEffectsRS.hlsli"
  // Source image sampled by main() (SRV slot t0).
  Texture2D<float3>   InputBuf : register( t0 );

  // Destination image written by BlurVertically() (UAV slot u0).
  RWTexture2D<float3> Result   : register( u0 );

  // Constants bound at b0.
  cbuffer cb0 : register(b0)
  {
      // Not referenced by any code visible in this file.
      // NOTE(review): presumably kept so the constant layout matches the other
      // post-effect shaders sharing PostEffectsRS.hlsli -- confirm.
      float2 g_inverseDimensions;
  }
  32. // The Gaussian blur weights (derived from Pascal's triangle)
  // Half of a symmetric 9-tap kernel: row 8 of Pascal's triangle (1 8 28 56 70 ...)
  // divided by 256, listed from the center tap outwards.
  static const float Weights[5] =
  {
      70.0f / 256.0f,   // center tap
      56.0f / 256.0f,   // taps at distance 1
      28.0f / 256.0f,   // taps at distance 2
       8.0f / 256.0f,   // taps at distance 3
       1.0f / 256.0f    // taps at distance 4
  };
  // Applies the symmetric 9-tap kernel in Weights to nine consecutive samples.
  // 'e' is the center sample; (d,f), (c,g), (b,h) and (a,i) are the pairs at
  // distance 1, 2, 3 and 4 from it. Returns the weighted sum.
  float3 BlurPixels( float3 a, float3 b, float3 c, float3 d, float3 e, float3 f, float3 g, float3 h, float3 i )
  {
      const float3 centerTerm = Weights[0] * e;
      const float3 dist1Term  = Weights[1] * (d + f);
      const float3 dist2Term  = Weights[2] * (c + g);
      const float3 dist3Term  = Weights[3] * (b + h);
      const float3 dist4Term  = Weights[4] * (a + i);

      // Summed from the center outwards, in the same order as the weights.
      return centerTerm + dist1Term + dist2Term + dist3Term + dist4Term;
  }
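  38. // 16x16 pixel tile whose 8x8 center is blurred and written out. Each uint initially packs one color channel of two horizontally adjacent pixels as two 16-bit floats; after the horizontal pass each entry holds one full 32-bit float.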
  // Group-shared scratch storage, one array per color channel.
  // Store2Pixels/Load2Pixels treat each entry as two packed 16-bit floats;
  // Store1Pixel/Load1Pixel treat each entry as the raw bits of one 32-bit float.
  groupshared uint CacheR[128];   // red
  groupshared uint CacheG[128];   // green
  groupshared uint CacheB[128];   // blue
  // Writes two pixels into cache entry 'index'. Each channel is converted to a
  // 16-bit float; pixel1 occupies the low 16 bits and pixel2 the high 16 bits
  // of the corresponding CacheR/CacheG/CacheB entry.
  void Store2Pixels( uint index, float3 pixel1, float3 pixel2 )
  {
      const uint3 lowHalves  = f32tof16(pixel1);
      const uint3 highHalves = f32tof16(pixel2) << 16;
      const uint3 packedRGB  = lowHalves | highHalves;

      CacheR[index] = packedRGB.r;
      CacheG[index] = packedRGB.g;
      CacheB[index] = packedRGB.b;
  }
  // Reads back the two pixels that Store2Pixels packed into cache entry 'index'.
  // pixel1 is decoded from the low 16 bits of each channel entry, pixel2 from
  // the high 16 bits.
  void Load2Pixels( uint index, out float3 pixel1, out float3 pixel2 )
  {
      const uint3 packedRGB = uint3( CacheR[index], CacheG[index], CacheB[index] );

      pixel1 = f16tof32( packedRGB );
      pixel2 = f16tof32( packedRGB >> 16 );
  }
  // Writes one full-precision pixel into cache entry 'index' by storing the raw
  // 32-bit pattern of each channel (no conversion to 16-bit floats).
  void Store1Pixel( uint index, float3 pixel )
  {
      const uint3 rawBits = asuint(pixel);

      CacheR[index] = rawBits.r;
      CacheG[index] = rawBits.g;
      CacheB[index] = rawBits.b;
  }
  // Reads one full-precision pixel from cache entry 'index', reinterpreting the
  // stored 32-bit patterns as floats (the inverse of Store1Pixel).
  void Load1Pixel( uint index, out float3 pixel )
  {
      const float red   = asfloat( CacheR[index] );
      const float green = asfloat( CacheG[index] );
      const float blue  = asfloat( CacheB[index] );

      pixel = float3( red, green, blue );
  }
  // Horizontally blurs two neighbouring pixels in one call, so that the ten
  // unpacked input samples are shared between both outputs (fewer LDS reads
  // and less unpacking than blurring each pixel separately).
  //
  //   leftMostIndex - first of five consecutive packed cache entries to read
  //                   (two pixels each, ten samples in total).
  //   outIndex      - cache entry receiving the first blurred pixel; the second
  //                   goes to outIndex + 1. Results are stored as full floats.
  //
  // All five loads are issued before either store within this function.
  void BlurHorizontally( uint outIndex, uint leftMostIndex )
  {
      float3 p0, p1, p2, p3, p4, p5, p6, p7, p8, p9;

      Load2Pixels( leftMostIndex + 0, p0, p1 );
      Load2Pixels( leftMostIndex + 1, p2, p3 );
      Load2Pixels( leftMostIndex + 2, p4, p5 );
      Load2Pixels( leftMostIndex + 3, p6, p7 );
      Load2Pixels( leftMostIndex + 4, p8, p9 );

      // First output is centered on p4, second on p5.
      const float3 blurredFirst  = BlurPixels( p0, p1, p2, p3, p4, p5, p6, p7, p8 );
      const float3 blurredSecond = BlurPixels( p1, p2, p3, p4, p5, p6, p7, p8, p9 );

      Store1Pixel( outIndex,     blurredFirst );
      Store1Pixel( outIndex + 1, blurredSecond );
  }
  // Vertically blurs one pixel and writes it to the output texture.
  //
  //   pixelCoord   - coordinate in Result that receives the blurred value.
  //   topMostIndex - cache entry of the top sample; the remaining eight samples
  //                  are read at successive offsets of 8 entries below it.
  //
  // Samples are read with Load1Pixel, i.e. as full 32-bit floats.
  void BlurVertically( uint2 pixelCoord, uint topMostIndex )
  {
      // Distance in cache entries between vertically adjacent samples.
      const uint kRowStride = 8;

      float3 r0, r1, r2, r3, r4, r5, r6, r7, r8;

      Load1Pixel( topMostIndex + 0 * kRowStride, r0 );
      Load1Pixel( topMostIndex + 1 * kRowStride, r1 );
      Load1Pixel( topMostIndex + 2 * kRowStride, r2 );
      Load1Pixel( topMostIndex + 3 * kRowStride, r3 );
      Load1Pixel( topMostIndex + 4 * kRowStride, r4 );   // center sample
      Load1Pixel( topMostIndex + 5 * kRowStride, r5 );
      Load1Pixel( topMostIndex + 6 * kRowStride, r6 );
      Load1Pixel( topMostIndex + 7 * kRowStride, r7 );
      Load1Pixel( topMostIndex + 8 * kRowStride, r8 );

      const float3 blurred = BlurPixels( r0, r1, r2, r3, r4, r5, r6, r7, r8 );
      Result[pixelCoord] = blurred;
  }
  // Compute entry point. Each 8x8 thread group:
  //   1. loads a 16x16 pixel tile (2x2 pixels per thread) into group-shared memory,
  //   2. blurs it horizontally in place, two output pixels per thread,
  //   3. blurs vertically and writes one pixel per thread to Result.
  //
  //   Gid  - thread group ID
  //   GTid - thread ID within the group
  //   DTid - dispatch-wide thread ID; used as the output pixel coordinate
  [RootSignature(PostEffects_RootSig)]
  [numthreads( 8, 8, 1 )]
  void main( uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID )
  {
      //
      // Stage 1: fetch this thread's 2x2 quad and store it, packed, in the cache.
      //
      // The tile starts 4 pixels up and to the left of the group's 8x8 output
      // area. The coordinates are not clamped here, so they may lie outside the
      // texture for groups at the image border.
      const int2 tileOrigin = (Gid.xy << 3) - 4;
      const int2 quadOrigin = (GTid.xy << 1) + tileOrigin;

      // Each thread row owns 16 cache entries: 8 for the quad's upper pixel row
      // followed by 8 for its lower pixel row.
      const int cacheSlot = GTid.x + (GTid.y << 4);
      Store2Pixels( cacheSlot + 0, InputBuf[quadOrigin + uint2(0, 0)], InputBuf[quadOrigin + uint2(1, 0)] );
      Store2Pixels( cacheSlot + 8, InputBuf[quadOrigin + uint2(0, 1)], InputBuf[quadOrigin + uint2(1, 1)] );

      GroupMemoryBarrierWithGroupSync();

      //
      // Stage 2: horizontal blur, results written back into the cache.
      //
      // Threads with GTid.x < 4 handle the upper pixel row of their 16-entry
      // block, the others the lower one: (GTid.x & 4) adds the extra 4 entries
      // that move the read window into the second half of the block.
      //
      // NOTE(review): neighbouring threads of the same row read and write
      // overlapping cache entries inside BlurHorizontally with no barrier in
      // between; this appears to rely on those threads executing in lockstep --
      // confirm before changing. The REFL expectations at the end of this file
      // count exactly two barriers.
      const uint blockBase  = GTid.y << 4;
      const uint writeIndex = blockBase + (GTid.x << 1);
      const uint readIndex  = blockBase + GTid.x + (GTid.x & 4);
      BlurHorizontally( writeIndex, readIndex );

      GroupMemoryBarrierWithGroupSync();

      //
      // Stage 3: vertical blur straight to the output texture.
      //
      // After stage 2 every pixel row occupies 8 full-float entries, so the
      // column starts at row GTid.y, column GTid.x.
      const uint columnTop = (GTid.y << 3) + GTid.x;
      BlurVertically( DTid.xy, columnTop );
  }
  119. // Note: TGSM is counted as part of temp array for now.
  120. // REFL: TempArrayCount: 1536
  121. // REFL: DynamicFlowControlCount: 0
  122. // REFL: ArrayInstructionCount: 54
  123. // REFL: TextureLoadInstructions: 4
  124. // REFL: TextureCompInstructions: 0
  125. // REFL: TextureBiasInstructions: 0
  126. // REFL: TextureGradientInstructions: 0
  127. // REFL: CutInstructionCount: 0
  128. // REFL: EmitInstructionCount: 0
  129. // REFL: cBarrierInstructions: 2
  130. // REFL: cInterlockedInstructions: 0
  131. // REFL: cTextureStoreInstructions: 1