UpsampleAndBlurCS.hlsl 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. // RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
  2. // CHECK: groupId
  3. // CHECK: threadIdInGroup
  4. // CHECK: threadId
  5. // CHECK: sampleLevel
  6. // CHECK: textureLoad
  7. // CHECK: barrier
  8. // CHECK: textureStore
  9. //
  10. // Copyright (c) Microsoft. All rights reserved.
  11. // This code is licensed under the MIT License (MIT).
  12. // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
  13. // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
  14. // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
  15. // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
  16. //
  17. // Developed by Minigraph
  18. //
  19. // Author: James Stanard
  20. //
  21. // The CS for combining a lower resolution bloom buffer with a higher resolution buffer
  22. // (via bilinear upsampling) and then Gaussian blurring the resultant buffer.
  23. //
  24. // For the intended bloom blurring algorithm, it is expected that this shader will be
  25. // used repeatedly to upsample and blur successively higher resolutions until the final
  26. // bloom buffer is the destination.
  27. //
  28. #include "PostEffectsRS.hlsli"
  // Inputs. main() reads HigherResBuf with integer texel indexing and samples
  // LowerResBuf through LinearBorder at mip level 0.
  // NOTE(review): the sampler's filter and address modes are configured outside this
  // file (presumably by PostEffects_RootSig); the name suggests bilinear + border -- confirm.
  Texture2D<float3>   HigherResBuf : register( t0 );
  Texture2D<float3>   LowerResBuf  : register( t1 );
  SamplerState        LinearBorder : register( s1 );

  // Output. BlurVertically() writes one blurred pixel per thread into this texture.
  RWTexture2D<float3> Result       : register( u0 );
  // Constants consumed by main().
  cbuffer cb0 : register( b0 )
  {
      // Scale applied to (pixel coordinate + 0.5) to produce the UV used to sample LowerResBuf.
      // NOTE(review): presumably 1 / dimensions of the higher-resolution buffer -- confirm against the caller.
      float2 g_inverseDimensions;

      // lerp() weight between the two inputs: 0 keeps only the HigherResBuf texel,
      // 1 keeps only the LowerResBuf sample.
      float  g_upsampleBlendFactor;
  }
  // Gaussian blur weights: rows of Pascal's triangle divided by the row sum.
  // Element [0] is the centre tap; element [k] is applied to the pair of taps
  // k pixels either side of the centre.
  static const float Weights5[3] =
  {
      6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f
  };
  static const float Weights7[4] =
  {
      20.0f / 64.0f, 15.0f / 64.0f, 6.0f / 64.0f, 1.0f / 64.0f
  };
  static const float Weights9[5] =
  {
      70.0f / 256.0f, 56.0f / 256.0f, 28.0f / 256.0f, 8.0f / 256.0f, 1.0f / 256.0f
  };
  // 5-tap blur centred on e, using taps c..g.
  // a, b, h and i are accepted but ignored so that the signature matches Blur7/Blur9.
  float3 Blur5( float3 a, float3 b, float3 c, float3 d, float3 e, float3 f, float3 g, float3 h, float3 i )
  {
      float3 blurred = Weights5[0] * e;
      blurred += Weights5[1] * (d + f);
      blurred += Weights5[2] * (c + g);
      return blurred;
  }
  // 7-tap blur centred on e, using taps b..h.
  // a and i are accepted but ignored so that the signature matches Blur5/Blur9.
  float3 Blur7( float3 a, float3 b, float3 c, float3 d, float3 e, float3 f, float3 g, float3 h, float3 i )
  {
      float3 blurred = Weights7[0] * e;
      blurred += Weights7[1] * (d + f);
      blurred += Weights7[2] * (c + g);
      blurred += Weights7[3] * (b + h);
      return blurred;
  }
  // 9-tap blur centred on e, using every tap a..i.
  // Taps at equal distance from the centre share one weight, so they are summed first.
  float3 Blur9( float3 a, float3 b, float3 c, float3 d, float3 e, float3 f, float3 g, float3 h, float3 i )
  {
      float3 blurred = Weights9[0] * e;
      blurred += Weights9[1] * (d + f);
      blurred += Weights9[2] * (c + g);
      blurred += Weights9[3] * (b + h);
      blurred += Weights9[4] * (a + i);
      return blurred;
  }
  // Kernel used by BlurHorizontally() and BlurVertically(). Blur5, Blur7 and Blur9
  // share one signature, so any of them can be named here.
  #define BlurPixels Blur9

  // Group-shared scratch memory, one array per colour channel.
  // main() fills it as 16 rows of 8 entries covering a 16x16 pixel region whose
  // central 8x8 pixels are the ones blurred and written out. At that stage each
  // uint holds one channel of two horizontally adjacent pixels as a pair of 16-bit
  // floats (see Store2Pixels). After the horizontal pass each entry instead holds
  // one channel of a single pixel as raw 32-bit float bits (see Store1Pixel).
  groupshared uint CacheR[128];
  groupshared uint CacheG[128];
  groupshared uint CacheB[128];
  // Packs two pixels into cache entry `index`: each channel is converted to a 16-bit
  // float, with pixel1 in the low half of the uint and pixel2 in the high half.
  void Store2Pixels( uint index, float3 pixel1, float3 pixel2 )
  {
      const uint3 lowHalves  = f32tof16(pixel1);
      const uint3 highHalves = f32tof16(pixel2) << 16;
      const uint3 packed     = lowHalves | highHalves;

      CacheR[index] = packed.r;
      CacheG[index] = packed.g;
      CacheB[index] = packed.b;
  }
  // Inverse of Store2Pixels: unpacks the two 16-bit-float pixels held in cache entry `index`.
  // pixel1 comes from the low 16 bits of each channel, pixel2 from the high 16 bits.
  void Load2Pixels( uint index, out float3 pixel1, out float3 pixel2 )
  {
      const uint packedR = CacheR[index];
      const uint packedG = CacheG[index];
      const uint packedB = CacheB[index];

      pixel1 = float3(f16tof32(packedR), f16tof32(packedG), f16tof32(packedB));
      pixel2 = float3(f16tof32(packedR >> 16), f16tof32(packedG >> 16), f16tof32(packedB >> 16));
  }
  // Writes a single pixel into cache entry `index`, keeping full 32-bit precision
  // by storing the raw float bit patterns.
  void Store1Pixel( uint index, float3 pixel )
  {
      const uint3 rawBits = asuint(pixel);

      CacheR[index] = rawBits.r;
      CacheG[index] = rawBits.g;
      CacheB[index] = rawBits.b;
  }
  // Inverse of Store1Pixel: reinterprets the bits in cache entry `index` as one float3 pixel.
  void Load1Pixel( uint index, out float3 pixel )
  {
      pixel = float3(asfloat(CacheR[index]), asfloat(CacheG[index]), asfloat(CacheB[index]));
  }
  // Horizontally blurs two adjacent pixels per call, which reduces LDS reads and unpacking.
  //   outIndex      - cache entry for the first result; the second goes to outIndex + 1.
  //   leftMostIndex - first of five consecutive packed cache entries to read.
  // Five packed entries give ten consecutive pixels: taps 0..8 form the window for
  // the first result and taps 1..9 the window for the second. Results are stored
  // unpacked (one pixel per entry) through Store1Pixel.
  void BlurHorizontally( uint outIndex, uint leftMostIndex )
  {
      float3 tap[10];

      [unroll]
      for (uint entry = 0; entry < 5; ++entry)
      {
          Load2Pixels( leftMostIndex + entry, tap[2 * entry], tap[2 * entry + 1] );
      }

      // Both windows are evaluated from the values loaded above before anything is written.
      const float3 firstResult  = BlurPixels(tap[0], tap[1], tap[2], tap[3], tap[4], tap[5], tap[6], tap[7], tap[8]);
      const float3 secondResult = BlurPixels(tap[1], tap[2], tap[3], tap[4], tap[5], tap[6], tap[7], tap[8], tap[9]);

      Store1Pixel(outIndex,     firstResult);
      Store1Pixel(outIndex + 1, secondResult);
  }
  // Vertically blurs one pixel and writes it to Result.
  //   pixelCoord   - destination texel in Result.
  //   topMostIndex - cache entry of the top tap; the remaining eight taps are read
  //                  at a stride of 8 entries (one cache row) below it.
  // Expects the cache to hold unpacked pixels as written by Store1Pixel.
  void BlurVertically( uint2 pixelCoord, uint topMostIndex )
  {
      float3 tap[9];

      [unroll]
      for (uint rowStep = 0; rowStep < 9; ++rowStep)
      {
          Load1Pixel( topMostIndex + rowStep * 8, tap[rowStep] );
      }

      Result[pixelCoord] = BlurPixels(tap[0], tap[1], tap[2], tap[3], tap[4], tap[5], tap[6], tap[7], tap[8]);
  }
  // Blends one texel: lerps from the HigherResBuf texel at `coord` towards LowerResBuf
  // sampled at `uv` (mip level 0), weighted by g_upsampleBlendFactor.
  float3 BlendUpsampledTexel( uint2 coord, float2 uv )
  {
      return lerp(HigherResBuf[coord], LowerResBuf.SampleLevel(LinearBorder, uv, 0.0f), g_upsampleBlendFactor);
  }

  // Entry point. Each 8x8 thread group produces an 8x8 tile of Result in three phases
  // separated by group barriers:
  //   1. blend the two inputs for a 16x16 region and cache it in LDS,
  //   2. blur the cached rows horizontally,
  //   3. blur the columns vertically and write the tile out.
  [RootSignature(PostEffects_RootSig)]
  [numthreads( 8, 8, 1 )]
  void main( uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID )
  {
      //
      // Phase 1: every thread blends a 2x2 quad and stores it as two packed cache entries.
      //

      // The group's 8x8 tile starts at Gid * 8; the read region starts 4 pixels up and left of it.
      const int2 regionOrigin = (Gid.xy << 3) - 4;
      // Upper-left pixel of the 2x2 quad this thread is responsible for.
      const int2 quadOrigin = (GTid.xy << 1) + regionOrigin;

      // UVs at the pixel centres of the quad's four corners.
      const float2 uvTopLeft     = (float2(quadOrigin) + 0.5) * g_inverseDimensions;
      const float2 uvBottomRight = uvTopLeft + g_inverseDimensions;
      const float2 uvTopRight    = float2(uvBottomRight.x, uvTopLeft.y);
      const float2 uvBottomLeft  = float2(uvTopLeft.x, uvBottomRight.y);

      // A cache row is 8 entries wide and each thread row fills two cache rows, hence
      // the stride of 16 per GTid.y; the quad's lower row sits one cache row (8 entries) later.
      const uint upperEntry = GTid.x + (GTid.y << 4);
      const uint lowerEntry = upperEntry + 8;

      const float3 upperLeft  = BlendUpsampledTexel(quadOrigin + uint2(0, 0), uvTopLeft);
      const float3 upperRight = BlendUpsampledTexel(quadOrigin + uint2(1, 0), uvTopRight);
      Store2Pixels(upperEntry, upperLeft, upperRight);

      const float3 lowerLeft  = BlendUpsampledTexel(quadOrigin + uint2(0, 1), uvBottomLeft);
      const float3 lowerRight = BlendUpsampledTexel(quadOrigin + uint2(1, 1), uvBottomRight);
      Store2Pixels(lowerEntry, lowerLeft, lowerRight);

      GroupMemoryBarrierWithGroupSync();

      //
      // Phase 2: horizontal blur, two output pixels per thread.
      //

      // Threads 0..3 of a thread row work on its first cache row; for threads 4..7
      // the (GTid.x & 4) term adds 4, which moves them onto the second cache row.
      const uint pairBase   = GTid.y << 4;
      const uint writeEntry = pairBase + (GTid.x << 1);
      const uint readEntry  = pairBase + GTid.x + (GTid.x & 4);

      // NOTE(review): the entries a thread writes here overlap entries that neighbouring
      // threads of the same thread row read in this pass, and BlurHorizontally has no barrier
      // between its loads and stores. Presumably this relies on those threads executing in
      // lockstep within one wave -- confirm for the target hardware.
      BlurHorizontally(writeEntry, readEntry);

      GroupMemoryBarrierWithGroupSync();

      //
      // Phase 3: vertical blur of one pixel per thread, written straight to Result.
      //
      const uint columnTop = (GTid.y << 3) + GTid.x;
      BlurVertically(DTid.xy, columnTop);
  }