ParticleTileRenderCS.hlsl 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. // RUN: %dxc -E main -T cs_6_0 -O0 %s | FileCheck %s
  2. // CHECK: groupId
  3. // CHECK: threadIdInGroup
  4. // CHECK: bufferLoad
  5. // CHECK: textureLoad
  6. // CHECK: textureGather
  7. // CHECK: FirstbitLo
  8. // CHECK: Saturate
  9. // CHECK: sampleLevel
  10. // CHECK: textureLoad
  11. // CHECK: textureStore
  12. //
  13. // Copyright (c) Microsoft. All rights reserved.
  14. // This code is licensed under the MIT License (MIT).
  15. // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
  16. // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
  17. // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
  18. // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
  19. //
  20. // Developed by Minigraph
  21. //
  22. // Author: James Stanard
  23. // Alex Nankervis
  24. // Julia Careaga
  25. //
  26. #include "ParticleUtility.hlsli"
  // Debug aid: when defined, BlendLowRes halves the green channel of the colour it samples.
  //#define DEBUG_LOW_RES

  // Opacity cutoff. In this file it is only referenced by the commented-out
  // early-out inside RenderParticles.
  #define ALPHA_THRESHOLD (252.0 / 255.0)

  cbuffer CB : register(b0)
  {
      // Compared against Particle.TextureLevel in RenderParticles (DYNAMIC_RESOLUTION builds).
      float gDynamicResLevel;
      // LOD bias applied by BlendHighRes (DYNAMIC_RESOLUTION builds).
      float gMipBias;
  };

  // Destination of the composited result (written by WriteBlendedColor).
  RWTexture2D<float3> g_OutputColorBuffer : register(u0);

  // Per-particle data, indexed by the low 18 bits of a sort key.
  StructuredBuffer<ParticleScreenData> g_VisibleParticles : register(t0);
  // Bit masks read 32 bits at a time; each set bit selects one entry of g_SortedParticles.
  ByteAddressBuffer g_HitMask : register(t1);
  // Particle textures; the slice is chosen by Particle.TextureIndex.
  Texture2DArray<float4> g_TexArray : register(t2);
  // Colour that the particles are composited over.
  Texture2D<float3> g_InputColorBuffer : register(t3);
  // Sort keys addressed relative to a bin's first entry.
  StructuredBuffer<uint> g_SortedParticles : register(t5);

  #ifdef DISABLE_DEPTH_TESTS
  // One packed uint per thread group (unpacked in main).
  StructuredBuffer<uint> g_DrawPackets : register(t7);
  #else
  // Depth values gathered once per 2x2 quad.
  Texture2D<float> g_InputDepthBuffer : register(t4);
  // One packed uint per thread group (unpacked in main).
  StructuredBuffer<uint> g_DrawPackets : register(t6);
  // Per-tile value that is shifted left by 18 and compared against sort keys.
  Texture2D<uint> g_TileDepthBounds : register(t8);
  #endif
  // Fetches a particle's texel and returns it as a premultiplied-alpha colour
  // modulated by the particle's own colour.
  //   Particle  - supplies the texture slice, base mip level and tint.
  //   Sampler   - sampler state used for the fetch.
  //   UV        - texture coordinate within the particle's texture.
  //   LevelBias - added to Particle.TextureLevel to form the sampled LOD.
  float4 SampleParticleColor( ParticleScreenData Particle, SamplerState Sampler, float2 UV, float LevelBias )
  {
      const float3 Location = float3(UV, Particle.TextureIndex);
      const float MipLevel = Particle.TextureLevel + LevelBias;
      const float4 Texel = g_TexArray.SampleLevel( Sampler, Location, MipLevel );

      // Premultiply RGB by alpha. Pre-multiplied alpha blending also permits additive blending.
      const float4 Premultiplied = float4(Texel.rgb * Texel.a, Texel.a);

      return Premultiplied * Particle.Color;
  }
  // Accumulates Src into Dst, scaled by the coverage still unclaimed in Dst
  // (1 - Dst.a) and by Mask.
  //   Dst  - running premultiplied colour; updated in place.
  //   Src  - premultiplied colour to add.
  //   Mask - extra scalar weight applied to Src.
  void BlendPixel( inout float4 Dst, float4 Src, float Mask )
  {
      const float Remaining = 1.0 - Dst.a;
      // Multiplication order is kept as (Src * Remaining) * Mask.
      Dst = Dst + (Src * Remaining) * Mask;
  }
  // Blends one particle into a 2x2 quad using a separate texture sample per pixel.
  //   Quad       - four accumulated colours, one per row; updated in place.
  //   Particle   - particle to sample.
  //   PixelCoord - coordinate of the quad centre, in the same space as Particle.Corner.
  //   Mask       - per-pixel weights; x, y, z, w apply to rows 0, 1, 2, 3.
  // Row layout: 0 = (low x, high y), 1 = (high x, high y), 2 = (high x, low y),
  // 3 = (low x, low y).
  void BlendHighRes( inout float4x4 Quad, ParticleScreenData Particle, float2 PixelCoord, float4 Mask = 1 )
  {
      // Texture coordinate of the quad centre, plus the half-pixel step to each pixel centre.
      const float2 CenterUV = (PixelCoord - Particle.Corner) * Particle.RcpSize;
      const float2 HalfStep = 0.5 * gRcpBufferDim * Particle.RcpSize;
      const float2 LoUV = CenterUV - HalfStep;
      const float2 HiUV = CenterUV + HalfStep;

  #if defined(DYNAMIC_RESOLUTION)
      // Use point sampling for high-res rendering because this implies we're not rendering
      // with the most detailed mip level anyway.
      SamplerState Sampler = gSampPointBorder;
      float LevelBias = gMipBias;
  #else
      SamplerState Sampler = gSampLinearBorder;
      float LevelBias = 0.0;
  #endif

      const float2 UVRow0 = float2(LoUV.x, HiUV.y);
      const float2 UVRow1 = float2(HiUV.x, HiUV.y);
      const float2 UVRow2 = float2(HiUV.x, LoUV.y);
      const float2 UVRow3 = float2(LoUV.x, LoUV.y);

      BlendPixel(Quad[0], SampleParticleColor(Particle, Sampler, UVRow0, LevelBias), Mask.x);
      BlendPixel(Quad[1], SampleParticleColor(Particle, Sampler, UVRow1, LevelBias), Mask.y);
      BlendPixel(Quad[2], SampleParticleColor(Particle, Sampler, UVRow2, LevelBias), Mask.z);
      BlendPixel(Quad[3], SampleParticleColor(Particle, Sampler, UVRow3, LevelBias), Mask.w);
  }
  // Blends one particle into a 2x2 quad using a single texture sample shared by
  // all four pixels. The sample is taken at the quad centre with a LOD bias of 1.0.
  //   Quad       - four accumulated colours, one per row; updated in place.
  //   Particle   - particle to sample.
  //   PixelCoord - coordinate of the quad centre, in the same space as Particle.Corner.
  //   Mask       - per-pixel weights; x, y, z, w apply to rows 0, 1, 2, 3.
  void BlendLowRes( inout float4x4 Quad, ParticleScreenData Particle, float2 PixelCoord, float4 Mask = 1 )
  {
      const float2 CenterUV = (PixelCoord - Particle.Corner) * Particle.RcpSize;
      float4 Shared = SampleParticleColor(Particle, gSampLinearBorder, CenterUV, 1.0);

  #ifdef DEBUG_LOW_RES
      // Debug tint: halve green for anything drawn through this path.
      Shared.g *= 0.5;
  #endif

      BlendPixel(Quad[0], Shared, Mask.x);
      BlendPixel(Quad[1], Shared, Mask.y);
      BlendPixel(Quad[2], Shared, Mask.z);
      BlendPixel(Quad[3], Shared, Mask.w);
  }
  // Composites a premultiplied colour over the input colour buffer and stores
  // the result in the output colour buffer at the same pixel.
  //   ST    - pixel coordinate to read and write.
  //   Color - premultiplied RGBA; the input colour is weighted by (1 - Color.a).
  void WriteBlendedColor( uint2 ST, float4 Color )
  {
      const float3 Background = g_InputColorBuffer[ST];
      const float Transmittance = 1.0 - Color.a;
      g_OutputColorBuffer[ST] = Color.rgb + Background * Transmittance;
  }
  // Writes the four accumulated colours of a quad to their pixels.
  //   ST   - coordinate of the quad's (0, 0) pixel.
  //   Quad - row 3 goes to offset (0, 0), row 2 to (1, 0), row 1 to (1, 1), row 0 to (0, 1).
  void WriteBlendedQuad( uint2 ST, float4x4 Quad )
  {
      const uint2 Offset00 = uint2(0, 0);
      const uint2 Offset10 = uint2(1, 0);
      const uint2 Offset11 = uint2(1, 1);
      const uint2 Offset01 = uint2(0, 1);

      WriteBlendedColor(ST + Offset00, Quad[3]);
      WriteBlendedColor(ST + Offset10, Quad[2]);
      WriteBlendedColor(ST + Offset11, Quad[1]);
      WriteBlendedColor(ST + Offset01, Quad[0]);
  }
  // Accumulates every particle flagged in a tile's hit mask into one 2x2 pixel quad.
  //   TileCoord    - tile coordinate; indexes g_TileDepthBounds when depth tests are enabled.
  //   ST           - integer pixel coordinate of the quad; (ST + 1) * gRcpBufferDim is
  //                  used as the quad's sampling location.
  //   NumParticles - the walk continues until at least this many mask bits were processed.
  //   HitMaskStart - byte offset in g_HitMask of the first 32-bit mask word.
  //   BinStart     - index in g_SortedParticles that corresponds to bit 0 of that word.
  // Returns the four accumulated premultiplied colours, one per row.
  float4x4 RenderParticles( uint2 TileCoord, uint2 ST, uint NumParticles, uint HitMaskStart, uint BinStart )
  {
      // Accumulated colour of the four pixels; starts fully transparent.
      float4x4 Quad = 0.0;
      const float2 PixelCoord = (ST + 1) * gRcpBufferDim;

  #ifndef DISABLE_DEPTH_TESTS
      // Sort keys above this value get a per-pixel depth mask; others are blended unmasked.
      const uint TileNearZ = g_TileDepthBounds[TileCoord] << 18;
      float4 Depths = g_InputDepthBuffer.Gather(gSampPointClamp, PixelCoord);
  #endif

      uint MaskByteOffset = HitMaskStart;
      uint SortedBase = BinStart;
      uint BlendedParticles = 0;

      while (BlendedParticles < NumParticles)
      {
          uint PendingBits = g_HitMask.Load(MaskByteOffset);

          while (PendingBits != 0)
          {
              // Take the lowest set bit and clear it.
              const uint SubIdx = firstbitlow(PendingBits);
              PendingBits ^= 1 << SubIdx;

              // The sort key's low 18 bits are the index into the visible-particle buffer.
              const uint SortKey = g_SortedParticles[SortedBase + SubIdx];
              const uint ParticleIdx = SortKey & 0x3FFFF;
              ParticleScreenData Particle = g_VisibleParticles[ParticleIdx];

  #if defined(DYNAMIC_RESOLUTION)
              bool UseHighRes = (Particle.TextureLevel > gDynamicResLevel);
  #elif defined(LOW_RESOLUTION)
              static const bool UseHighRes = false;
  #else
              static const bool UseHighRes = true;
  #endif

              // Weight of 1 unless the depth test below applies.
              float4 CoverageMask = 1;
  #ifndef DISABLE_DEPTH_TESTS
              if (SortKey > TileNearZ)
              {
                  // Steep ramp: 0 where the gathered depth does not exceed the particle's,
                  // reaching 1 once the difference is 0.001 or more.
                  CoverageMask = saturate(1000.0 * (Depths - Particle.Depth));
              }
  #endif

              if (UseHighRes)
              {
                  BlendHighRes(Quad, Particle, PixelCoord, CoverageMask);
              }
              else
              {
                  BlendLowRes(Quad, Particle, PixelCoord, CoverageMask);
              }

              // Disabled early-out once every pixel is nearly opaque:
              //if (all(float4(Quad[0].a, Quad[1].a, Quad[2].a, Quad[3].a) > ALPHA_THRESHOLD))
              //{
              //    Quad[0].a = Quad[1].a = Quad[2].a = Quad[3].a = 1.0;
              //    return Quad;
              //}

              ++BlendedParticles;
          }

          // Each mask word is 4 bytes and covers 32 entries of the sorted particle list.
          MaskByteOffset += 4;
          SortedBase += 32;
      }

      return Quad;
  }
  // Compute entry point. Each thread group handles the tile named by one draw
  // packet, and each thread renders and writes one 2x2 pixel quad of that tile.
  //   Gid  - Gid.x selects the draw packet.
  //   GI   - flattened thread index; not used here.
  //   GTid - GTid.xy selects the quad within the tile.
  // Draw packet layout: bits 0-15 particle count, bits 16-23 tile X, bits 24-31 tile Y.
  [RootSignature(Particle_RootSig)]
  [numthreads(TILE_SIZE / 2, TILE_SIZE / 2, 1)]
  void main( uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex, uint3 GTid : SV_GroupThreadID )
  {
      const uint Packet = g_DrawPackets[Gid.x];
      const uint ParticleCount = Packet & 0xFFFF;
      const uint2 TileCoord = uint2((Packet >> 16) & 0xFF, (Packet >> 24) & 0xFF);

      // Byte offset of this tile's hit mask: one bit per particle slot of a bin.
      const uint MaskBytesPerTile = MAX_PARTICLES_PER_BIN / 8;
      const uint TileIndex = TileCoord.x + TileCoord.y * gTileRowPitch;
      const uint HitMaskStart = TileIndex * MaskBytesPerTile;

      // First sorted-particle entry of the bin that contains this tile.
      const uint2 BinCoord = TileCoord / uint2(TILES_PER_BIN_X, TILES_PER_BIN_Y);
      const uint BinIndex = BinCoord.x + BinCoord.y * gBinsPerRow;
      const uint BinStart = BinIndex * MAX_PARTICLES_PER_BIN;

      // Pixel coordinate of this thread's quad.
      const uint2 ST = TileCoord * TILE_SIZE + 2 * GTid.xy;

      WriteBlendedQuad(ST, RenderParticles( TileCoord, ST, ParticleCount, HitMaskStart, BinStart ));
  }