FluidCS11_DensityCS_Shared.hlsl 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. // RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
  2. // CHECK: threadId
  3. // CHECK: flattenedThreadIdInGroup
  4. // CHECK: bufferLoad
  5. // CHECK: addrspace(3)
  6. // CHECK: barrier
  7. // CHECK: addrspace(3)
  8. // CHECK: dot2
  9. // CHECK: barrier
  10. // CHECK: bufferStore
  11. // CHECK: !"llvm.loop.unroll.disable"
  12. //--------------------------------------------------------------------------------------
  13. // File: FluidCS11.hlsl
  14. //
  15. // Copyright (c) Microsoft Corporation. All rights reserved.
  16. //--------------------------------------------------------------------------------------
  17. //--------------------------------------------------------------------------------------
  18. // Smoothed Particle Hydrodynamics Algorithm Based Upon:
  19. // Particle-Based Fluid Simulation for Interactive Applications
  20. // Matthias Müller
  21. //--------------------------------------------------------------------------------------
  22. //--------------------------------------------------------------------------------------
  23. // Optimized Grid Algorithm Based Upon:
  24. // Broad-Phase Collision Detection with CUDA
  25. // Scott Le Grand
  26. //--------------------------------------------------------------------------------------
  // Per-particle kinematic state held in the particle structured buffers.
  struct Particle
  {
      float2 position;    // 2D position (the density kernel reads this field)
      float2 velocity;    // 2D velocity
  };
  // Per-particle force record: a single 2D acceleration vector.
  struct ParticleForces
  {
      float2 acceleration;
  };
  // Per-particle density record; the density kernel writes one of these per particle.
  struct ParticleDensity
  {
      float density;
  };
  // Simulation-wide constants bound at b0.
  // Member order and types define the constant-buffer layout, so they must not
  // be rearranged. Members without a note are not referenced by the code in
  // this file.
  cbuffer cbSimulationConstants : register( b0 )
  {
      uint   g_iNumParticles;        // upper bound of the neighbour loop in the density kernel
      float  g_fTimeStep;
      float  g_fSmoothlen;           // smoothing length h of the density kernel function
      float  g_fPressureStiffness;
      float  g_fRestDensity;
      float  g_fDensityCoef;         // fParticleMass * 315 / (64 * PI * fSmoothlen^9)
      float  g_fGradPressureCoef;
      float  g_fLapViscosityCoef;
      float  g_fWallStiffness;

      float4 g_vGravity;
      float4 g_vGridDim;             // xy: position-to-cell scale, zw: cell offset
      float3 g_vPlanes[4];
  };
  55. //--------------------------------------------------------------------------------------
  56. // Fluid Simulation
  57. //--------------------------------------------------------------------------------------
  // Threads per group of the simulation kernels; also the number of particle
  // positions cached per tile in group-shared memory.
  #define SIMULATION_BLOCK_SIZE 256
  59. //--------------------------------------------------------------------------------------
  60. // Structured Buffers
  61. //--------------------------------------------------------------------------------------
  // Every read/write view below is declared on u0, while the read-only views
  // occupy t0..t4. Presumably each entry point references only one of the
  // read/write views so the shared slot never collides; the density kernel in
  // this file uses only ParticlesRO and ParticlesDensityRW.

  // Particle state
  RWStructuredBuffer<Particle>        ParticlesRW        : register( u0 );
  StructuredBuffer<Particle>          ParticlesRO        : register( t0 );

  // Particle densities
  RWStructuredBuffer<ParticleDensity> ParticlesDensityRW : register( u0 );
  StructuredBuffer<ParticleDensity>   ParticlesDensityRO : register( t1 );

  // Particle forces
  RWStructuredBuffer<ParticleForces>  ParticlesForcesRW  : register( u0 );
  StructuredBuffer<ParticleForces>    ParticlesForcesRO  : register( t2 );

  // Packed grid key/value pairs
  RWStructuredBuffer<uint>            GridRW             : register( u0 );
  StructuredBuffer<uint>              GridRO             : register( t3 );

  // Grid indices (two uints per entry)
  RWStructuredBuffer<uint2>           GridIndicesRW      : register( u0 );
  StructuredBuffer<uint2>             GridIndicesRO      : register( t4 );
  72. //--------------------------------------------------------------------------------------
  73. // Grid Construction
  74. //--------------------------------------------------------------------------------------
  75. // For simplicity, this sample uses a 16-bit hash based on the grid cell and
  76. // a 16-bit particle ID to keep track of the particles while sorting.
  77. // This imposes a limit of 64K particles and a 256x256 grid.
  78. // You could extend the implementation to support larger scenarios by using a uint2.
  // Maps a particle position to (fractional) grid-cell coordinates.
  // The position is scaled by g_vGridDim.xy, offset by g_vGridDim.zw, and the
  // result is clamped per component to the range [0, 255].
  float2 GridCalculateCell(float2 position)
  {
      const float2 cellScale  = g_vGridDim.xy;
      const float2 cellOffset = g_vGridDim.zw;
      const float2 lowest     = float2(0, 0);
      const float2 highest    = float2(255, 255);

      const float2 cell = position * cellScale + cellOffset;
      return clamp(cell, lowest, highest);
  }
  // Packs a cell coordinate into a grid key: y * 256 + x.
  //
  //   [-----UNUSED-----][----Y---][----X---]
  //         16-bit         8-bit     8-bit
  //
  // The layout above holds when both components are at most 255.
  uint GridConstuctKey(uint2 xy)
  {
      const uint rowPart    = xy.y * 256u;
      const uint columnPart = xy.x;
      return rowPart + columnPart;
  }
  // Packs a cell coordinate and a value into one 32-bit word:
  // y * 2^24 + x * 2^16 + value.
  //
  //   [----Y---][----X---][-----VALUE------]
  //      8-bit     8-bit        16-bit
  //
  // The layout above holds when x and y are at most 255 and value is at most 65535.
  uint GridConstuctKeyValuePair(uint2 xy, uint value)
  {
      const uint rowPart    = xy.y * 16777216u;   // 256 * 256 * 256
      const uint columnPart = xy.x * 65536u;      // 256 * 256
      return rowPart + columnPart + value;
  }
  // Extracts the key, i.e. the upper 16 bits, of a packed key/value pair.
  unsigned int GridGetKey(unsigned int keyvaluepair)
  {
      const unsigned int VALUE_BITS = 16;
      return keyvaluepair >> VALUE_BITS;
  }
  // Extracts the value, i.e. the lower 16 bits, of a packed key/value pair.
  unsigned int GridGetValue(unsigned int keyvaluepair)
  {
      const unsigned int VALUE_MASK = 0xFFFF;
      return keyvaluepair & VALUE_MASK;
  }
  103. //--------------------------------------------------------------------------------------
  104. // Density Calculation
  105. //--------------------------------------------------------------------------------------
  // Density contribution of one neighbour at squared distance r_sq.
  //
  // Evaluates the poly6 kernel
  //     W_poly6(r, h) = 315 / (64 * pi * h^9) * (h^2 - r^2)^3
  // with h = g_fSmoothlen and the constant factor pre-folded into
  //     g_fDensityCoef = fParticleMass * 315.0f / (64.0f * PI * fSmoothlen^9)
  //
  // No range check is done here; the caller in this file only invokes it when
  // r_sq < h^2.
  float CalculateDensity(float r_sq)
  {
      const float support_sq = g_fSmoothlen * g_fSmoothlen;
      const float falloff    = support_sq - r_sq;

      // Multiplication order is kept left-to-right so rounding is unchanged.
      return g_fDensityCoef * falloff * falloff * falloff;
  }
  114. //--------------------------------------------------------------------------------------
  115. // Shared Memory Optimized N^2 Algorithm
  116. //--------------------------------------------------------------------------------------
  // One tile of neighbour positions, shared by all threads of a group.
  groupshared float2 density_shared_pos[SIMULATION_BLOCK_SIZE];

  // Density pass, brute-force N^2 with group-shared tiling.
  //
  // Each thread owns the particle with index DTid.x. The group walks over all
  // particles in tiles of SIMULATION_BLOCK_SIZE: every thread loads one
  // neighbour position into group-shared memory, the group synchronises, and
  // each thread then accumulates the density contribution of every neighbour
  // in the tile that lies within the smoothing length.
  //
  // Gid and GTid are unused but remain part of the entry-point signature.
  //
  // Unlike the straightforward version, this one is safe when g_iNumParticles
  // is not a multiple of SIMULATION_BLOCK_SIZE:
  //   * only the valid entries of the final, partial tile are accumulated
  //     (Direct3D returns zero for out-of-range structured-buffer reads, which
  //     would otherwise act as phantom particles at the origin), and
  //   * threads whose index is past the last particle do not write a density.
  // For particle counts that are a multiple of the block size the result is
  // unchanged.
  [numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
  void main( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
  {
      const unsigned int P_ID = DTid.x;
      const float h_sq = g_fSmoothlen * g_fSmoothlen;
      float2 P_position = ParticlesRO[P_ID].position;

      float density = 0;

      // Calculate the density based on all neighbors
      [loop]
      for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE)
      {
          // Cache a tile of particles into shared memory to increase IO efficiency
          density_shared_pos[GI] = ParticlesRO[N_block_ID + GI].position;

          // Wait until the whole tile has been written before anyone reads it.
          GroupMemoryBarrierWithGroupSync();

          // Number of real particles in this tile. The loop condition guarantees
          // N_block_ID < g_iNumParticles, so the subtraction cannot wrap. The
          // value is identical for every thread of the group, so control flow
          // around the barriers stays uniform.
          const uint N_tile_count = min(g_iNumParticles - N_block_ID, (uint)SIMULATION_BLOCK_SIZE);

          for (uint N_tile_ID = 0; N_tile_ID < N_tile_count; N_tile_ID++)
          {
              float2 N_position = density_shared_pos[N_tile_ID];

              float2 diff = N_position - P_position;
              float r_sq = dot(diff, diff);
              if (r_sq < h_sq)
              {
                  density += CalculateDensity(r_sq);
              }
          }

          // Make sure every thread is done with the tile before it is overwritten.
          GroupMemoryBarrierWithGroupSync();
      }

      // Threads past the end of the particle list have nothing to store. The
      // guard sits after the last barrier, so no thread skips a synchronisation.
      if (P_ID < g_iNumParticles)
      {
          ParticlesDensityRW[P_ID].density = density;
      }
  }