FluidCS11_ForceCS_Shared.hlsl 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. // RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
  2. // CHECK: threadId
  3. // CHECK: flattenedThreadIdInGroup
  4. // CHECK: bufferLoad
  5. // CHECK: dot2
  6. // CHECK: Log
  7. // CHECK: Exp
  8. // CHECK: FMax
  9. // CHECK: Sqrt
  10. // CHECK: barrier
  11. // CHECK: bufferStore
  12. //--------------------------------------------------------------------------------------
  13. // File: FluidCS11.hlsl
  14. //
  15. // Copyright (c) Microsoft Corporation. All rights reserved.
  16. //--------------------------------------------------------------------------------------
  17. //--------------------------------------------------------------------------------------
  18. // Smoothed Particle Hydrodynamics Algorithm Based Upon:
  19. // Particle-Based Fluid Simulation for Interactive Applications
  20. // Matthias Müller
  21. //--------------------------------------------------------------------------------------
  22. //--------------------------------------------------------------------------------------
  23. // Optimized Grid Algorithm Based Upon:
  24. // Broad-Phase Collision Detection with CUDA
  25. // Scott Le Grand
  26. //--------------------------------------------------------------------------------------
  27. struct Particle
  28. {
  29. float2 position;
  30. float2 velocity;
  31. };
  32. struct ParticleForces
  33. {
  34. float2 acceleration;
  35. };
  36. struct ParticleDensity
  37. {
  38. float density;
  39. };
  40. cbuffer cbSimulationConstants : register( b0 )
  41. {
  42. uint g_iNumParticles;
  43. float g_fTimeStep;
  44. float g_fSmoothlen;
  45. float g_fPressureStiffness;
  46. float g_fRestDensity;
  47. float g_fDensityCoef;
  48. float g_fGradPressureCoef;
  49. float g_fLapViscosityCoef;
  50. float g_fWallStiffness;
  51. float4 g_vGravity;
  52. float4 g_vGridDim;
  53. float3 g_vPlanes[4];
  54. };
  55. //--------------------------------------------------------------------------------------
  56. // Fluid Simulation
  57. //--------------------------------------------------------------------------------------
  58. #define SIMULATION_BLOCK_SIZE 256
  59. //--------------------------------------------------------------------------------------
  60. // Structured Buffers
  61. //--------------------------------------------------------------------------------------
  62. RWStructuredBuffer<Particle> ParticlesRW : register( u0 );
  63. StructuredBuffer<Particle> ParticlesRO : register( t0 );
  64. RWStructuredBuffer<ParticleDensity> ParticlesDensityRW : register( u0 );
  65. StructuredBuffer<ParticleDensity> ParticlesDensityRO : register( t1 );
  66. RWStructuredBuffer<ParticleForces> ParticlesForcesRW : register( u0 );
  67. StructuredBuffer<ParticleForces> ParticlesForcesRO : register( t2 );
  68. RWStructuredBuffer<unsigned int> GridRW : register( u0 );
  69. StructuredBuffer<unsigned int> GridRO : register( t3 );
  70. RWStructuredBuffer<uint2> GridIndicesRW : register( u0 );
  71. StructuredBuffer<uint2> GridIndicesRO : register( t4 );
  72. //--------------------------------------------------------------------------------------
  73. // Grid Construction
  74. //--------------------------------------------------------------------------------------
// For simplicity, this sample uses a 16-bit hash based on the grid cell and
// a 16-bit particle ID to keep track of the particles while sorting.
// This imposes a limitation of 64K particles and a 256x256 grid.
// You could extend the implementation to support larger scenarios by using a uint2.
  79. float2 GridCalculateCell(float2 position)
  80. {
  81. return clamp(position * g_vGridDim.xy + g_vGridDim.zw, float2(0, 0), float2(255, 255));
  82. }
  83. unsigned int GridConstuctKey(uint2 xy)
  84. {
  85. // Bit pack [-----UNUSED-----][----Y---][----X---]
  86. // 16-bit 8-bit 8-bit
  87. return dot(xy.yx, uint2(256, 1));
  88. }
  89. unsigned int GridConstuctKeyValuePair(uint2 xy, uint value)
  90. {
  91. // Bit pack [----Y---][----X---][-----VALUE------]
  92. // 8-bit 8-bit 16-bit
  93. return dot(uint3(xy.yx, value), uint3(256*256*256, 256*256, 1));
  94. }
  95. unsigned int GridGetKey(unsigned int keyvaluepair)
  96. {
  97. return (keyvaluepair >> 16);
  98. }
  99. unsigned int GridGetValue(unsigned int keyvaluepair)
  100. {
  101. return (keyvaluepair & 0xFFFF);
  102. }
  103. //--------------------------------------------------------------------------------------
  104. // Force Calculation
  105. //--------------------------------------------------------------------------------------
  106. float CalculatePressure(float density)
  107. {
  108. // Implements this equation:
  109. // Pressure = B * ((rho / rho_0)^y - 1)
  110. return g_fPressureStiffness * max(pow(density / g_fRestDensity, 3) - 1, 0);
  111. }
  112. float2 CalculateGradPressure(float r, float P_pressure, float N_pressure, float N_density, float2 diff)
  113. {
  114. const float h = g_fSmoothlen;
  115. float avg_pressure = 0.5f * (N_pressure + P_pressure);
  116. // Implements this equation:
  117. // W_spkiey(r, h) = 15 / (pi * h^6) * (h - r)^3
  118. // GRAD( W_spikey(r, h) ) = -45 / (pi * h^6) * (h - r)^2
  119. // g_fGradPressureCoef = fParticleMass * -45.0f / (PI * fSmoothlen^6)
  120. return g_fGradPressureCoef * avg_pressure / N_density * (h - r) * (h - r) / r * (diff);
  121. }
  122. float2 CalculateLapVelocity(float r, float2 P_velocity, float2 N_velocity, float N_density)
  123. {
  124. const float h = g_fSmoothlen;
  125. float2 vel_diff = (N_velocity - P_velocity);
  126. // Implements this equation:
  127. // W_viscosity(r, h) = 15 / (2 * pi * h^3) * (-r^3 / (2 * h^3) + r^2 / h^2 + h / (2 * r) - 1)
  128. // LAPLACIAN( W_viscosity(r, h) ) = 45 / (pi * h^6) * (h - r)
  129. // g_fLapViscosityCoef = fParticleMass * fViscosity * 45.0f / (PI * fSmoothlen^6)
  130. return g_fLapViscosityCoef / N_density * (h - r) * vel_diff;
  131. }
  132. //--------------------------------------------------------------------------------------
  133. // Shared Memory Optimized N^2 Algorithm
  134. //--------------------------------------------------------------------------------------
  135. groupshared struct { float2 position; float2 velocity; float density; } force_shared_pos[SIMULATION_BLOCK_SIZE];
  136. [numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
  137. void main( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
  138. {
  139. const unsigned int P_ID = DTid.x; // Particle ID to operate on
  140. float2 P_position = ParticlesRO[P_ID].position;
  141. float2 P_velocity = ParticlesRO[P_ID].velocity;
  142. float P_density = ParticlesDensityRO[P_ID].density;
  143. float P_pressure = CalculatePressure(P_density);
  144. const float h_sq = g_fSmoothlen * g_fSmoothlen;
  145. float2 acceleration = float2(0, 0);
  146. // Calculate the acceleration based on all neighbors
  147. [loop]
  148. for (uint N_block_ID = 0 ; N_block_ID < g_iNumParticles ; N_block_ID += SIMULATION_BLOCK_SIZE)
  149. {
  150. // Cache a tile of particles unto shared memory to increase IO efficiency
  151. force_shared_pos[GI].position = ParticlesRO[N_block_ID + GI].position;
  152. force_shared_pos[GI].velocity = ParticlesRO[N_block_ID + GI].velocity;
  153. force_shared_pos[GI].density = ParticlesDensityRO[N_block_ID + GI].density;
  154. GroupMemoryBarrierWithGroupSync();
  155. [loop]
  156. for (uint N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE; N_tile_ID++ )
  157. {
  158. uint N_ID = N_block_ID + N_tile_ID;
  159. float2 N_position = force_shared_pos[N_tile_ID].position;
  160. float2 diff = N_position - P_position;
  161. float r_sq = dot(diff, diff);
  162. if (r_sq < h_sq && P_ID != N_ID)
  163. {
  164. float2 N_velocity = force_shared_pos[N_tile_ID].velocity;
  165. float N_density = force_shared_pos[N_tile_ID].density;
  166. float N_pressure = CalculatePressure(N_density);
  167. float r = sqrt(r_sq);
  168. // Pressure Term
  169. acceleration += CalculateGradPressure(r, P_pressure, N_pressure, N_density, diff);
  170. // Viscosity Term
  171. acceleration += CalculateLapVelocity(r, P_velocity, N_velocity, N_density);
  172. }
  173. }
  174. GroupMemoryBarrierWithGroupSync();
  175. }
  176. ParticlesForcesRW[P_ID].acceleration = acceleration / P_density;
  177. }