GpuParticlesCommon.hlsl 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma once
  6. #include <AnKi/Shaders/PackFunctions.hlsl>
  7. #include <AnKi/Shaders/Include/ParticleTypes.h>
  8. #include <AnKi/Shaders/Include/GpuSceneTypes.h>
  // Read-only inputs
  // Depth buffer, read by particleCollision() to find the surface under a particle
  Texture2D<Vec4> g_depthTex : register(ANKI_CONCATENATE(t, ANKI_PARTICLE_SIM_DEPTH_BUFFER));
  // GBuffer render target that particleCollision() unpacks the surface normal from
  Texture2D<Vec4> g_gbufferRt2Tex : register(ANKI_CONCATENATE(t, ANKI_PARTICLE_SIM_NORMAL_BUFFER));
  // Transforms, indexed with the emitter's m_worldTransformsIndex
  StructuredBuffer<Mat3x4> g_gpuSceneTransforms : register(ANKI_CONCATENATE(t, ANKI_PARTICLE_SIM_GPU_SCENE_TRANSFORMS));
  // Per-dispatch constants (matrices, delta time, random seed, emitter index)
  ConstantBuffer<ParticleSimulationConstants> g_consts : register(ANKI_CONCATENATE(b, ANKI_PARTICLE_SIM_CONSTANTS));

  // Read-write resources
  // Raw storage that holds the particle property streams and the alive particle indices
  RWByteAddressBuffer g_gpuScene : register(ANKI_CONCATENATE(u, ANKI_PARTICLE_SIM_GPU_SCENE));
  // The emitters. The simulation reads its emitter from here and updates it at the end of the dispatch
  RWStructuredBuffer<GpuSceneParticleEmitter2> g_gpuSceneParticleEmitters : register(ANKI_CONCATENATE(u, ANKI_PARTICLE_SIM_GPU_SCENE_PARTICLE_EMITTERS));
  // Counters and the quantized AABB that the threads accumulate with atomics. Only element 0 is used in this file
  RWStructuredBuffer<ParticleSimulationScratch> g_scratch : register(ANKI_CONCATENATE(u, ANKI_PARTICLE_SIM_SCRATCH));
  // Results written once per dispatch by the last threadgroup (AABB and emitter UUID). Only element 0 is used in this file
  RWStructuredBuffer<ParticleSimulationCpuFeedback> g_cpuFeedback : register(ANKI_CONCATENATE(u, ANKI_PARTICLE_SIM_CPU_FEEDBACK));
  // Per-thread state, set up by particlesInitGlobals()
  static U32 g_particleIdx; // Index of the particle this thread works on
  static U32 g_randomNumber; // Running seed of the random number helpers. Advances by one on every getRandomU32() call
  // Set up the per-thread globals. Call it before any helper that reads g_particleIdx or g_randomNumber
  // particleIdx The index of the particle the calling thread will process
  void particlesInitGlobals(U32 particleIdx)
  {
      // Offset the per-dispatch seed by the particle index so that every particle starts from a different seed
      g_randomNumber = particleIdx + g_consts.m_randomNumber;
      g_particleIdx = particleIdx;
  }
  // Integer hash made of xor-shift and multiply rounds. The shifts and multipliers are the ones of the "lowbias32" hash
  // x The value to hash
  // Returns the hashed value
  U32 genHash(U32 x)
  {
      const U32 kMultiplier0 = 0x7feb352d;
      const U32 kMultiplier1 = 0x846ca68b;

      U32 h = x;
      h = (h ^ (h >> 16)) * kMultiplier0;
      h = (h ^ (h >> 15)) * kMultiplier1;
      return h ^ (h >> 16);
  }
  // Get the next pseudo-random 32bit number of this thread. Every call advances g_randomNumber by one
  U32 getRandomU32()
  {
      const U32 seed = g_particleIdx + g_randomNumber;
      ++g_randomNumber; // Advance the stream so the next call hashes a different seed
      return genHash(seed);
  }
  // Get a pseudo-random number between min and max
  // min Start of the range
  // max End of the range
  F32 getRandomRange(F32 min, F32 max)
  {
      // Keep the random integer inside [0, kBucketCount) and normalize it, which gives a factor in [0, 1)
      const U32 kBucketCount = 0xFFFFF;
      const F32 factor = F32(getRandomU32() % kBucketCount) / F32(kBucketCount);

      return min + factor * (max - min);
  }
  // Per-component version of getRandomRange(). Every component gets its own random number
  // min Start of the range, per component
  // max End of the range, per component
  Vec3 getRandomRange(Vec3 min, Vec3 max)
  {
      // getRandomU32() mutates g_randomNumber, so draw the numbers in separate statements. This pins the x, y, z order instead
      // of relying on the order the compiler evaluates the arguments of a single expression
      Vec3 result;
      result.x = getRandomRange(min.x, max.x);
      result.y = getRandomRange(min.y, max.y);
      result.z = getRandomRange(min.z, max.z);
      return result;
  }
  // Read a property of the current particle (g_particleIdx) from the GPU scene
  // T       The type of the property. The property stream is treated as a tightly packed array of T
  // emitter The emitter that owns the particle. Provides the offset of the property stream
  // prop    The property to read
  template<typename T>
  T readProp(GpuSceneParticleEmitter2 emitter, ParticleProperty prop)
  {
      const U32 streamOffset = emitter.m_particleStateSteamOffsets[(U32)prop];
      const U32 particleOffset = streamOffset + g_particleIdx * sizeof(T);
      return g_gpuScene.Load<T>(particleOffset);
  }
  // Write a property of the current particle (g_particleIdx) to the GPU scene
  // T       The type of the property. The property stream is treated as a tightly packed array of T
  // emitter The emitter that owns the particle. Provides the offset of the property stream
  // prop    The property to write
  // value   The new value of the property
  template<typename T>
  void writeProp(GpuSceneParticleEmitter2 emitter, ParticleProperty prop, T value)
  {
      const U32 streamOffset = emitter.m_particleStateSteamOffsets[(U32)prop];
      const U32 particleOffset = streamOffset + g_particleIdx * sizeof(T);
      g_gpuScene.Store<T>(particleOffset, value);
  }
  // Use the depth buffer and the normal buffer to resolve a collision
  // x [in/out] The position to test, in the space that g_consts.m_viewProjMat transforms from (presumably world space). On
  //            collision it is moved onto the surface found in the depth buffer and pushed a bit along the surface normal
  // n [out]    The surface normal at the collision point. Zero if there is no collision
  // acceptablePenetrationDistance The max view space Z distance between the position and the depth buffer surface. Anything
  //                               further than that is not treated as a collision
  // Returns true if the position collided and got corrected
  Bool particleCollision(inout Vec3 x, out Vec3 n, F32 acceptablePenetrationDistance)
  {
      n = 0.0;

      Vec4 v4 = mul(g_consts.m_viewProjMat, Vec4(x, 1.0));
      if(v4.w <= 0.0)
      {
          // With a perspective projection w <= 0 means that the position is on or behind the camera plane. The divide that
          // follows would mirror it back into the viewport (or divide by zero) and report a bogus collision
          return false;
      }

      const Vec3 v3 = v4.xyz / v4.w;
      if(any(v3.xy <= -1.0) || any(v3.xy >= 1.0))
      {
          // Outside the viewport, the depth buffer has no information
          return false;
      }

      Vec2 texSize;
      g_depthTex.GetDimensions(texSize.x, texSize.y);
      UVec2 texCoord = ndcToUv(v3.xy) * texSize;
      const F32 refDepth = g_depthTex[texCoord].r;
      const F32 particleDepth = v3.z;
      if(particleDepth < refDepth)
      {
          // The position is in front of the surface
          return false;
      }

      const F32 refViewZ = g_consts.m_unprojectionParams.z / (g_consts.m_unprojectionParams.w + refDepth);
      const F32 particleViewZ = g_consts.m_unprojectionParams.z / (g_consts.m_unprojectionParams.w + particleDepth);
      if(abs(particleViewZ - refViewZ) > acceptablePenetrationDistance)
      {
          // Depth buffer is not reliable, assume no collision
          return false;
      }

      // Collides, fetch the normal. The normal buffer may have different dimensions so recompute the texel
      g_gbufferRt2Tex.GetDimensions(texSize.x, texSize.y);
      texCoord = ndcToUv(v3.xy) * texSize;
      n = unpackNormalFromGBuffer(g_gbufferRt2Tex[texCoord]);

      // Change the position by unprojecting the depth buffer sample
      v4 = mul(g_consts.m_invertedViewProjMat, Vec4(v3.xy, refDepth, 1.0));
      x = v4.xyz / v4.w;

      // Also push it a bit outside the surface
      x += n * 0.01;

      return true;
  }
  // The knobs of simulatePhysics(). Call init() to get the defaults and then override what is needed
  struct SimulationArgs
  {
      // Check collision using the depth buffer
      Bool m_checkCollision;

      // Since collision is checked against the depth buffer add a threshold to avoid false positives
      F32 m_penetrationDistance;

      // The number of iterations the simulation will run. Increase it for better accuracy
      U32 m_iterationCount;

      // The coefficient of restitution. 0 is inelastic, 1 is bouncy
      F32 m_e;

      // The friction coefficient. From ~0.2 to 1.0
      F32 m_mu;

      // Decreases the velocity a bit. Set it to 1 to disable damping
      F32 m_velocityDamping;

      // Set all the members to their default values
      void init()
      {
          // Integration
          m_iterationCount = 1;
          m_velocityDamping = 1.0;

          // Collision detection
          m_checkCollision = true;
          m_penetrationDistance = 0.5;

          // Collision response
          m_e = 0.5;
          m_mu = 0.2;
      }
  };
  // Advance a particle by dt, split in args.m_iterationCount equal sub-steps. Every sub-step integrates the velocity and the
  // position and, if enabled, resolves a collision against the depth buffer using restitution and friction
  // F    Force
  // m    Mass
  // dt   Delta time
  // v    Velocity [in/out]
  // x    Particle position [in/out]
  // args The simulation knobs
  void simulatePhysics(Vec3 F, F32 m, F32 dt, inout Vec3 v, inout Vec3 x, SimulationArgs args)
  {
      const Vec3 a = F / m;
      const F32 sdt = dt / F32(args.m_iterationCount);

      for(U32 step = 0; step < args.m_iterationCount; ++step)
      {
          // Integrate. Velocity first and then the position using the new velocity
          v += a * sdt; // a = dv/dt
          x += v * sdt; // v = dx/dt

          Vec3 n;
          if(args.m_checkCollision && particleCollision(x, n, args.m_penetrationDistance))
          {
              // Respond only if the particle is not already moving away from the surface
              const F32 vn = dot(v, n);
              if(!(vn >= 0.0))
              {
                  // Restitution: reflect the normal component of the velocity, scaled by the coefficient of restitution
                  v -= (1.0 + args.m_e) * vn * n;

                  // Friction: remove tangential velocity. At most all of it, at most what Coulomb friction allows
                  const Vec3 vt = v - dot(v, n) * n;
                  const F32 vtLen = length(vt);
                  if(vtLen > 0.0)
                  {
                      const F32 jn = -(1.0 + args.m_e) * m * vn; // Normal impulse
                      const F32 jtDesired = m * vtLen; // The impulse that stops the tangential motion
                      const F32 jt = min(jtDesired, args.m_mu * jn);
                      v -= (jt / m) * (vt / vtLen);
                  }
              }
          }
      }

      // Add some small damping to avoid jitter. Applied once per call and not per sub-step
      v *= args.m_velocityDamping;
  }
  // Register the current particle (g_particleIdx) as alive and grow the bounding box of the emitter to contain it
  // emitter       The emitter that owns the particle
  // particlePos   The position of the particle
  // particleScale Scales the emitter's per-particle AABB
  void appendAlive(GpuSceneParticleEmitter2 emitter, Vec3 particlePos, F32 particleScale)
  {
      // Grab the next free slot of the alive list and store the particle index in it
      U32 slot;
      InterlockedAdd(g_scratch[0].m_aliveParticleCount, 1, slot);
      const U32 slotOffset = emitter.m_aliveParticleIndicesOffset + slot * sizeof(U32);
      BAB_STORE(g_gpuScene, U32, slotOffset, g_particleIdx);

      // The AABB is merged with integer atomics, so quantize the box of the particle first. Round outwards to stay conservative
      const F32 toCentimeters = 100.0;
      const Vec3 boxMin = particlePos + emitter.m_particleAabbMin * particleScale;
      const Vec3 boxMax = particlePos + emitter.m_particleAabbMax * particleScale;
      const IVec3 quantizedMin = floor(boxMin * toCentimeters);
      const IVec3 quantizedMax = ceil(boxMax * toCentimeters);

      [unroll] for(U32 axis = 0; axis < 3; ++axis)
      {
          InterlockedMin(g_scratch[0].m_aabbMin[axis], quantizedMin[axis]);
          InterlockedMax(g_scratch[0].m_aabbMax[axis], quantizedMax[axis]);
      }
  }
  // The body of a particle simulation shader. Every thread handles one particle: it simulates it if it's alive or
  // (re)initializes it if the emitter is allowed to emit. The last threadgroup to finish publishes the results and resets the
  // scratch buffer.
  // TInterface          The type that implements the particle behaviour. This function calls initAnKiParticleEmitterProperties(),
  //                     simulateParticle() and initializeParticle() on it
  // svDispatchThreadId  The dispatch thread ID. It's used as the particle index
  // svGroupIndex        The index of the thread inside its threadgroup
  // iface               The instance of TInterface
  // NOTE(review): The threadgroup count is computed with ANKI_WAVE_SIZE, which presumably means that the threadgroup size of the
  // calling shader has to be ANKI_WAVE_SIZE. Confirm with the callers
  // NOTE(review): The last threadgroup reads g_scratch with plain loads after the atomic counter. Confirm that the writes of the
  // other threadgroups are guaranteed to be visible at that point
  template<typename TInterface>
  void particleMain(U32 svDispatchThreadId, U32 svGroupIndex, TInterface iface)
  {
      particlesInitGlobals(svDispatchThreadId.x);

      const U32 emitterIdx = g_consts.m_gpuSceneParticleEmitterIndex;
      GpuSceneParticleEmitter2 emitter = SBUFF(g_gpuSceneParticleEmitters, emitterIdx);
      iface.initAnKiParticleEmitterProperties(emitter);
      const Mat3x4 emitterTrf = SBUFF(g_gpuSceneTransforms, emitter.m_worldTransformsIndex);

      const Bool reinit = emitter.m_reinitializeOnNextUpdate;
      const Bool canEmitThisFrame = emitter.m_timeLeftForNextEmission - g_consts.m_dt <= 0.0;

      if(g_particleIdx < emitter.m_particleCount)
      {
          // When reinitializing, the stored state is ignored and the particle is treated as dead
          F32 lifeFactor = 1.0;
          Bool alive = false;
          if(!reinit)
          {
              lifeFactor = readProp<F32>(emitter, ParticleProperty::kLifeFactor);
              alive = lifeFactor < 1.0;
          }

          Vec3 particlePosition;
          F32 particleScale;
          if(alive)
          {
              iface.simulateParticle(emitter, lifeFactor, particlePosition, particleScale);
              appendAlive(emitter, particlePosition, particleScale);
          }
          else if(reinit || canEmitThisFrame)
          {
              // Dead particle that may get emitted. Compete with the other threads for one of the emission slots
              U32 emissionSlot;
              InterlockedAdd(g_scratch[0].m_emittedParticleCount, 1, emissionSlot);
              const Bool gotSlot = emissionSlot < emitter.m_particlesPerEmission;

              // On reinit every particle is initialized but only those with a slot become alive. On a normal emission only
              // the particles with a slot are touched and they always become alive
              const Bool init = reinit || gotSlot;
              const Bool makeAlive = !reinit || gotSlot;
              if(init)
              {
                  iface.initializeParticle(emitter, emitterTrf, makeAlive, particlePosition, particleScale);
                  if(makeAlive)
                  {
                      appendAlive(emitter, particlePosition, particleScale);
                  }
              }
          }
      }

      // One thread per threadgroup checks if it's the last threadgroup running
      if(svGroupIndex != 0)
      {
          return;
      }

      U32 threadgroupIdx;
      InterlockedAdd(g_scratch[0].m_threadgroupCount, 1, threadgroupIdx);
      const U32 threadgroupCount = (emitter.m_particleCount + ANKI_WAVE_SIZE - 1) / ANKI_WAVE_SIZE;
      if(threadgroupIdx + 1 != threadgroupCount)
      {
          return;
      }

      // Inform the CPU about the bounding volume. The scratch AABB is in centimeters
      const F32 toMeters = 1.0 / 100.0;
      ParticleSimulationCpuFeedback feedback = (ParticleSimulationCpuFeedback)0;
      feedback.m_aabbMin = g_scratch[0].m_aabbMin * toMeters;
      feedback.m_aabbMax = g_scratch[0].m_aabbMax * toMeters;
      feedback.m_uuid = emitter.m_uuid;
      g_cpuFeedback[0] = feedback;

      // Update the GPU scene emitter. Restart the emission timer if an emission happened, otherwise advance it
      if(canEmitThisFrame)
      {
          SBUFF(g_gpuSceneParticleEmitters, emitterIdx).m_timeLeftForNextEmission = emitter.m_emissionPeriod;
      }
      else
      {
          SBUFF(g_gpuSceneParticleEmitters, emitterIdx).m_timeLeftForNextEmission -= g_consts.m_dt;
      }

      if(reinit)
      {
          SBUFF(g_gpuSceneParticleEmitters, emitterIdx).m_reinitializeOnNextUpdate = 0;
      }

      SBUFF(g_gpuSceneParticleEmitters, emitterIdx).m_aliveParticleCount = g_scratch[0].m_aliveParticleCount;

      // Reset the scratch struct for next frame
      g_scratch[0].m_aabbMin = kMaxI32;
      g_scratch[0].m_aabbMax = kMinI32;
      g_scratch[0].m_threadgroupCount = 0;
      g_scratch[0].m_emittedParticleCount = 0;
      g_scratch[0].m_aliveParticleCount = 0;
  }