// GpuParticleBounds.bsl (3.5 KB)
  // Compute shader that calculates the axis-aligned bounds (minimum and maximum position) of
  // the particles handled by each thread group. Every group writes two entries to gOutput:
  // its minimum at index (groupId * 2) and its maximum at index (groupId * 2 + 1).
  shader GpuParticleBounds
  {
      featureset = HighEnd;

      code
      {
          // NOTE: NUM_THREADS (threads per group) is not defined in this file; it is presumably
          // supplied by the code compiling the shader. As written the kernel needs NUM_THREADS to
          // be a multiple of NUM_REDUCE_THREADS and at least 2 * NUM_REDUCE_THREADS, since threads
          // [NUM_REDUCE_THREADS, 2 * NUM_REDUCE_THREADS) perform the maximum reduction and the
          // final reduction reads shared entries up to index NUM_REDUCE_THREADS * 3 / 2 - 1.

          // Number of threads that take part in each of the final (min / max) reductions.
          #define NUM_REDUCE_THREADS 32

          // Number of shared-memory entries every reducing thread folds together serially.
          // Parenthesized so the macro expands safely inside larger expressions.
          #define NUM_SERIAL_REDUCTIONS (NUM_THREADS / NUM_REDUCE_THREADS)

          #define FLT_MAX 3.402823466e+38f

          // Lowest finite float (-FLT_MAX). Unlike C's FLT_MIN this is NOT the smallest positive value.
          #define FLT_MIN (-3.402823466e+38f)

          // Per-particle 2D coordinates used for looking up the particle in gPosAndTimeTex.
          Buffer<uint2> gParticleIndices;

          // Particle data: xyz is used as the position, w is compared against 1 to detect dead particles.
          Texture2D gPosAndTimeTex;

          // Receives two float3 values per thread group: minimum followed by maximum.
          RWBuffer<float3> gOutput;

          cbuffer Input
          {
              // Base number of iterations every thread group performs.
              uint gIterationsPerGroup;

              // The first gNumExtraIterations groups perform one additional iteration.
              uint gNumExtraIterations;

              // Particle indices at or past this value are not processed.
              uint gNumParticles;
          };

          groupshared float3 sGroupMin[NUM_THREADS];
          groupshared float3 sGroupMax[NUM_THREADS];

          // Entry point. groupThreadId.x selects the thread within the group, groupId.x selects
          // the range of particles the group iterates over and the output slots it writes to.
          [numthreads(NUM_THREADS, 1, 1)]
          void csmain(uint3 groupThreadId : SV_GroupThreadID, uint3 groupId : SV_GroupID)
          {
              uint threadId = groupThreadId.x;

              // Determine the first iteration and the iteration count for this group. Groups
              // with an index below gNumExtraIterations each handle one extra iteration, so
              // later groups must offset their start by gNumExtraIterations.
              uint particleIdx;
              uint numIterations;
              if(groupId.x < gNumExtraIterations)
              {
                  particleIdx = groupId.x * (gIterationsPerGroup + 1);
                  numIterations = gIterationsPerGroup + 1;
              }
              else
              {
                  particleIdx = groupId.x * gIterationsPerGroup + gNumExtraIterations;
                  numIterations = gIterationsPerGroup;
              }

              // Each iteration covers NUM_THREADS consecutive particles, one per thread.
              particleIdx = particleIdx * NUM_THREADS + threadId;

              float3 localMin = FLT_MAX;
              float3 localMax = FLT_MIN;

              for(uint iter = 0; iter < numIterations; ++iter)
              {
                  if(particleIdx >= gNumParticles)
                      break;

                  uint3 pixelPos;
                  pixelPos.xy = gParticleIndices[particleIdx];
                  pixelPos.z = 0;

                  float4 positionAndTime = gPosAndTimeTex.Load(pixelPos);

                  // Advance to this thread's next particle BEFORE the dead-particle check.
                  // Skipping the increment on 'continue' would make the thread re-read the same
                  // dead particle on every remaining iteration and miss all particles after it.
                  particleIdx += NUM_THREADS;

                  // Check if particle is dead
                  if(positionAndTime.w > 1.0f)
                      continue;

                  localMin = min(localMin, positionAndTime.xyz);
                  localMax = max(localMax, positionAndTime.xyz);
              }

              // Publish per-thread results so other threads in the group can reduce them.
              sGroupMin[threadId] = localMin;
              sGroupMax[threadId] = localMax;

              GroupMemoryBarrierWithGroupSync();

              // Reduce serially first: the first NUM_REDUCE_THREADS threads fold the minimums,
              // the next NUM_REDUCE_THREADS threads fold the maximums. Each of them combines
              // every NUM_REDUCE_THREADS-th entry starting at its own slot.
              uint reduceThreadId = threadId & (NUM_REDUCE_THREADS - 1);

              if(threadId < NUM_REDUCE_THREADS)
              {
                  // Reduce minimum (slot 0 of the stride is the initial value, so start at 1)
                  localMin = sGroupMin[reduceThreadId];

                  [unroll]
                  for(int minStep = 1; minStep < NUM_SERIAL_REDUCTIONS; ++minStep)
                      localMin = min(localMin, sGroupMin[minStep * NUM_REDUCE_THREADS + reduceThreadId]);
              }
              else if(threadId < (NUM_REDUCE_THREADS * 2))
              {
                  // Reduce maximum (slot 0 of the stride is the initial value, so start at 1)
                  localMax = sGroupMax[reduceThreadId];

                  [unroll]
                  for(int maxStep = 1; maxStep < NUM_SERIAL_REDUCTIONS; ++maxStep)
                      localMax = max(localMax, sGroupMax[maxStep * NUM_REDUCE_THREADS + reduceThreadId]);
              }

              // All serial reads must finish before the partial results overwrite shared memory.
              GroupMemoryBarrierWithGroupSync();

              // Store serial reduction results
              if(threadId < NUM_REDUCE_THREADS)
                  sGroupMin[reduceThreadId] = localMin;
              else if(threadId < (NUM_REDUCE_THREADS * 2))
                  sGroupMax[reduceThreadId] = localMax;

              GroupMemoryBarrierWithGroupSync();

              // Do parallel reduction within a warp.
              // NOTE(review): there are no barriers between the steps below, so this relies on the
              // NUM_REDUCE_THREADS reducing threads executing in lock-step - confirm this holds on
              // all targeted hardware. Reads at (reduceThreadId + offset) can reach entries past
              // NUM_REDUCE_THREADS; those still hold the original per-thread values, which are
              // already accounted for, so including them does not change the result.
              if(threadId < NUM_REDUCE_THREADS)
              {
                  // Reduce minimum
                  localMin = sGroupMin[reduceThreadId];

                  [unroll]
                  for(uint minOffset = 1; minOffset < NUM_REDUCE_THREADS; minOffset <<= 1)
                  {
                      // Note: Bank conflicts with offset = 2, 4, etc.
                      localMin = min(localMin, sGroupMin[reduceThreadId + minOffset]);
                      sGroupMin[reduceThreadId] = localMin;
                  }
              }
              else if(threadId < (NUM_REDUCE_THREADS * 2))
              {
                  // Reduce maximum
                  localMax = sGroupMax[reduceThreadId];

                  [unroll]
                  for(uint maxOffset = 1; maxOffset < NUM_REDUCE_THREADS; maxOffset <<= 1)
                  {
                      // Note: Bank conflicts with offset = 2, 4, etc.
                      // Must be max(): using min() here would collapse the upper bound.
                      localMax = max(localMax, sGroupMax[reduceThreadId + maxOffset]);
                      sGroupMax[reduceThreadId] = localMax;
                  }
              }

              GroupMemoryBarrierWithGroupSync();

              // Entry 0 of each shared array now holds the group's result; write the pair out.
              if(threadId == 0) gOutput[groupId.x * 2 + 0] = sGroupMin[0];
              if(threadId == 1) gOutput[groupId.x * 2 + 1] = sGroupMax[0];
          }
      };
  };