  // JobManagerCompute.hlsl
  //
  // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  // All rights reserved.
  // Code licensed under the BSD License.
  // http://www.anki3d.org/LICENSE
  // LOCK/UNLOCK: spinlock over a uint that holds 0 when free and 1 when taken.
  //
  // LOCK spins on InterlockedCompareExchange(spinlock, 0, 1, ...) and, once the value
  // read back is 0 (i.e. this thread performed the 0 -> 1 swap), opens the critical
  // section. The critical section therefore runs INSIDE the loop iteration that
  // acquired the lock. LOCK deliberately leaves two scopes open (the for body and the
  // if body); UNLOCK closes both, so the two macros must always be used as a pair in
  // the same scope.
  //
  // The trailing "locked__ = locked__" is a no-op whose only purpose is to let the call
  // site be written as "LOCK(x);" with a terminating semicolon.
  #define LOCK(spinlock) \
      for(bool keepWaiting__ = true; keepWaiting__;) \
      { \
          uint locked__; \
          InterlockedCompareExchange(spinlock, 0, 1, locked__); \
          if(locked__ == 0) \
          { \
              locked__ = locked__

  // Releases the lock by atomically writing 0, clears the spin flag so the enclosing
  // for loop exits, and closes the two scopes opened by LOCK.
  #define UNLOCK(spinlock) \
      InterlockedExchange(spinlock, 0, locked__); \
      keepWaiting__ = false; \
      } \
      }
  // Threads per group; also the maximum number of work items a group dequeues per iteration.
  #define NUMTHREADS 128

  // Number of child work items each non-leaf work item emits.
  #define MAX_CHILDREN 4
  // Shared state of the global work queue. The shader only touches it between
  // LOCK(m_spinlock) and UNLOCK(m_spinlock).
  struct Queue
  {
      // 0 = free, 1 = held.
      uint m_spinlock;

      // Index at which the next pushed item is written. Not wrapped here; it is masked
      // with the ring buffer size when used as an index.
      uint m_head;

      // Index of the next item to dequeue. Not wrapped here; masked on use.
      uint m_tail;

      // Count of work items that have been dequeued by some group and not yet reported
      // as finished.
      uint m_pendingWork;
  };
  // Queue bookkeeping; only element 0 is used by this shader.
  globallycoherent RWStructuredBuffer<Queue> g_queue : register(u0);

  // Ring buffer of packed work items: bits [31:16] hold the level, bits [15:0] the payload.
  globallycoherent RWStructuredBuffer<uint> g_ringBuffer : register(u1);

  // [0] accumulates the payload of every level-0 work item.
  // [1] counts the groups that gave up because they could not push their output.
  RWStructuredBuffer<uint> g_finalResult : register(u2);
  // Constants supplied by the application.
  struct Consts
  {
      // Ring buffer size minus one. It is used as a bit mask on head/tail derived
      // indices, so the ring buffer size is presumably a power of two - TODO confirm
      // against the host code.
      uint m_ringBufferSizeMinusOne;

      // Not read by this shader.
      uint m_padding[3];
  };
  // The constants arrive as a push constant when compiling to SPIR-V and through a
  // constant buffer at b0, space3000 otherwise.
  #if defined(__spirv__)
  [[vk::push_constant]] ConstantBuffer<Consts> g_consts;
  #else
  ConstantBuffer<Consts> g_consts : register(b0, space3000);
  #endif
  // Work items dequeued by thread 0 for the current iteration; one per thread at most.
  groupshared uint g_inWorkItems[NUMTHREADS];

  // Number of valid entries in g_inWorkItems.
  groupshared uint g_inWorkItemCount;

  // Set by thread 0 to tell the whole group to leave the main loop.
  groupshared bool g_bNoMoreWork;

  // Children produced by the group during the current iteration, waiting to be pushed
  // to the ring buffer. Sized for every thread emitting MAX_CHILDREN items.
  groupshared uint g_outWorkItems[NUMTHREADS * MAX_CHILDREN];

  // Number of valid entries in g_outWorkItems; threads reserve slots in it atomically.
  groupshared uint g_outWorkItemCount;

  // Number of additional locked attempts to push the group's output after the first one
  // fails because the ring buffer is full.
  // NOTE(review): "Mash" looks like a typo for "Max"; the name is kept because main() uses it.
  static const int kMashPushTries = 1000;
  // Job loop executed by every thread group until the queue drains.
  //
  // Each iteration has two phases separated by group barriers:
  //  1. Thread 0 takes the queue spinlock, dequeues up to NUMTHREADS work items into
  //     g_inWorkItems, pushes the children produced in the previous iteration
  //     (g_outWorkItems) to the ring buffer and updates the pending-work counter.
  //  2. Every thread with an index below g_inWorkItemCount processes one work item. A
  //     level-0 item adds its payload to g_finalResult[0]; any other item emits
  //     MAX_CHILDREN copies of itself with the level decremented by one.
  //
  // The group leaves the loop when thread 0 observes that no work is in flight and the
  // ring buffer is empty, or when it failed to push its output after all retries (in
  // which case g_finalResult[1] is incremented).
  [numthreads(NUMTHREADS, 1, 1)] void main(uint svGroupIndex : SV_GROUPINDEX)
  {
      if(svGroupIndex == 0)
      {
          g_inWorkItemCount = 0;
          g_outWorkItemCount = 0;
      }

      while(true)
      {
          // Makes the previous iteration's groupshared writes (or the initialization
          // above) visible to thread 0 before it starts the queue transaction.
          GroupMemoryBarrierWithGroupSync();

          if(svGroupIndex == 0)
          {
              bool pushSuccessful = true;
              int iterationCount = kMashPushTries;

              // Snapshot of what the group consumed and produced in the previous iteration.
              const uint oldInWorkItemCount = g_inWorkItemCount;
              const uint outWorkItemCount = g_outWorkItemCount;

              do
              {
                  LOCK(g_queue[0].m_spinlock);

                  // Work on local copies of the queue state; they are written back
                  // before the lock is released.
                  uint head = g_queue[0].m_head;
                  uint tail = g_queue[0].m_tail;
                  uint pendingWork = g_queue[0].m_pendingWork;

                  // Dequeue work. Only on the first attempt: on retries the input batch
                  // has already been fetched.
                  if(iterationCount == kMashPushTries)
                  {
                      const uint workItemCount = min(NUMTHREADS, head - tail);

                      for(uint it = 0; it < workItemCount; ++it)
                      {
                          g_inWorkItems[it] = g_ringBuffer[(tail + it) & g_consts.m_ringBufferSizeMinusOne];
                      }

                      pendingWork += workItemCount;
                      g_inWorkItemCount = workItemCount;
                      tail += workItemCount;
                  }

                  // Push the children produced in the previous iteration.
                  if(outWorkItemCount > 0)
                  {
                      const bool full = (head - tail) + outWorkItemCount >= (g_consts.m_ringBufferSizeMinusOne + 1);
                      pushSuccessful = !full;

                      if(pushSuccessful)
                      {
                          for(uint i = 0; i < outWorkItemCount; ++i)
                          {
                              g_ringBuffer[(head + i) & g_consts.m_ringBufferSizeMinusOne] = g_outWorkItems[i];
                          }

                          head += outWorkItemCount;
                          g_outWorkItemCount = 0;
                      }
                  }

                  if(pushSuccessful)
                  {
                      // The previous batch is now fully accounted for.
                      pendingWork -= oldInWorkItemCount;

                      // Terminate only when nothing is in flight AND nothing is queued.
                      // The dequeue above runs before the push, so the items this group
                      // has just pushed are still in the ring buffer; checking
                      // pendingWork alone would let the group exit and strand them.
                      g_bNoMoreWork = (pendingWork == 0) && (head == tail);
                  }

                  // Write the queue state back.
                  g_queue[0].m_head = head;
                  g_queue[0].m_tail = tail;
                  g_queue[0].m_pendingWork = pendingWork;

                  UNLOCK(g_queue[0].m_spinlock);
              } while(!pushSuccessful && (iterationCount-- > 0));

              if(!pushSuccessful)
              {
                  // Could not push after all retries: record the failure and stop the group.
                  // NOTE(review): the batches this group dequeued remain counted in
                  // m_pendingWork, so other groups will never observe it reaching zero.
                  InterlockedAdd(g_finalResult[1], 1);
                  g_bNoMoreWork = true;
              }
          }

          // Publishes g_inWorkItems, g_inWorkItemCount and g_bNoMoreWork to the group.
          GroupMemoryBarrierWithGroupSync();

          // Do work
          if(g_bNoMoreWork)
          {
              // No more work
              break;
          }
          else if(svGroupIndex < g_inWorkItemCount)
          {
              const uint workItem = g_inWorkItems[svGroupIndex];
              const uint level = workItem >> 16u;
              const uint payload = workItem & 0xFFFFu;

              if(level == 0)
              {
                  // Leaf: accumulate the payload.
                  InterlockedAdd(g_finalResult[0], payload);
              }
              else
              {
                  // Same payload, one level lower.
                  uint newWorkItem = (level - 1) << 16u;
                  newWorkItem |= payload;

                  // Reserve MAX_CHILDREN consecutive output slots for this thread.
                  uint slot;
                  InterlockedAdd(g_outWorkItemCount, MAX_CHILDREN, slot);

                  for(uint i = 0; i < MAX_CHILDREN; ++i)
                  {
                      g_outWorkItems[slot + i] = newWorkItem;
                  }
              }
          }
      }
  }