  // JobManagerWg.hlsl

  // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  // All rights reserved.
  // Code licensed under the BSD License.
  // http://www.anki3d.org/LICENSE
  // Threads per thread group; also the capacity of SecondNodeInput::m_workItems.
  #define NUMTHREADS 64
  // Number of child work items a non-leaf work item spawns; also used as the MaxRecords limit of the node outputs.
  #define MAX_CHILDREN 4

  // Output buffer: element 0 accumulates leaf payloads, element 1 counts ASSERT failures.
  RWStructuredBuffer<uint> g_finalResult : register(u0);
  // Packed work items consumed by the entry node (one per thread).
  StructuredBuffer<uint> g_initialWork : register(t0);
  // Shader-side assertion: when the condition is false, atomically bumps g_finalResult[1] by one.
  // Wrapped in do/while(0) so that it behaves like a single statement at the call site.
  #define ASSERT(x) \
      do \
      { \
          if(!(x)) \
          { \
              InterlockedAdd(g_finalResult[1], 1); \
          } \
      } while(0)
  // Input record of the entry node ("main"). It only carries the dispatch grid.
  struct FirstNodeInput
  {
      uint3 m_svDispatchGrid : SV_DispatchGrid; // Dispatch grid supplied with the record
  };
  // Input record of "secondNode": a batch of packed work items for one thread group.
  struct SecondNodeInput
  {
      uint3 m_svDispatchGrid : SV_DispatchGrid; // Producers in this file always write 1 here

      uint m_workItems[NUMTHREADS]; // Packed work items; thread N of the consumer reads element N
      uint m_workItemCount; // How many leading entries of m_workItems are valid
  };
  // Per-group counter of the child work items produced by the group. Both nodes reset it from thread 0 and
  // bump it with InterlockedAdd to reserve output slots.
  groupshared uint g_newWorkItemCount;
  // Entry node of the graph.
  //
  // Every thread picks one packed work item from g_initialWork (if there is one for its index). A work item
  // stores a "level" in its upper 16 bits and a "payload" in its lower 16 bits:
  // - level == 0: the payload is atomically added to g_finalResult[0].
  // - level != 0: MAX_CHILDREN copies of the item, with the level decremented, are queued for "secondNode".
  //
  // Queued items are packed into SecondNodeInput records, NUMTHREADS items per record.
  [Shader("node")]
  [NodeLaunch("broadcasting")]
  [NodeIsProgramEntry]
  [NodeMaxDispatchGrid(1, 1, 1)]
  [numthreads(NUMTHREADS, 1, 1)]
  void main(DispatchNodeInputRecord<FirstNodeInput> input, uint svDispatchThreadId : SV_DispatchThreadId,
            uint svGroupIndex : SV_GROUPINDEX, [MaxRecords(MAX_CHILDREN)] NodeOutput<SecondNodeInput> secondNode)
  {
      // A single thread clears the shared counter; the barrier publishes the reset to the whole group
      if(svGroupIndex == 0)
      {
          g_newWorkItemCount = 0;
      }
      GroupMemoryBarrierWithGroupSync();

      uint initialItemCount, initialItemStride;
      g_initialWork.GetDimensions(initialItemCount, initialItemStride);

      uint children[MAX_CHILDREN]; // Only meaningful when childCount != 0
      uint childCount = 0;
      uint firstSlot = 0; // Index of this thread's first child among all the children of the group

      const bool hasItem = svDispatchThreadId < initialItemCount;
      if(hasItem)
      {
          const uint packed = g_initialWork[svDispatchThreadId];
          const uint level = packed >> 16u;
          const uint payload = packed & 0xFFFFu;

          if(level != 0)
          {
              // Not a leaf: spawn the children one level lower, carrying the same payload
              const uint child = ((level - 1) << 16u) | payload;
              for(uint c = 0; c < MAX_CHILDREN; ++c)
              {
                  children[c] = child;
              }

              // Reserve MAX_CHILDREN consecutive slots; firstSlot receives the counter value before the add
              InterlockedAdd(g_newWorkItemCount, MAX_CHILDREN, firstSlot);
              childCount = MAX_CHILDREN;
          }
          else
          {
              // Leaf: contribute the payload to the final result
              InterlockedAdd(g_finalResult[0], payload);
          }
      }

      // Wait until every thread has reserved its slots. The counter is not modified after this point
      GroupMemoryBarrierWithGroupSync();
      const uint totalChildren = g_newWorkItemCount;

      // Each output record holds up to NUMTHREADS items, so round up
      const uint recordsNeeded = (totalChildren + NUMTHREADS - 1) / NUMTHREADS;
      GroupNodeOutputRecords<SecondNodeInput> records = secondNode.GetGroupNodeOutputRecords(recordsNeeded);

      if(recordsNeeded != 0)
      {
          // Record headers. Every thread of the group runs this loop and writes the same values
          for(uint r = 0; r < recordsNeeded; ++r)
          {
              records[r].m_svDispatchGrid = 1;

              const uint begin = r * NUMTHREADS;
              const uint end = min(begin + NUMTHREADS, totalChildren);
              records[r].m_workItemCount = end - begin;
          }

          // Scatter this thread's children into the slots it reserved
          for(uint n = 0; n < childCount; ++n)
          {
              const uint slot = firstSlot + n;
              records[slot / NUMTHREADS].m_workItems[slot % NUMTHREADS] = children[n];
          }
      }

      records.OutputComplete();
  }
  // Dummy constant. According to the original note it is here only for formatting purposes.
  static const int x = 0;
  // Recursive worker node (it outputs to itself, with NodeMaxRecursionDepth(16)).
  //
  // Thread N handles entry N of the input record's m_workItems, as long as N < m_workItemCount. A work item
  // stores a "level" in its upper 16 bits and a "payload" in its lower 16 bits:
  // - level == 0: the payload is atomically added to g_finalResult[0].
  // - level != 0: MAX_CHILDREN copies of the item, with the level decremented, are queued for another
  //   "secondNode" launch.
  //
  // No records are requested when secondNode.IsValid() is false; children queued in that case are not emitted.
  // NOTE(review): IsValid() is presumably false once the recursion limit is reached - confirm against the
  // work graphs specification.
  [Shader("node")]
  [NodeLaunch("broadcasting")]
  [numthreads(NUMTHREADS, 1, 1)]
  [NodeDispatchGrid(1, 1, 1)]
  [NodeMaxRecursionDepth(16)]
  void secondNode(DispatchNodeInputRecord<SecondNodeInput> input, [MaxRecords(MAX_CHILDREN)] NodeOutput<SecondNodeInput> secondNode,
                  uint svGroupIndex : SV_GROUPINDEX)
  {
      // A single thread clears the shared counter; the barrier publishes the reset to the whole group
      if(svGroupIndex == 0)
      {
          g_newWorkItemCount = 0;
      }
      GroupMemoryBarrierWithGroupSync();

      uint children[MAX_CHILDREN]; // Only meaningful when childCount != 0
      uint childCount = 0;
      uint firstSlot = 0; // Index of this thread's first child among all the children of the group

      const bool hasItem = svGroupIndex < input.Get().m_workItemCount;
      if(hasItem)
      {
          const uint packed = input.Get().m_workItems[svGroupIndex];
          const uint level = packed >> 16u;
          const uint payload = packed & 0xFFFFu;

          if(level != 0)
          {
              // Not a leaf: spawn the children one level lower, carrying the same payload
              const uint child = ((level - 1) << 16u) | payload;
              for(uint c = 0; c < MAX_CHILDREN; ++c)
              {
                  children[c] = child;
              }

              // Reserve MAX_CHILDREN consecutive slots; firstSlot receives the counter value before the add
              InterlockedAdd(g_newWorkItemCount, MAX_CHILDREN, firstSlot);
              childCount = MAX_CHILDREN;
          }
          else
          {
              // Leaf: contribute the payload to the final result
              InterlockedAdd(g_finalResult[0], payload);
          }
      }

      // Wait until every thread has reserved its slots. The counter is not modified after this point
      GroupMemoryBarrierWithGroupSync();
      const uint totalChildren = g_newWorkItemCount;

      // Request records only if the output is valid. Each record holds up to NUMTHREADS items, so round up
      uint recordsNeeded = 0;
      if(secondNode.IsValid())
      {
          recordsNeeded = (totalChildren + NUMTHREADS - 1) / NUMTHREADS;
      }
      GroupNodeOutputRecords<SecondNodeInput> records = secondNode.GetGroupNodeOutputRecords(recordsNeeded);

      if(recordsNeeded != 0)
      {
          // Record headers. Every thread of the group runs this loop and writes the same values
          for(uint r = 0; r < recordsNeeded; ++r)
          {
              records[r].m_svDispatchGrid = 1;

              const uint begin = r * NUMTHREADS;
              const uint end = min(begin + NUMTHREADS, totalChildren);
              records[r].m_workItemCount = end - begin;
          }

          // Scatter this thread's children into the slots it reserved
          for(uint n = 0; n < childCount; ++n)
          {
              const uint slot = firstSlot + n;
              records[slot / NUMTHREADS].m_workItems[slot % NUMTHREADS] = children[n];
          }
      }

      records.OutputComplete();

      // NOTE(review): ASSERT(1) can never fail, so this branch has no observable effect. Kept as in the original
      if(!secondNode.IsValid())
      {
          ASSERT(1);
      }
  }