IndirectDispatch.azsl 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include "IndirectRendering.azsli"
  9. #include <Atom/Features/IndirectRendering.azsli>
  10. #define ThreadBlockSize 128
  11. ShaderResourceGroupSemantic SRG_Frequency1
  12. {
  13. FrequencyId = 1;
  14. };
  15. ShaderResourceGroupSemantic SRG_Frequency2
  16. {
  17. FrequencyId = 2;
  18. };
  19. ShaderResourceGroupSemantic SRG_Frequency3
  20. {
  21. FrequencyId = 3;
  22. };
  23. ShaderResourceGroup CullSrg : SRG_Frequency1
  24. {
  25. float2 m_cullOffset; // The culling plane offset in homogenous space.
  26. uint m_inNumCommands;
  27. uint m_maxDrawIndirectCount;
  28. RWStructuredBuffer<uint> m_outNumCommands;
  29. };
  30. // This SRG contains the commands when using SequenceType::Draw option.
  31. ShaderResourceGroup IndirectDrawCommandsSrg : SRG_Frequency2
  32. {
  33. // Commands in one sequence
  34. struct IndirectCommandSequence
  35. {
  36. DrawIndexedIndirectCommand m_drawCommand;
  37. };
  38. StructuredBuffer<IndirectCommandSequence> m_inputCommands;
  39. RWStructuredBuffer<IndirectCommandSequence> m_outputCommands;
  40. // Copy a command from the input list or clear it's value to 0.
  41. void SetCommand(uint srcIndex, uint destIndex, bool clear = false)
  42. {
  43. if(clear)
  44. {
  45. m_outputCommands[destIndex] = (IndirectCommandSequence)0;
  46. }
  47. else
  48. {
  49. m_outputCommands[destIndex] = m_inputCommands[srcIndex];
  50. }
  51. }
  52. };
  53. // This SRG contains the commands when using SequenceType::IAInlineConstDraw option.
  54. ShaderResourceGroup IndirectIAInlineConstCommandsSrg : SRG_Frequency3
  55. {
  56. // Commands in one sequence
  57. struct IndirectCommandSequence
  58. {
  59. uint m_rootConstantsCommand;
  60. VertexViewIndirectCommand m_vertexCommand;
  61. IndexViewIndirectCommand m_indexCommand;
  62. DrawIndexedIndirectCommand m_drawCommand;
  63. };
  64. StructuredBuffer<IndirectCommandSequence> m_inputCommands;
  65. RWStructuredBuffer<IndirectCommandSequence> m_outputCommands;
  66. // Copy a command using from the input list or clear it's value to 0.
  67. void SetCommand(uint srcIndex, uint destIndex, bool clear = false)
  68. {
  69. if(clear)
  70. {
  71. m_outputCommands[destIndex] = (IndirectCommandSequence)0;
  72. }
  73. else
  74. {
  75. m_outputCommands[destIndex] = m_inputCommands[srcIndex];
  76. }
  77. }
  78. };
  79. option enum class SequenceType { Draw, IAInlineConstDraw} o_sequenceType = SequenceType::Draw;
  80. option bool o_countBufferSupported = false;
  81. [numthreads(ThreadBlockSize, 1, 1)]
  82. void MainCS(uint3 groupId : SV_GroupID, uint groupIndex : SV_GroupIndex)
  83. {
  84. // Each thread of the CS operates on one of the indirect commands.
  85. uint index = (groupId.x * ThreadBlockSize) + groupIndex;
  86. if(index < CullSrg::m_inNumCommands)
  87. {
  88. // We cull only in the X axis.
  89. // Calculate the left and right limits of the cull area.
  90. float4 left = mul(IndirectSceneSrg::m_matrix, float4(TransformInstancePos(float3(-1.0, 0, 0), IndirectSceneSrg::m_instancesData[index]), 1.0));
  91. left /= left.w;
  92. float4 right = mul(IndirectSceneSrg::m_matrix, float4(TransformInstancePos(float3(1.0, 0, 0), IndirectSceneSrg::m_instancesData[index]), 1.0));
  93. right /= right.w;
  94. uint outputIndex = index;
  95. bool setCommand = o_countBufferSupported ? false : true;
  96. bool clearCommand = true;
  97. // Check if we need to cull the primitive.
  98. if (CullSrg::m_cullOffset.x < right.x && left.x < CullSrg::m_cullOffset.y)
  99. {
  100. setCommand = true;
  101. clearCommand = false;
  102. // If count buffer is not supported, the output index is the same as the input index
  103. // and we just set the number of vertices to draw to 0 so no triangles are rendered.
  104. // If it is supported, the output index is calculated from the count buffer.
  105. if (o_countBufferSupported)
  106. {
  107. // The current value of the count buffer will be our output index. This value start with 0
  108. // and is increment by 1 after each thread writes a new command.
  109. // If multi indirect is not supported, then we need to divide the total draw calls in
  110. // multiple "groups". The count buffer will contain multiple values, one for each of these
  111. // groups, and we just apply an offset to the count buffer when submitting the indirect draw in CPU.
  112. uint i = 0;
  113. do
  114. {
  115. // Increment and retrieve the number atomically to get the output index.
  116. // If the value is over the allowed limit, we need to move to the next
  117. // "group". Since we already called InterlockedAdd (and incremented the value),
  118. // the count for the group will be greater than the allowed limit, but since the Indirect Draw
  119. // call on CPU will limit the max number of operations, it doesn't matter that the count
  120. // buffer value is over the limit.
  121. InterlockedAdd(CullSrg::m_outNumCommands[i++], uint(1), outputIndex);
  122. } while (outputIndex >= CullSrg::m_maxDrawIndirectCount);
  123. outputIndex = (i - 1) * CullSrg::m_maxDrawIndirectCount + outputIndex;
  124. }
  125. }
  126. if(setCommand)
  127. {
  128. // Copy or clear the output command
  129. switch(o_sequenceType)
  130. {
  131. case SequenceType::Draw:
  132. IndirectDrawCommandsSrg::SetCommand(index, outputIndex, clearCommand);
  133. break;
  134. case SequenceType::IAInlineConstDraw:
  135. IndirectIAInlineConstCommandsSrg::SetCommand(index, outputIndex, clearCommand);
  136. break;
  137. }
  138. }
  139. }
  140. }