sort.glsl 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. #[compute]
  2. #version 450
  3. VERSION_DEFINES
  4. // Original version here:
  5. // https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11/blob/master/gpuparticles11/src/Shaders
  6. //
  7. // Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
  8. //
  9. // Permission is hereby granted, free of charge, to any person obtaining a copy
  10. // of this software and associated documentation files (the "Software"), to deal
  11. // in the Software without restriction, including without limitation the rights
  12. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  13. // copies of the Software, and to permit persons to whom the Software is
  14. // furnished to do so, subject to the following conditions:
  15. //
  16. // The above copyright notice and this permission notice shall be included in
  17. // all copies or substantial portions of the Software.
  18. //
  19. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  25. // THE SOFTWARE.
  26. //
  27. #define SORT_SIZE 512
  28. #define NUM_THREADS (SORT_SIZE / 2)
  29. #define INVERSION (16 * 2 + 8 * 3)
  30. #define ITERATIONS 1
  31. layout(local_size_x = NUM_THREADS, local_size_y = 1, local_size_z = 1) in;
  32. #ifndef MODE_SORT_STEP
  33. shared vec2 g_LDS[SORT_SIZE];
  34. #endif
  35. layout(set = 1, binding = 0, std430) restrict buffer SortBuffer {
  36. vec2 data[];
  37. }
  38. sort_buffer;
  39. layout(push_constant, binding = 0, std430) uniform Params {
  40. uint total_elements;
  41. uint pad[3];
  42. ivec4 job_params;
  43. }
  44. params;
  45. void main() {
  46. #ifdef MODE_SORT_BLOCK
  47. uvec3 Gid = gl_WorkGroupID;
  48. uvec3 DTid = gl_GlobalInvocationID;
  49. uvec3 GTid = gl_LocalInvocationID;
  50. uint GI = gl_LocalInvocationIndex;
  51. int GlobalBaseIndex = int((Gid.x * SORT_SIZE) + GTid.x);
  52. int LocalBaseIndex = int(GI);
  53. int numElementsInThreadGroup = int(min(SORT_SIZE, params.total_elements - (Gid.x * SORT_SIZE)));
  54. // Load shared data
  55. int i;
  56. for (i = 0; i < 2 * ITERATIONS; ++i) {
  57. if (GI + i * NUM_THREADS < numElementsInThreadGroup)
  58. g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
  59. }
  60. groupMemoryBarrier();
  61. barrier();
  62. // Bitonic sort
  63. for (int nMergeSize = 2; nMergeSize <= SORT_SIZE; nMergeSize = nMergeSize * 2) {
  64. for (int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) {
  65. for (i = 0; i < ITERATIONS; ++i) {
  66. int tmp_index = int(GI + NUM_THREADS * i);
  67. int index_low = tmp_index & (nMergeSubSize - 1);
  68. int index_high = 2 * (tmp_index - index_low);
  69. int index = index_high + index_low;
  70. int nSwapElem = nMergeSubSize == nMergeSize >> 1 ? index_high + (2 * nMergeSubSize - 1) - index_low : index_high + nMergeSubSize + index_low;
  71. if (nSwapElem < numElementsInThreadGroup) {
  72. vec2 a = g_LDS[index];
  73. vec2 b = g_LDS[nSwapElem];
  74. if (a.x > b.x) {
  75. g_LDS[index] = b;
  76. g_LDS[nSwapElem] = a;
  77. }
  78. }
  79. groupMemoryBarrier();
  80. barrier();
  81. }
  82. }
  83. }
  84. // Store shared data
  85. for (i = 0; i < 2 * ITERATIONS; ++i) {
  86. if (GI + i * NUM_THREADS < numElementsInThreadGroup) {
  87. sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS];
  88. }
  89. }
  90. #endif
  91. #ifdef MODE_SORT_STEP
  92. uvec3 Gid = gl_WorkGroupID;
  93. uvec3 GTid = gl_LocalInvocationID;
  94. ivec4 tgp;
  95. tgp.x = int(Gid.x) * 256;
  96. tgp.y = 0;
  97. tgp.z = int(params.total_elements);
  98. tgp.w = min(512, max(0, tgp.z - int(Gid.x) * 512));
  99. uint localID = int(tgp.x) + GTid.x; // calculate threadID within this sortable-array
  100. uint index_low = localID & (params.job_params.x - 1);
  101. uint index_high = 2 * (localID - index_low);
  102. uint index = tgp.y + index_high + index_low;
  103. uint nSwapElem = tgp.y + index_high + params.job_params.y + params.job_params.z * index_low;
  104. if (nSwapElem < tgp.y + tgp.z) {
  105. vec2 a = sort_buffer.data[index];
  106. vec2 b = sort_buffer.data[nSwapElem];
  107. if (a.x > b.x) {
  108. sort_buffer.data[index] = b;
  109. sort_buffer.data[nSwapElem] = a;
  110. }
  111. }
  112. #endif
  113. #ifdef MODE_SORT_INNER
  114. uvec3 Gid = gl_WorkGroupID;
  115. uvec3 DTid = gl_GlobalInvocationID;
  116. uvec3 GTid = gl_LocalInvocationID;
  117. uint GI = gl_LocalInvocationIndex;
  118. ivec4 tgp;
  119. tgp.x = int(Gid.x * 256);
  120. tgp.y = 0;
  121. tgp.z = int(params.total_elements.x);
  122. tgp.w = int(min(512, max(0, params.total_elements - Gid.x * 512)));
  123. int GlobalBaseIndex = int(tgp.y + tgp.x * 2 + GTid.x);
  124. int LocalBaseIndex = int(GI);
  125. int i;
  126. // Load shared data
  127. for (i = 0; i < 2; ++i) {
  128. if (GI + i * NUM_THREADS < tgp.w)
  129. g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
  130. }
  131. groupMemoryBarrier();
  132. barrier();
  133. // sort threadgroup shared memory
  134. for (int nMergeSubSize = SORT_SIZE >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) {
  135. int tmp_index = int(GI);
  136. int index_low = tmp_index & (nMergeSubSize - 1);
  137. int index_high = 2 * (tmp_index - index_low);
  138. int index = index_high + index_low;
  139. int nSwapElem = index_high + nMergeSubSize + index_low;
  140. if (nSwapElem < tgp.w) {
  141. vec2 a = g_LDS[index];
  142. vec2 b = g_LDS[nSwapElem];
  143. if (a.x > b.x) {
  144. g_LDS[index] = b;
  145. g_LDS[nSwapElem] = a;
  146. }
  147. }
  148. groupMemoryBarrier();
  149. barrier();
  150. }
  151. // Store shared data
  152. for (i = 0; i < 2; ++i) {
  153. if (GI + i * NUM_THREADS < tgp.w) {
  154. sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS];
  155. }
  156. }
  157. #endif
  158. }