radixsort.h 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. /*
  2. * Copyright 2010-2015 Branimir Karadzic. All rights reserved.
  3. * License: http://www.opensource.org/licenses/BSD-2-Clause
  4. */
  5. #ifndef BX_RADIXSORT_H_HEADER_GUARD
  6. #define BX_RADIXSORT_H_HEADER_GUARD
  7. #include "bx.h"
  8. namespace bx
  9. {
  10. #define BX_RADIXSORT_BITS 11
  11. #define BX_RADIXSORT_HISTOGRAM_SIZE (1<<BX_RADIXSORT_BITS)
  12. #define BX_RADIXSORT_BIT_MASK (BX_RADIXSORT_HISTOGRAM_SIZE-1)
  13. inline void radixSort32(uint32_t* __restrict _keys, uint32_t* __restrict _tempKeys, uint32_t _size)
  14. {
  15. uint32_t* __restrict keys = _keys;
  16. uint32_t* __restrict tempKeys = _tempKeys;
  17. uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
  18. uint16_t shift = 0;
  19. uint32_t pass = 0;
  20. for (; pass < 3; ++pass)
  21. {
  22. memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
  23. bool sorted = true;
  24. {
  25. uint32_t key = keys[0];
  26. uint32_t prevKey = key;
  27. for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
  28. {
  29. key = keys[ii];
  30. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  31. ++histogram[index];
  32. sorted &= prevKey <= key;
  33. }
  34. }
  35. if (sorted)
  36. {
  37. goto done;
  38. }
  39. uint32_t offset = 0;
  40. for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
  41. {
  42. uint32_t count = histogram[ii];
  43. histogram[ii] = offset;
  44. offset += count;
  45. }
  46. for (uint32_t ii = 0; ii < _size; ++ii)
  47. {
  48. uint32_t key = keys[ii];
  49. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  50. uint32_t dest = histogram[index]++;
  51. tempKeys[dest] = key;
  52. }
  53. uint32_t* swapKeys = tempKeys;
  54. tempKeys = keys;
  55. keys = swapKeys;
  56. shift += BX_RADIXSORT_BITS;
  57. }
  58. done:
  59. if (0 != (pass&1) )
  60. {
  61. // Odd number of passes needs to do copy to the destination.
  62. memcpy(_keys, _tempKeys, _size*sizeof(uint32_t) );
  63. }
  64. }
  65. template <typename Ty>
  66. inline void radixSort32(uint32_t* __restrict _keys, uint32_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size)
  67. {
  68. uint32_t* __restrict keys = _keys;
  69. uint32_t* __restrict tempKeys = _tempKeys;
  70. Ty* __restrict values = _values;
  71. Ty* __restrict tempValues = _tempValues;
  72. uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
  73. uint16_t shift = 0;
  74. uint32_t pass = 0;
  75. for (; pass < 3; ++pass)
  76. {
  77. memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
  78. bool sorted = true;
  79. {
  80. uint32_t key = keys[0];
  81. uint32_t prevKey = key;
  82. for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
  83. {
  84. key = keys[ii];
  85. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  86. ++histogram[index];
  87. sorted &= prevKey <= key;
  88. }
  89. }
  90. if (sorted)
  91. {
  92. goto done;
  93. }
  94. uint32_t offset = 0;
  95. for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
  96. {
  97. uint32_t count = histogram[ii];
  98. histogram[ii] = offset;
  99. offset += count;
  100. }
  101. for (uint32_t ii = 0; ii < _size; ++ii)
  102. {
  103. uint32_t key = keys[ii];
  104. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  105. uint32_t dest = histogram[index]++;
  106. tempKeys[dest] = key;
  107. tempValues[dest] = values[ii];
  108. }
  109. uint32_t* swapKeys = tempKeys;
  110. tempKeys = keys;
  111. keys = swapKeys;
  112. Ty* swapValues = tempValues;
  113. tempValues = values;
  114. values = swapValues;
  115. shift += BX_RADIXSORT_BITS;
  116. }
  117. done:
  118. if (0 != (pass&1) )
  119. {
  120. // Odd number of passes needs to do copy to the destination.
  121. memcpy(_keys, _tempKeys, _size*sizeof(uint32_t) );
  122. for (uint32_t ii = 0; ii < _size; ++ii)
  123. {
  124. _values[ii] = _tempValues[ii];
  125. }
  126. }
  127. }
  128. inline void radixSort64(uint64_t* __restrict _keys, uint64_t* __restrict _tempKeys, uint32_t _size)
  129. {
  130. uint64_t* __restrict keys = _keys;
  131. uint64_t* __restrict tempKeys = _tempKeys;
  132. uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
  133. uint16_t shift = 0;
  134. uint32_t pass = 0;
  135. for (; pass < 6; ++pass)
  136. {
  137. memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
  138. bool sorted = true;
  139. {
  140. uint64_t key = keys[0];
  141. uint64_t prevKey = key;
  142. for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
  143. {
  144. key = keys[ii];
  145. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  146. ++histogram[index];
  147. sorted &= prevKey <= key;
  148. }
  149. }
  150. if (sorted)
  151. {
  152. goto done;
  153. }
  154. uint32_t offset = 0;
  155. for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
  156. {
  157. uint32_t count = histogram[ii];
  158. histogram[ii] = offset;
  159. offset += count;
  160. }
  161. for (uint32_t ii = 0; ii < _size; ++ii)
  162. {
  163. uint64_t key = keys[ii];
  164. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  165. uint32_t dest = histogram[index]++;
  166. tempKeys[dest] = key;
  167. }
  168. uint64_t* swapKeys = tempKeys;
  169. tempKeys = keys;
  170. keys = swapKeys;
  171. shift += BX_RADIXSORT_BITS;
  172. }
  173. done:
  174. if (0 != (pass&1) )
  175. {
  176. // Odd number of passes needs to do copy to the destination.
  177. memcpy(_keys, _tempKeys, _size*sizeof(uint64_t) );
  178. }
  179. }
  180. template <typename Ty>
  181. inline void radixSort64(uint64_t* __restrict _keys, uint64_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size)
  182. {
  183. uint64_t* __restrict keys = _keys;
  184. uint64_t* __restrict tempKeys = _tempKeys;
  185. Ty* __restrict values = _values;
  186. Ty* __restrict tempValues = _tempValues;
  187. uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
  188. uint16_t shift = 0;
  189. uint32_t pass = 0;
  190. for (; pass < 6; ++pass)
  191. {
  192. memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
  193. bool sorted = true;
  194. {
  195. uint64_t key = keys[0];
  196. uint64_t prevKey = key;
  197. for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
  198. {
  199. key = keys[ii];
  200. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  201. ++histogram[index];
  202. sorted &= prevKey <= key;
  203. }
  204. }
  205. if (sorted)
  206. {
  207. goto done;
  208. }
  209. uint32_t offset = 0;
  210. for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
  211. {
  212. uint32_t count = histogram[ii];
  213. histogram[ii] = offset;
  214. offset += count;
  215. }
  216. for (uint32_t ii = 0; ii < _size; ++ii)
  217. {
  218. uint64_t key = keys[ii];
  219. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  220. uint32_t dest = histogram[index]++;
  221. tempKeys[dest] = key;
  222. tempValues[dest] = values[ii];
  223. }
  224. uint64_t* swapKeys = tempKeys;
  225. tempKeys = keys;
  226. keys = swapKeys;
  227. Ty* swapValues = tempValues;
  228. tempValues = values;
  229. values = swapValues;
  230. shift += BX_RADIXSORT_BITS;
  231. }
  232. done:
  233. if (0 != (pass&1) )
  234. {
  235. // Odd number of passes needs to do copy to the destination.
  236. memcpy(_keys, _tempKeys, _size*sizeof(uint64_t) );
  237. for (uint32_t ii = 0; ii < _size; ++ii)
  238. {
  239. _values[ii] = _tempValues[ii];
  240. }
  241. }
  242. }
  243. #undef BX_RADIXSORT_BITS
  244. #undef BX_RADIXSORT_HISTOGRAM_SIZE
  245. #undef BX_RADIXSORT_BIT_MASK
  246. } // namespace bx
  247. #endif // BX_RADIXSORT_H_HEADER_GUARD