radixsort.h 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. /*
  2. * Copyright 2010-2015 Branimir Karadzic. All rights reserved.
  3. * License: http://www.opensource.org/licenses/BSD-2-Clause
  4. */
  5. #ifndef BX_RADIXSORT_H_HEADER_GUARD
  6. #define BX_RADIXSORT_H_HEADER_GUARD
  7. #include "bx.h"
  8. namespace bx
  9. {
  10. #define BX_RADIXSORT_BITS 11
  11. #define BX_RADIXSORT_HISTOGRAM_SIZE (1<<BX_RADIXSORT_BITS)
  12. #define BX_RADIXSORT_BIT_MASK (BX_RADIXSORT_HISTOGRAM_SIZE-1)
  13. template <typename Ty>
  14. void radixSort32(uint32_t* __restrict _keys, uint32_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size)
  15. {
  16. uint32_t* __restrict keys = _keys;
  17. uint32_t* __restrict tempKeys = _tempKeys;
  18. Ty* __restrict values = _values;
  19. Ty* __restrict tempValues = _tempValues;
  20. uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
  21. uint16_t shift = 0;
  22. uint32_t pass = 0;
  23. for (; pass < 3; ++pass)
  24. {
  25. memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
  26. bool sorted = true;
  27. uint32_t key = keys[0];
  28. uint32_t prevKey = key;
  29. for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
  30. {
  31. key = keys[ii];
  32. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  33. ++histogram[index];
  34. sorted &= prevKey <= key;
  35. }
  36. if (sorted)
  37. {
  38. goto done;
  39. }
  40. uint16_t offset = 0;
  41. for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
  42. {
  43. uint16_t count = histogram[ii];
  44. histogram[ii] = offset;
  45. offset += count;
  46. }
  47. for (uint32_t ii = 0; ii < _size; ++ii)
  48. {
  49. uint32_t key = keys[ii];
  50. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  51. uint16_t dest = histogram[index]++;
  52. tempKeys[dest] = key;
  53. tempValues[dest] = values[ii];
  54. }
  55. uint32_t* swapKeys = tempKeys;
  56. tempKeys = keys;
  57. keys = swapKeys;
  58. Ty* swapValues = tempValues;
  59. tempValues = values;
  60. values = swapValues;
  61. shift += BX_RADIXSORT_BITS;
  62. }
  63. done:
  64. if (0 != (pass&1) )
  65. {
  66. // Odd number of passes needs to do copy to the destination.
  67. memcpy(_keys, _tempKeys, _size*sizeof(uint32_t) );
  68. for (uint32_t ii = 0; ii < _size; ++ii)
  69. {
  70. _values[ii] = _tempValues[ii];
  71. }
  72. }
  73. }
  74. template <typename Ty>
  75. void radixSort64(uint64_t* __restrict _keys, uint64_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size)
  76. {
  77. uint64_t* __restrict keys = _keys;
  78. uint64_t* __restrict tempKeys = _tempKeys;
  79. Ty* __restrict values = _values;
  80. Ty* __restrict tempValues = _tempValues;
  81. uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
  82. uint16_t shift = 0;
  83. uint32_t pass = 0;
  84. for (; pass < 6; ++pass)
  85. {
  86. memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
  87. bool sorted = true;
  88. uint64_t key = keys[0];
  89. uint64_t prevKey = key;
  90. for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
  91. {
  92. key = keys[ii];
  93. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  94. ++histogram[index];
  95. sorted &= prevKey <= key;
  96. }
  97. if (sorted)
  98. {
  99. goto done;
  100. }
  101. uint16_t offset = 0;
  102. for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
  103. {
  104. uint16_t count = histogram[ii];
  105. histogram[ii] = offset;
  106. offset += count;
  107. }
  108. for (uint32_t ii = 0; ii < _size; ++ii)
  109. {
  110. uint64_t key = keys[ii];
  111. uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
  112. uint16_t dest = histogram[index]++;
  113. tempKeys[dest] = key;
  114. tempValues[dest] = values[ii];
  115. }
  116. uint64_t* swapKeys = tempKeys;
  117. tempKeys = keys;
  118. keys = swapKeys;
  119. Ty* swapValues = tempValues;
  120. tempValues = values;
  121. values = swapValues;
  122. shift += BX_RADIXSORT_BITS;
  123. }
  124. done:
  125. if (0 != (pass&1) )
  126. {
  127. // Odd number of passes needs to do copy to the destination.
  128. memcpy(_keys, _tempKeys, _size*sizeof(uint64_t) );
  129. for (uint32_t ii = 0; ii < _size; ++ii)
  130. {
  131. _values[ii] = _tempValues[ii];
  132. }
  133. }
  134. }
  135. #undef BX_RADIXSORT_BITS
  136. #undef BX_RADIXSORT_HISTOGRAM_SIZE
  137. #undef BX_RADIXSORT_BIT_MASK
  138. } // namespace bx
  139. #endif // BX_RADIXSORT_H_HEADER_GUARD