| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291 |
- /*
- * Copyright 2010-2015 Branimir Karadzic. All rights reserved.
- * License: http://www.opensource.org/licenses/BSD-2-Clause
- */
- #ifndef BX_RADIXSORT_H_HEADER_GUARD
- #define BX_RADIXSORT_H_HEADER_GUARD
- #include "bx.h"
- namespace bx
- {
- #define BX_RADIXSORT_BITS 11
- #define BX_RADIXSORT_HISTOGRAM_SIZE (1<<BX_RADIXSORT_BITS)
- #define BX_RADIXSORT_BIT_MASK (BX_RADIXSORT_HISTOGRAM_SIZE-1)
- inline void radixSort32(uint32_t* __restrict _keys, uint32_t* __restrict _tempKeys, uint32_t _size)
- {
- uint32_t* __restrict keys = _keys;
- uint32_t* __restrict tempKeys = _tempKeys;
- uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
- uint16_t shift = 0;
- uint32_t pass = 0;
- for (; pass < 3; ++pass)
- {
- memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
- bool sorted = true;
- {
- uint32_t key = keys[0];
- uint32_t prevKey = key;
- for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
- {
- key = keys[ii];
- uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
- ++histogram[index];
- sorted &= prevKey <= key;
- }
- }
- if (sorted)
- {
- goto done;
- }
- uint32_t offset = 0;
- for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
- {
- uint32_t count = histogram[ii];
- histogram[ii] = offset;
- offset += count;
- }
- for (uint32_t ii = 0; ii < _size; ++ii)
- {
- uint32_t key = keys[ii];
- uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
- uint32_t dest = histogram[index]++;
- tempKeys[dest] = key;
- }
- uint32_t* swapKeys = tempKeys;
- tempKeys = keys;
- keys = swapKeys;
- shift += BX_RADIXSORT_BITS;
- }
- done:
- if (0 != (pass&1) )
- {
- // Odd number of passes needs to do copy to the destination.
- memcpy(_keys, _tempKeys, _size*sizeof(uint32_t) );
- }
- }
- template <typename Ty>
- inline void radixSort32(uint32_t* __restrict _keys, uint32_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size)
- {
- uint32_t* __restrict keys = _keys;
- uint32_t* __restrict tempKeys = _tempKeys;
- Ty* __restrict values = _values;
- Ty* __restrict tempValues = _tempValues;
- uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
- uint16_t shift = 0;
- uint32_t pass = 0;
- for (; pass < 3; ++pass)
- {
- memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
- bool sorted = true;
- {
- uint32_t key = keys[0];
- uint32_t prevKey = key;
- for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
- {
- key = keys[ii];
- uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
- ++histogram[index];
- sorted &= prevKey <= key;
- }
- }
- if (sorted)
- {
- goto done;
- }
- uint32_t offset = 0;
- for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
- {
- uint32_t count = histogram[ii];
- histogram[ii] = offset;
- offset += count;
- }
- for (uint32_t ii = 0; ii < _size; ++ii)
- {
- uint32_t key = keys[ii];
- uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
- uint32_t dest = histogram[index]++;
- tempKeys[dest] = key;
- tempValues[dest] = values[ii];
- }
- uint32_t* swapKeys = tempKeys;
- tempKeys = keys;
- keys = swapKeys;
- Ty* swapValues = tempValues;
- tempValues = values;
- values = swapValues;
- shift += BX_RADIXSORT_BITS;
- }
- done:
- if (0 != (pass&1) )
- {
- // Odd number of passes needs to do copy to the destination.
- memcpy(_keys, _tempKeys, _size*sizeof(uint32_t) );
- for (uint32_t ii = 0; ii < _size; ++ii)
- {
- _values[ii] = _tempValues[ii];
- }
- }
- }
- inline void radixSort64(uint64_t* __restrict _keys, uint64_t* __restrict _tempKeys, uint32_t _size)
- {
- uint64_t* __restrict keys = _keys;
- uint64_t* __restrict tempKeys = _tempKeys;
- uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
- uint16_t shift = 0;
- uint32_t pass = 0;
- for (; pass < 6; ++pass)
- {
- memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
- bool sorted = true;
- {
- uint64_t key = keys[0];
- uint64_t prevKey = key;
- for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
- {
- key = keys[ii];
- uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
- ++histogram[index];
- sorted &= prevKey <= key;
- }
- }
- if (sorted)
- {
- goto done;
- }
- uint32_t offset = 0;
- for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
- {
- uint32_t count = histogram[ii];
- histogram[ii] = offset;
- offset += count;
- }
- for (uint32_t ii = 0; ii < _size; ++ii)
- {
- uint64_t key = keys[ii];
- uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
- uint32_t dest = histogram[index]++;
- tempKeys[dest] = key;
- }
- uint64_t* swapKeys = tempKeys;
- tempKeys = keys;
- keys = swapKeys;
- shift += BX_RADIXSORT_BITS;
- }
- done:
- if (0 != (pass&1) )
- {
- // Odd number of passes needs to do copy to the destination.
- memcpy(_keys, _tempKeys, _size*sizeof(uint64_t) );
- }
- }
- template <typename Ty>
- inline void radixSort64(uint64_t* __restrict _keys, uint64_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size)
- {
- uint64_t* __restrict keys = _keys;
- uint64_t* __restrict tempKeys = _tempKeys;
- Ty* __restrict values = _values;
- Ty* __restrict tempValues = _tempValues;
- uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
- uint16_t shift = 0;
- uint32_t pass = 0;
- for (; pass < 6; ++pass)
- {
- memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
- bool sorted = true;
- {
- uint64_t key = keys[0];
- uint64_t prevKey = key;
- for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
- {
- key = keys[ii];
- uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
- ++histogram[index];
- sorted &= prevKey <= key;
- }
- }
- if (sorted)
- {
- goto done;
- }
- uint32_t offset = 0;
- for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
- {
- uint32_t count = histogram[ii];
- histogram[ii] = offset;
- offset += count;
- }
- for (uint32_t ii = 0; ii < _size; ++ii)
- {
- uint64_t key = keys[ii];
- uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
- uint32_t dest = histogram[index]++;
- tempKeys[dest] = key;
- tempValues[dest] = values[ii];
- }
- uint64_t* swapKeys = tempKeys;
- tempKeys = keys;
- keys = swapKeys;
- Ty* swapValues = tempValues;
- tempValues = values;
- values = swapValues;
- shift += BX_RADIXSORT_BITS;
- }
- done:
- if (0 != (pass&1) )
- {
- // Odd number of passes needs to do copy to the destination.
- memcpy(_keys, _tempKeys, _size*sizeof(uint64_t) );
- for (uint32_t ii = 0; ii < _size; ++ii)
- {
- _values[ii] = _tempValues[ii];
- }
- }
- }
- #undef BX_RADIXSORT_BITS
- #undef BX_RADIXSORT_HISTOGRAM_SIZE
- #undef BX_RADIXSORT_BIT_MASK
- } // namespace bx
- #endif // BX_RADIXSORT_H_HEADER_GUARD
|