cpu.h

/*
 * Copyright 2010-2015 Branimir Karadzic. All rights reserved.
 * License: http://www.opensource.org/licenses/BSD-2-Clause
 */

#ifndef BX_CPU_H_HEADER_GUARD
#define BX_CPU_H_HEADER_GUARD

#include "bx.h"

#if BX_COMPILER_MSVC
#	if BX_PLATFORM_XBOX360
#		include <ppcintrinsics.h>
#		include <xtl.h>
#	else
#		include <math.h> // math.h is included because VS bitches:
		                 // warning C4985: 'ceil': attributes not present on previous declaration.
		                 // must be included before intrin.h.
#		include <intrin.h>
#		include <windows.h>
#	endif // !BX_PLATFORM_XBOX360

extern "C" void _ReadBarrier();
extern "C" void _WriteBarrier();
extern "C" void _ReadWriteBarrier();
#	pragma intrinsic(_ReadBarrier)
#	pragma intrinsic(_WriteBarrier)
#	pragma intrinsic(_ReadWriteBarrier)
#	pragma intrinsic(_InterlockedExchangeAdd)
#	pragma intrinsic(_InterlockedCompareExchange)
#endif // BX_COMPILER_MSVC
namespace bx
{
	/// Compiler read barrier; prevents the compiler from reordering reads across it.
	inline void readBarrier()
	{
#if BX_COMPILER_MSVC
		_ReadBarrier();
#else
		asm volatile("":::"memory");
#endif // BX_COMPILER
	}

	/// Compiler write barrier; prevents the compiler from reordering writes across it.
	inline void writeBarrier()
	{
#if BX_COMPILER_MSVC
		_WriteBarrier();
#else
		asm volatile("":::"memory");
#endif // BX_COMPILER
	}

	/// Compiler read/write barrier; prevents the compiler from reordering any memory access across it.
	inline void readWriteBarrier()
	{
#if BX_COMPILER_MSVC
		_ReadWriteBarrier();
#else
		asm volatile("":::"memory");
#endif // BX_COMPILER
	}
	/// Full hardware memory barrier (CPU fence), not just a compiler barrier.
	inline void memoryBarrier()
	{
#if BX_PLATFORM_XBOX360
		__lwsync();
#elif BX_PLATFORM_WINRT
		MemoryBarrier();
#elif BX_COMPILER_MSVC
		_mm_mfence();
#else
		__sync_synchronize();
//		asm volatile("mfence":::"memory");
#endif // BX_COMPILER
	}
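
	// Note: readBarrier/writeBarrier/readWriteBarrier only constrain compiler
	// reordering; memoryBarrier additionally issues a CPU fence (lwsync, mfence,
	// or __sync_synchronize depending on the target).
	//
	// Illustrative use, not part of this header (`s_payload` and `s_ready` are
	// hypothetical shared variables):
	//
	//	// producer:
	//	s_payload = computeResult(); // write the data first
	//	memoryBarrier();             // make the data visible before the flag
	//	s_ready = 1;
	//
	//	// consumer:
	//	while (0 == s_ready) { /* spin */ }
	//	readBarrier();               // keep the compiler from hoisting the payload read
	//	use(s_payload);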
	template<typename Ty>
	inline Ty atomicFetchAndAdd(volatile Ty* _ptr, Ty _value);

	template<typename Ty>
	inline Ty atomicAddAndFetch(volatile Ty* _ptr, Ty _value);

	template<typename Ty>
	inline Ty atomicFetchAndSub(volatile Ty* _ptr, Ty _value);

	template<typename Ty>
	inline Ty atomicSubAndFetch(volatile Ty* _ptr, Ty _value);

	template<typename Ty>
	inline Ty atomicCompareAndSwap(volatile void* _ptr, Ty _old, Ty _new);

	template<>
	inline int32_t atomicCompareAndSwap(volatile void* _ptr, int32_t _old, int32_t _new);

	template<>
	inline int64_t atomicCompareAndSwap(volatile void* _ptr, int64_t _old, int64_t _new);
	template<>
	inline int32_t atomicFetchAndAdd<int32_t>(volatile int32_t* _ptr, int32_t _add)
	{
#if BX_COMPILER_MSVC
		return _InterlockedExchangeAdd( (volatile long*)_ptr, _add);
#else
		return __sync_fetch_and_add(_ptr, _add);
#endif // BX_COMPILER_
	}

	template<>
	inline int64_t atomicFetchAndAdd<int64_t>(volatile int64_t* _ptr, int64_t _add)
	{
#if BX_COMPILER_MSVC
#	if _WIN32_WINNT >= 0x600
		return _InterlockedExchangeAdd64( (volatile int64_t*)_ptr, _add);
#	else
		// Without _InterlockedExchangeAdd64 (pre-Vista _WIN32_WINNT), emulate
		// fetch-and-add with a compare-and-swap retry loop. atomicCompareAndSwap
		// returns the previous value, so the loop exits once the swap succeeded.
		int64_t oldVal;
		int64_t newVal = *(int64_t volatile*)_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap(_ptr, oldVal, newVal + _add);

		} while (oldVal != newVal);

		return oldVal;
#	endif
#else
		return __sync_fetch_and_add(_ptr, _add);
#endif // BX_COMPILER_
	}
	template<>
	inline uint32_t atomicFetchAndAdd<uint32_t>(volatile uint32_t* _ptr, uint32_t _add)
	{
		return uint32_t(atomicFetchAndAdd<int32_t>( (volatile int32_t*)_ptr, int32_t(_add) ) );
	}

	template<>
	inline uint64_t atomicFetchAndAdd<uint64_t>(volatile uint64_t* _ptr, uint64_t _add)
	{
		return uint64_t(atomicFetchAndAdd<int64_t>( (volatile int64_t*)_ptr, int64_t(_add) ) );
	}

	template<>
	inline int32_t atomicAddAndFetch<int32_t>(volatile int32_t* _ptr, int32_t _add)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, _add) + _add;
#else
		return __sync_add_and_fetch(_ptr, _add);
#endif // BX_COMPILER_
	}

	template<>
	inline int64_t atomicAddAndFetch<int64_t>(volatile int64_t* _ptr, int64_t _add)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, _add) + _add;
#else
		return __sync_add_and_fetch(_ptr, _add);
#endif // BX_COMPILER_
	}

	template<>
	inline uint32_t atomicAddAndFetch<uint32_t>(volatile uint32_t* _ptr, uint32_t _add)
	{
		return uint32_t(atomicAddAndFetch<int32_t>( (volatile int32_t*)_ptr, int32_t(_add) ) );
	}

	template<>
	inline uint64_t atomicAddAndFetch<uint64_t>(volatile uint64_t* _ptr, uint64_t _add)
	{
		return uint64_t(atomicAddAndFetch<int64_t>( (volatile int64_t*)_ptr, int64_t(_add) ) );
	}
	template<>
	inline int32_t atomicFetchAndSub<int32_t>(volatile int32_t* _ptr, int32_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub);
#else
		return __sync_fetch_and_sub(_ptr, _sub);
#endif // BX_COMPILER_
	}

	template<>
	inline int64_t atomicFetchAndSub<int64_t>(volatile int64_t* _ptr, int64_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub);
#else
		return __sync_fetch_and_sub(_ptr, _sub);
#endif // BX_COMPILER_
	}

	template<>
	inline uint32_t atomicFetchAndSub<uint32_t>(volatile uint32_t* _ptr, uint32_t _sub)
	{
		return uint32_t(atomicFetchAndSub<int32_t>( (volatile int32_t*)_ptr, int32_t(_sub) ) );
	}

	template<>
	inline uint64_t atomicFetchAndSub<uint64_t>(volatile uint64_t* _ptr, uint64_t _sub)
	{
		return uint64_t(atomicFetchAndSub<int64_t>( (volatile int64_t*)_ptr, int64_t(_sub) ) );
	}

	template<>
	inline int32_t atomicSubAndFetch<int32_t>(volatile int32_t* _ptr, int32_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub) - _sub;
#else
		return __sync_sub_and_fetch(_ptr, _sub);
#endif // BX_COMPILER_
	}

	template<>
	inline int64_t atomicSubAndFetch<int64_t>(volatile int64_t* _ptr, int64_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub) - _sub;
#else
		return __sync_sub_and_fetch(_ptr, _sub);
#endif // BX_COMPILER_
	}

	template<>
	inline uint32_t atomicSubAndFetch<uint32_t>(volatile uint32_t* _ptr, uint32_t _sub)
	{
		return uint32_t(atomicSubAndFetch<int32_t>( (volatile int32_t*)_ptr, int32_t(_sub) ) );
	}

	template<>
	inline uint64_t atomicSubAndFetch<uint64_t>(volatile uint64_t* _ptr, uint64_t _sub)
	{
		return uint64_t(atomicSubAndFetch<int64_t>( (volatile int64_t*)_ptr, int64_t(_sub) ) );
	}
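
	// Illustrative use, not part of this header: fetch-and-add returns the value
	// *before* the addition, which makes it a natural way to reserve a unique slot.
	// `s_count` is a hypothetical `volatile uint32_t`, `s_items` a hypothetical array.
	//
	//	uint32_t idx = atomicFetchAndAdd(&s_count, 1u); // claim index, bump count
	//	s_items[idx] = item;                            // fill the reserved slot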
	/// Returns the resulting incremented value.
	template<typename Ty>
	inline Ty atomicInc(volatile Ty* _ptr)
	{
		return atomicAddAndFetch(_ptr, Ty(1) );
	}

	/// Returns the resulting decremented value.
	template<typename Ty>
	inline Ty atomicDec(volatile Ty* _ptr)
	{
		return atomicSubAndFetch(_ptr, Ty(1) );
	}
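
	// Hypothetical usage sketch: a minimal intrusive reference count, where
	// `m_refCount` is a hypothetical `volatile int32_t` member (not part of this header).
	//
	//	void addRef()  { atomicInc(&m_refCount); }
	//	void release() { if (0 == atomicDec(&m_refCount) ) { delete this; } }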
	/// Atomically replaces *_ptr with _new if it equals _old. Returns the previous value.
	template<>
	inline int32_t atomicCompareAndSwap(volatile void* _ptr, int32_t _old, int32_t _new)
	{
#if BX_COMPILER_MSVC
		return _InterlockedCompareExchange( (volatile LONG*)(_ptr), _new, _old);
#else
		return __sync_val_compare_and_swap( (volatile int32_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}

	/// Atomically replaces *_ptr with _new if it equals _old. Returns the previous value.
	template<>
	inline int64_t atomicCompareAndSwap(volatile void* _ptr, int64_t _old, int64_t _new)
	{
#if BX_COMPILER_MSVC
		return _InterlockedCompareExchange64( (volatile LONG64*)(_ptr), _new, _old);
#else
		return __sync_val_compare_and_swap( (volatile int64_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}
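
	// Illustrative use, not part of this header: a one-time initialization guard.
	// `s_initialized` is a hypothetical `volatile int32_t` starting at 0.
	//
	//	if (0 == atomicCompareAndSwap<int32_t>(&s_initialized, 0, 1) )
	//	{
	//		// first caller to flip 0 -> 1 performs the one-time setup
	//	}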
	/// Atomically stores _new into *_ptr. Returns the previous pointer value.
	inline void* atomicExchangePtr(void** _ptr, void* _new)
	{
#if BX_COMPILER_MSVC
		return InterlockedExchangePointer(_ptr, _new);
#else
		return __sync_lock_test_and_set(_ptr, _new);
#endif // BX_COMPILER
	}
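
	// Hypothetical usage sketch: take ownership of a shared pointer slot by
	// swapping NULL into it. `s_pending` is a hypothetical `void*`, not part of this header.
	//
	//	void* job = atomicExchangePtr(&s_pending, NULL);
	//	if (NULL != job)
	//	{
	//		// this thread now owns `job`; no other caller received the same pointer
	//	}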
	/// Atomically increments *_ptr, but not past _test. Returns the previous value.
	inline int32_t atomicTestAndInc(volatile void* _ptr, int32_t _test)
	{
		int32_t oldVal;
		int32_t newVal = *(int32_t volatile*)_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap(_ptr, oldVal, newVal >= _test ? _test : newVal+1);

		} while (oldVal != newVal);

		return oldVal;
	}

	/// Atomically decrements *_ptr, but not past _test. Returns the previous value.
	inline int32_t atomicTestAndDec(volatile void* _ptr, int32_t _test)
	{
		int32_t oldVal;
		int32_t newVal = *(int32_t volatile*)_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap(_ptr, oldVal, newVal <= _test ? _test : newVal-1);

		} while (oldVal != newVal);

		return oldVal;
	}
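
	// Illustrative use, not part of this header: claim work slots up to a cap.
	// `s_taken` is a hypothetical `volatile int32_t`, `maxJobs` a hypothetical limit.
	//
	//	if (atomicTestAndInc(&s_taken, maxJobs) < maxJobs)
	//	{
	//		// previous value was below the cap, so a slot was claimed
	//	}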
} // namespace bx

#endif // BX_CPU_H_HEADER_GUARD