cpu.h
/*
 * Copyright 2010-2016 Branimir Karadzic. All rights reserved.
 * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
 */

#ifndef BX_CPU_H_HEADER_GUARD
#define BX_CPU_H_HEADER_GUARD

#include "bx.h"

#if BX_COMPILER_MSVC
#   if BX_PLATFORM_XBOX360
#       include <ppcintrinsics.h>
#       include <xtl.h>
#   else
#       include <math.h> // math.h must be included before intrin.h, otherwise VS complains:
                         // warning C4985: 'ceil': attributes not present on previous declaration.
#       include <intrin.h>
#       include <windows.h>
#   endif // !BX_PLATFORM_XBOX360
#   if BX_PLATFORM_WINRT
#       define _InterlockedExchangeAdd64 InterlockedExchangeAdd64
#   endif // BX_PLATFORM_WINRT

extern "C" void _ReadBarrier();
extern "C" void _WriteBarrier();
extern "C" void _ReadWriteBarrier();
#   pragma intrinsic(_ReadBarrier)
#   pragma intrinsic(_WriteBarrier)
#   pragma intrinsic(_ReadWriteBarrier)
#   pragma intrinsic(_InterlockedExchangeAdd)
#   pragma intrinsic(_InterlockedCompareExchange)
#endif // BX_COMPILER_MSVC
namespace bx
{
    /// Compiler read barrier. Prevents the compiler from reordering reads
    /// across this point; emits no CPU fence instruction.
    inline void readBarrier()
    {
#if BX_COMPILER_MSVC
        _ReadBarrier();
#else
        asm volatile("":::"memory");
#endif // BX_COMPILER_MSVC
    }

    /// Compiler write barrier. Prevents the compiler from reordering writes
    /// across this point; emits no CPU fence instruction.
    inline void writeBarrier()
    {
#if BX_COMPILER_MSVC
        _WriteBarrier();
#else
        asm volatile("":::"memory");
#endif // BX_COMPILER_MSVC
    }

    /// Compiler read/write barrier. Prevents the compiler from reordering any
    /// memory access across this point; emits no CPU fence instruction.
    inline void readWriteBarrier()
    {
#if BX_COMPILER_MSVC
        _ReadWriteBarrier();
#else
        asm volatile("":::"memory");
#endif // BX_COMPILER_MSVC
    }

    /// Full hardware memory barrier. Orders memory accesses at the CPU level,
    /// not just in the compiler.
    inline void memoryBarrier()
    {
#if BX_PLATFORM_XBOX360
        __lwsync();
#elif BX_PLATFORM_WINRT
        MemoryBarrier();
#elif BX_COMPILER_MSVC
        _mm_mfence();
#else
        __sync_synchronize();
//      asm volatile("mfence":::"memory");
#endif // BX_PLATFORM_/BX_COMPILER_
    }
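
    // Usage sketch (illustrative, not part of the original header; `s_data` and
    // `s_ready` are hypothetical shared variables). A typical publish pattern:
    // the data store must be ordered before the flag store, so a reader that
    // sees the flag also sees the data.
    //
    //     s_data = computeData();
    //     bx::memoryBarrier(); // writeBarrier() suffices where only compiler ordering matters
    //     s_ready = 1;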
    /// Atomically adds `_value` to `*_ptr`. Returns the value *before* the add.
    template<typename Ty>
    inline Ty atomicFetchAndAdd(volatile Ty* _ptr, Ty _value);

    /// Atomically adds `_value` to `*_ptr`. Returns the value *after* the add.
    template<typename Ty>
    inline Ty atomicAddAndFetch(volatile Ty* _ptr, Ty _value);

    /// Atomically subtracts `_value` from `*_ptr`. Returns the value *before* the subtract.
    template<typename Ty>
    inline Ty atomicFetchAndSub(volatile Ty* _ptr, Ty _value);

    /// Atomically subtracts `_value` from `*_ptr`. Returns the value *after* the subtract.
    template<typename Ty>
    inline Ty atomicSubAndFetch(volatile Ty* _ptr, Ty _value);

    /// Atomically stores `_new` at `_ptr` if the current value equals `_old`.
    /// Returns the value observed at `_ptr`, which equals `_old` on success.
    template<typename Ty>
    inline Ty atomicCompareAndSwap(volatile void* _ptr, Ty _old, Ty _new);

    template<>
    inline int32_t atomicCompareAndSwap(volatile void* _ptr, int32_t _old, int32_t _new);

    template<>
    inline int64_t atomicCompareAndSwap(volatile void* _ptr, int64_t _old, int64_t _new);
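
    // For example, with `*_ptr == 5`, atomicFetchAndAdd(_ptr, 1) returns 5 while
    // atomicAddAndFetch(_ptr, 1) returns 6; both leave `*_ptr == 6`.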
    template<>
    inline int32_t atomicFetchAndAdd<int32_t>(volatile int32_t* _ptr, int32_t _add)
    {
#if BX_COMPILER_MSVC
        return _InterlockedExchangeAdd( (volatile long*)_ptr, _add);
#else
        return __sync_fetch_and_add(_ptr, _add);
#endif // BX_COMPILER_MSVC
    }

    template<>
    inline int64_t atomicFetchAndAdd<int64_t>(volatile int64_t* _ptr, int64_t _add)
    {
#if BX_COMPILER_MSVC
#   if _WIN32_WINNT >= 0x600
        return _InterlockedExchangeAdd64( (volatile int64_t*)_ptr, _add);
#   else
        // No 64-bit interlocked add before Vista; emulate it with a CAS loop.
        // atomicCompareAndSwap returns the value observed at _ptr, which equals
        // oldVal exactly when the swap succeeded.
        int64_t oldVal;
        int64_t newVal = *(int64_t volatile*)_ptr;
        do
        {
            oldVal = newVal;
            newVal = atomicCompareAndSwap(_ptr, oldVal, newVal + _add);

        } while (oldVal != newVal);

        return oldVal;
#   endif // _WIN32_WINNT >= 0x600
#else
        return __sync_fetch_and_add(_ptr, _add);
#endif // BX_COMPILER_MSVC
    }

    template<>
    inline uint32_t atomicFetchAndAdd<uint32_t>(volatile uint32_t* _ptr, uint32_t _add)
    {
        return uint32_t(atomicFetchAndAdd<int32_t>( (volatile int32_t*)_ptr, int32_t(_add) ) );
    }

    template<>
    inline uint64_t atomicFetchAndAdd<uint64_t>(volatile uint64_t* _ptr, uint64_t _add)
    {
        return uint64_t(atomicFetchAndAdd<int64_t>( (volatile int64_t*)_ptr, int64_t(_add) ) );
    }
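
    // Usage sketch (illustrative; `s_count` and `s_items` are hypothetical
    // globals). Fetch-and-add hands out unique slots to concurrent writers:
    //
    //     const int32_t idx = bx::atomicFetchAndAdd(&s_count, int32_t(1) );
    //     s_items[idx] = item; // no other thread received this index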
    template<>
    inline int32_t atomicAddAndFetch<int32_t>(volatile int32_t* _ptr, int32_t _add)
    {
#if BX_COMPILER_MSVC
        return atomicFetchAndAdd(_ptr, _add) + _add; // recover the post-add value
#else
        return __sync_add_and_fetch(_ptr, _add);
#endif // BX_COMPILER_MSVC
    }

    template<>
    inline int64_t atomicAddAndFetch<int64_t>(volatile int64_t* _ptr, int64_t _add)
    {
#if BX_COMPILER_MSVC
        return atomicFetchAndAdd(_ptr, _add) + _add; // recover the post-add value
#else
        return __sync_add_and_fetch(_ptr, _add);
#endif // BX_COMPILER_MSVC
    }

    template<>
    inline uint32_t atomicAddAndFetch<uint32_t>(volatile uint32_t* _ptr, uint32_t _add)
    {
        return uint32_t(atomicAddAndFetch<int32_t>( (volatile int32_t*)_ptr, int32_t(_add) ) );
    }

    template<>
    inline uint64_t atomicAddAndFetch<uint64_t>(volatile uint64_t* _ptr, uint64_t _add)
    {
        return uint64_t(atomicAddAndFetch<int64_t>( (volatile int64_t*)_ptr, int64_t(_add) ) );
    }
    template<>
    inline int32_t atomicFetchAndSub<int32_t>(volatile int32_t* _ptr, int32_t _sub)
    {
#if BX_COMPILER_MSVC
        return atomicFetchAndAdd(_ptr, -_sub);
#else
        return __sync_fetch_and_sub(_ptr, _sub);
#endif // BX_COMPILER_MSVC
    }

    template<>
    inline int64_t atomicFetchAndSub<int64_t>(volatile int64_t* _ptr, int64_t _sub)
    {
#if BX_COMPILER_MSVC
        return atomicFetchAndAdd(_ptr, -_sub);
#else
        return __sync_fetch_and_sub(_ptr, _sub);
#endif // BX_COMPILER_MSVC
    }

    template<>
    inline uint32_t atomicFetchAndSub<uint32_t>(volatile uint32_t* _ptr, uint32_t _sub)
    {
        return uint32_t(atomicFetchAndSub<int32_t>( (volatile int32_t*)_ptr, int32_t(_sub) ) );
    }

    template<>
    inline uint64_t atomicFetchAndSub<uint64_t>(volatile uint64_t* _ptr, uint64_t _sub)
    {
        return uint64_t(atomicFetchAndSub<int64_t>( (volatile int64_t*)_ptr, int64_t(_sub) ) );
    }
    template<>
    inline int32_t atomicSubAndFetch<int32_t>(volatile int32_t* _ptr, int32_t _sub)
    {
#if BX_COMPILER_MSVC
        return atomicFetchAndAdd(_ptr, -_sub) - _sub; // recover the post-subtract value
#else
        return __sync_sub_and_fetch(_ptr, _sub);
#endif // BX_COMPILER_MSVC
    }

    template<>
    inline int64_t atomicSubAndFetch<int64_t>(volatile int64_t* _ptr, int64_t _sub)
    {
#if BX_COMPILER_MSVC
        return atomicFetchAndAdd(_ptr, -_sub) - _sub; // recover the post-subtract value
#else
        return __sync_sub_and_fetch(_ptr, _sub);
#endif // BX_COMPILER_MSVC
    }

    template<>
    inline uint32_t atomicSubAndFetch<uint32_t>(volatile uint32_t* _ptr, uint32_t _sub)
    {
        return uint32_t(atomicSubAndFetch<int32_t>( (volatile int32_t*)_ptr, int32_t(_sub) ) );
    }

    template<>
    inline uint64_t atomicSubAndFetch<uint64_t>(volatile uint64_t* _ptr, uint64_t _sub)
    {
        return uint64_t(atomicSubAndFetch<int64_t>( (volatile int64_t*)_ptr, int64_t(_sub) ) );
    }
    /// Returns the resulting incremented value.
    template<typename Ty>
    inline Ty atomicInc(volatile Ty* _ptr)
    {
        return atomicAddAndFetch(_ptr, Ty(1) );
    }

    /// Returns the resulting decremented value.
    template<typename Ty>
    inline Ty atomicDec(volatile Ty* _ptr)
    {
        return atomicSubAndFetch(_ptr, Ty(1) );
    }
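
    // Usage sketch (illustrative; `m_refCount` is a hypothetical int32_t
    // member). Because atomicDec returns the new value, the classic release
    // pattern needs no extra read:
    //
    //     if (0 == bx::atomicDec(&object->m_refCount) )
    //     {
    //         delete object; // last reference released
    //     }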
    /// 32-bit compare-and-swap. Returns the value observed at `_ptr` before the
    /// operation, which equals `_old` if the swap took place.
    template<>
    inline int32_t atomicCompareAndSwap(volatile void* _ptr, int32_t _old, int32_t _new)
    {
#if BX_COMPILER_MSVC
        return _InterlockedCompareExchange( (volatile LONG*)(_ptr), _new, _old);
#else
        return __sync_val_compare_and_swap( (volatile int32_t*)_ptr, _old, _new);
#endif // BX_COMPILER_MSVC
    }

    /// 64-bit compare-and-swap. Returns the value observed at `_ptr` before the
    /// operation, which equals `_old` if the swap took place.
    template<>
    inline int64_t atomicCompareAndSwap(volatile void* _ptr, int64_t _old, int64_t _new)
    {
#if BX_COMPILER_MSVC
        return _InterlockedCompareExchange64( (volatile LONG64*)(_ptr), _new, _old);
#else
        return __sync_val_compare_and_swap( (volatile int64_t*)_ptr, _old, _new);
#endif // BX_COMPILER_MSVC
    }
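
    // Usage sketch (illustrative; `s_max` is a hypothetical `volatile int32_t`).
    // The standard compare-and-swap retry loop, here maintaining a shared maximum:
    //
    //     int32_t old = s_max;
    //     for (;;)
    //     {
    //         const int32_t desired = _value > old ? _value : old;
    //         const int32_t seen = bx::atomicCompareAndSwap(&s_max, old, desired);
    //         if (seen == old)
    //         {
    //             break; // swap succeeded
    //         }
    //         old = seen; // lost the race; retry against the freshly observed value
    //     }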
    /// Atomically stores `_new` at `_ptr` and returns the previous pointer value.
    inline void* atomicExchangePtr(void** _ptr, void* _new)
    {
#if BX_COMPILER_MSVC
        return InterlockedExchangePointer(_ptr, _new);
#else
        // Note: __sync_lock_test_and_set is an acquire barrier only, not a full barrier.
        return __sync_lock_test_and_set(_ptr, _new);
#endif // BX_COMPILER_MSVC
    }
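
    // Usage sketch (illustrative; `s_current`, `newPtr`, and `release` are
    // hypothetical). Swap in a new pointer and act on the one it replaced,
    // in a single atomic step:
    //
    //     void* prev = bx::atomicExchangePtr(&s_current, newPtr);
    //     release(prev); // clean up the displaced object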
    /// Atomically increments the int32_t at `_ptr`, saturating at `_test`
    /// (no increment happens once the value has reached `_test`).
    /// Returns the previous value.
    inline int32_t atomicTestAndInc(volatile void* _ptr, int32_t _test)
    {
        int32_t oldVal;
        int32_t newVal = *(int32_t volatile*)_ptr;
        do
        {
            oldVal = newVal;
            // CAS returns the observed value; it equals oldVal only when the swap succeeded.
            newVal = atomicCompareAndSwap(_ptr, oldVal, newVal >= _test ? _test : newVal+1);

        } while (oldVal != newVal);

        return oldVal;
    }

    /// Atomically decrements the int32_t at `_ptr`, saturating at `_test`
    /// (no decrement happens once the value has reached `_test`).
    /// Returns the previous value.
    inline int32_t atomicTestAndDec(volatile void* _ptr, int32_t _test)
    {
        int32_t oldVal;
        int32_t newVal = *(int32_t volatile*)_ptr;
        do
        {
            oldVal = newVal;
            newVal = atomicCompareAndSwap(_ptr, oldVal, newVal <= _test ? _test : newVal-1);

        } while (oldVal != newVal);

        return oldVal;
    }
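
    // Usage sketch (illustrative; `s_numJobs` and `kMaxJobs` are hypothetical).
    // Claim a slot in a fixed-capacity pool, failing cleanly when it is full;
    // the returned previous value tells us whether the increment took place:
    //
    //     const int32_t prev = bx::atomicTestAndInc(&s_numJobs, kMaxJobs);
    //     if (prev >= kMaxJobs)
    //     {
    //         // pool is full; the counter was left unchanged
    //     }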

} // namespace bx

#endif // BX_CPU_H_HEADER_GUARD