// eathread_atomic_x86-64.h
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // Copyright (c) Electronic Arts Inc. All rights reserved.
  3. ///////////////////////////////////////////////////////////////////////////////
  4. #if defined(EA_PRAGMA_ONCE_SUPPORTED)
  5. #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
  6. #endif
  7. /////////////////////////////////////////////////////////////////////////////
  8. // Defines functionality for threadsafe primitive operations.
  9. /////////////////////////////////////////////////////////////////////////////
  10. #ifndef EATHREAD_X86_64_EATHREAD_ATOMIC_X86_64_H
  11. #define EATHREAD_X86_64_EATHREAD_ATOMIC_X86_64_H
  12. #include "EABase/eabase.h"
  13. #include <stddef.h>
  14. #include <eathread/internal/eathread_atomic_standalone.h>
  15. #ifdef EA_COMPILER_MSVC
  16. EA_DISABLE_ALL_VC_WARNINGS()
  17. #include <math.h> // VS2008 has an acknowledged bug that requires math.h (and possibly also string.h) to be #included before intrin.h.
  18. #include <intrin.h>
  19. EA_RESTORE_ALL_VC_WARNINGS()
  20. #endif
  21. EA_DISABLE_VC_WARNING(4146) // unary minus operator applied to unsigned type, result still unsigned
  22. #if defined(EA_PROCESSOR_X86_64)
  23. #define EA_THREAD_ATOMIC_IMPLEMENTED
  24. namespace EA
  25. {
  26. namespace Thread
  27. {
  28. ///
  29. /// Non-member 128-bit Atomics implementation
  30. ///
  31. #if (EA_COMPILER_MSVC >= 1500) // VS2008+
  32. #define EATHREAD_ATOMIC_128_SUPPORTED 1
  33. // Algorithm for implementing an arbitrary atomic modification via AtomicCompareAndSwap:
  34. // int128_t oldValue;
  35. //
  36. // do {
  37. // oldValue = AtomicGetValue(dest);
  38. // newValue = <modification of oldValue>
  39. // } while(!AtomicCompareAndSwap(dest, oldValue, newValue));
  40. // The following function is a wrapper for the Microsoft _InterlockedCompareExchange128 function.
  41. // Early versions of AMD 64-bit hardware do not support 128 bit atomics. To check for hardware support
  42. // for the cmpxchg16b instruction, call the __cpuid intrinsic with InfoType=0x00000001 (standard function 1).
  43. // Bit 13 of CPUInfo[2] (ECX) is 1 if the instruction is supported.
  44. inline bool AtomicSetValueConditionall28(volatile int64_t* dest128, const int64_t* value128, const int64_t* condition128)
  45. {
  46. __int64 conditionCopy[2] = { condition128[0], condition128[1] }; // We make a copy because Microsoft modifies the output, which is inconsistent with the rest of our atomic API.
  47. return _InterlockedCompareExchange128(dest128, value128[1], value128[0], conditionCopy) == 1; // Question: Do we need to reverse the order of value128 if running on big-endian? Microsoft's documentation currently doesn't address this.
  48. }
  49. inline bool AtomicSetValueConditionall28(volatile uint64_t* dest128, const uint64_t* value128, const uint64_t* condition128)
  50. {
  51. __int64 conditionCopy[2] = { (int64_t) condition128[0], (int64_t)condition128[1] }; // We make a copy because Microsoft modifies the output, which is inconsistent with the rest of our atomic API.
  52. return _InterlockedCompareExchange128((volatile int64_t*)dest128, (int64_t)value128[1], (int64_t)value128[0], conditionCopy) == 1; // Question: Do we need to reverse the order of value128 if running on big-endian? Microsoft's documentation currently doesn't address this.
  53. }
  54. #elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
  55. #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && EA_COMPILER_VERSION >= 4003) // GCC 4.3 or later for 128 bit atomics
  56. #define EATHREAD_ATOMIC_128_SUPPORTED 1
  57. // GCC on x64 implements all of its __sync functions below via the cmpxchg16b instruction,
  58. // usually in the form of a loop.
  59. // Use of 128 bit atomics on GCC requires compiling with the -mcx16 compiler argument.
  60. // See http://gcc.gnu.org/onlinedocs/gcc/i386-and-x86_002d64-Options.html.
// Atomically reads *source with full-barrier semantics. Implemented as "add 0" because the
// __sync family provides no plain atomic load, and a direct 128-bit load is not atomic on x86-64.
inline __int128_t AtomicGetValue(volatile __int128_t* source)
{
    return __sync_add_and_fetch(source, __int128_t(0)); // Is there a better way to do an atomic read?
}

// Atomically stores value into *dest, discarding the previous value.
// NOTE: per the GCC documentation, __sync_lock_test_and_set is an acquire barrier only,
// not a full barrier.
inline void AtomicSetValue(volatile __int128_t* dest, __int128_t value)
{
    __sync_lock_test_and_set(dest, value);
}

// Atomically increments *dest and returns the NEW (post-increment) value.
inline __int128_t AtomicIncrement(volatile __int128_t* dest)
{
    return __sync_add_and_fetch(dest, __int128_t(1));
}

// Atomically decrements *dest and returns the NEW (post-decrement) value.
inline __int128_t AtomicDecrement(volatile __int128_t* dest)
{
    return __sync_add_and_fetch(dest, __int128_t(-1));
}

// Atomically adds value to *dest and returns the NEW value.
inline __int128_t AtomicAdd(volatile __int128_t* dest, __int128_t value)
{
    return __sync_add_and_fetch(dest, value);
}

// Atomically ORs value into *dest and returns the NEW value.
inline __int128_t AtomicOr(volatile __int128_t* dest, __int128_t value)
{
    return __sync_or_and_fetch(dest, value);
}

// Atomically ANDs value into *dest and returns the NEW value.
inline __int128_t AtomicAnd(volatile __int128_t* dest, __int128_t value)
{
    return __sync_and_and_fetch(dest, value);
}

// Atomically XORs value into *dest and returns the NEW value.
inline __int128_t AtomicXor(volatile __int128_t* dest, __int128_t value)
{
    return __sync_xor_and_fetch(dest, value);
}

// Atomically stores value into *dest and returns the PREVIOUS value.
// (Acquire barrier only; see the note on AtomicSetValue.)
inline __int128_t AtomicSwap(volatile __int128_t* dest, __int128_t value)
{
    return __sync_lock_test_and_set(dest, value);
}

// Atomically: if (*dest == condition) { *dest = value; return true; } else return false;
inline bool AtomicSetValueConditional(volatile __int128_t* dest, __int128_t value, __int128_t condition)
{
    return __sync_bool_compare_and_swap(dest, condition, value);
}

// Unsigned overload of the compare-and-swap above.
inline bool AtomicSetValueConditional(volatile __uint128_t* dest, __uint128_t value, __uint128_t condition)
{
    return __sync_bool_compare_and_swap(dest, condition, value);
}
  105. // The following 64-bit-based 128 bit atomic is provided for compatibility with the Microsoft version.
  106. // GCC supports the native __int128_t data type and thus can support a 128-bit-based 128 bit atomic.
  107. inline bool AtomicSetValueConditionall28(volatile int64_t* dest128, const int64_t* value128, const int64_t* condition128)
  108. {
  109. // Use of this requires compiling with the -mcx16 compiler argument. See http://gcc.gnu.org/onlinedocs/gcc/i386-and-x86_002d64-Options.html.
  110. return __sync_bool_compare_and_swap((volatile __int128_t*)dest128, *(volatile __int128_t*)condition128, *(volatile __int128_t*)value128);
  111. }
  112. inline bool AtomicSetValueConditionall28(volatile uint64_t* dest128, const uint64_t* value128, const uint64_t* condition128)
  113. {
  114. // Use of this requires compiling with the -mcx16 compiler argument. See http://gcc.gnu.org/onlinedocs/gcc/i386-and-x86_002d64-Options.html.
  115. return __sync_bool_compare_and_swap((volatile __uint128_t*)dest128, *(volatile __uint128_t*)condition128, *(volatile __uint128_t*)value128);
  116. }
  117. #endif
  118. #endif
  119. /// class AtomicInt
  120. /// Actual implementation may vary per platform. May require certain alignments, sizes,
  121. /// and declaration specifications per platform.
/// Generic atomic integer wrapper. Only the explicit specializations below
/// (int32_t, uint32_t, int64_t, uint64_t) define the atomic member functions;
/// instantiating AtomicInt with any other T will fail at link time.
template <class T>
class AtomicInt
{
public:
    typedef AtomicInt<T> ThisType;
    typedef T ValueType;

    /// AtomicInt
    /// Empty constructor. Intentionally leaves mValue in an unspecified state.
    /// This is done so that an AtomicInt acts like a standard built-in integer.
    AtomicInt()
    {}

    AtomicInt(ValueType n)
    { SetValue(n); }

    // Copying reads the source atomically, but the store into this object is a
    // plain (non-atomic) volatile write.
    AtomicInt(const ThisType& x)
    : mValue(x.GetValue()) {}

    AtomicInt& operator=(const ThisType& x)
    { mValue = x.GetValue(); return *this; }

    /// Reads mValue directly, with no atomic read-modify-write or barrier.
    ValueType GetValueRaw() const
    { return mValue; }

    ValueType GetValue() const;                                  // Atomic read.
    ValueType SetValue(ValueType n);                             // Atomic store; returns the PREVIOUS value (see the specializations below).
    bool SetValueConditional(ValueType n, ValueType condition);  // CAS: store n iff current value == condition; true on success.
    ValueType Increment();                                       // Returns the NEW (post-increment) value.
    ValueType Decrement();                                       // Returns the NEW (post-decrement) value.
    ValueType Add(ValueType n);                                  // Returns the NEW value.

    // operators
    inline operator const ValueType() const { return GetValue(); } // Should this be provided? Is it safe enough? Return value of 'const' attempts to make this safe from misuse.
    inline ValueType operator =(ValueType n) { SetValue(n); return n; }
    inline ValueType operator+=(ValueType n) { return Add(n);}
    inline ValueType operator-=(ValueType n) { return Add(-n);}
    inline ValueType operator++() { return Increment();}
    inline ValueType operator++(int) { return Increment() - 1;}  // Reconstruct the pre-increment value from the atomic result.
    inline ValueType operator--() { return Decrement(); }
    inline ValueType operator--(int) { return Decrement() + 1;}  // Reconstruct the pre-decrement value.

protected:
    volatile ValueType mValue;
};
  159. #if defined(EA_COMPILER_MSVC)
  160. #pragma intrinsic(_InterlockedExchange)
  161. #pragma intrinsic(_InterlockedExchangeAdd)
  162. #pragma intrinsic(_InterlockedCompareExchange)
  163. #pragma intrinsic(_InterlockedIncrement)
  164. #pragma intrinsic(_InterlockedDecrement)
  165. #pragma intrinsic(_InterlockedExchange64)
  166. #pragma intrinsic(_InterlockedExchangeAdd64)
  167. #pragma intrinsic(_InterlockedCompareExchange64)
  168. #pragma intrinsic(_InterlockedIncrement64)
  169. #pragma intrinsic(_InterlockedDecrement64)
  170. // The following should work under any compiler, including such compilers as GCC under
  171. // WINE or some other Win32 emulation. Win32 InterlockedXXX functions must exist on
  172. // any system that supports the Windows API, be it 32 or 64 bit Windows.
  173. // 32 bit versions
// Atomic read implemented as "exchange-add 0", which also acts as a full barrier.
// We shouldn't need to do this, as far as I know, given the x86 architecture.
template<> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
{ return (ValueType)_InterlockedExchangeAdd((long*)&mValue, 0); }

template<> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
{ return (ValueType)_InterlockedExchangeAdd((long*)&mValue, 0); }

// Atomic store; _InterlockedExchange returns the value held before the store.
// Even though we shouldn't need to use _InterlockedExchange on x86, the intrinsic
// is at least as fast as C code we would otherwise put here.
template<> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
{ return (ValueType)_InterlockedExchange((long*)&mValue, (long)n); }

template<> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
{ return (ValueType)_InterlockedExchange((long*)&mValue, (long)n); }

// Compare-and-swap: stores n iff the current value equals condition.
// The intrinsic returns the value observed before the operation, so equality
// with condition means the swap took place.
template<> inline
bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
{ return ((ValueType)_InterlockedCompareExchange((long*)&mValue, (long)n, (long)condition) == condition); }

template<> inline
bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
{ return ((ValueType)_InterlockedCompareExchange((long*)&mValue, (long)n, (long)condition) == condition); }

// _InterlockedIncrement/_InterlockedDecrement return the NEW value, matching
// the Increment()/Decrement() contract.
template<> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
{ return (ValueType)_InterlockedIncrement((long*)&mValue); }

template<> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
{ return (ValueType)_InterlockedIncrement((long*)&mValue); }

template<> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
{ return (ValueType)_InterlockedDecrement((long*)&mValue); }

template<> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
{ return (ValueType)_InterlockedDecrement((long*)&mValue); }

// _InterlockedExchangeAdd returns the PRIOR value, so add n to yield the new value.
template<> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
{ return ((ValueType)_InterlockedExchangeAdd((long*)&mValue, (long)n) + n); }

template<> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
{ return ((ValueType)_InterlockedExchangeAdd((long*)&mValue, (long)n) + n); }
// 64 bit versions
// Same pattern as the 32 bit specializations above, using the 64 bit intrinsics.

// Atomic read via "exchange-add 0" (also a full barrier).
// We shouldn't need to do this, as far as I know, given the x86 architecture.
template<> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
{ return (ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, 0); }

template<> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
{ return (ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, 0); }

// Atomic store; returns the value held before the store.
template<> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
{ return (ValueType)_InterlockedExchange64((__int64*)&mValue, (__int64)n); }

template<> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
{ return (ValueType)_InterlockedExchange64((__int64*)&mValue, (__int64)n); }

// Compare-and-swap: stores n iff the current value equals condition; true on success.
template<> inline
bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
{ return ((ValueType)_InterlockedCompareExchange64((__int64*)&mValue, (__int64)n, (__int64)condition) == condition); }

template<> inline
bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
{ return ((ValueType)_InterlockedCompareExchange64((__int64*)&mValue, (__int64)n, (__int64)condition) == condition); }

// The increment/decrement intrinsics return the NEW value.
template<> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
{ return (ValueType)_InterlockedIncrement64((__int64*)&mValue); }

template<> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
{ return (ValueType)_InterlockedIncrement64((__int64*)&mValue); }

template<> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
{ return (ValueType)_InterlockedDecrement64((__int64*)&mValue); }

template<> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
{ return (ValueType)_InterlockedDecrement64((__int64*)&mValue); }

// _InterlockedExchangeAdd64 returns the PRIOR value, so add n to yield the new value.
template<> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
{ return ((ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, (__int64)n) + n); }

template<> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
{ return ((ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, (__int64)n) + n); }
  247. #elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
  248. // Recent versions of GCC have atomic primitives built into the compiler and standard library.
  249. #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && EA_COMPILER_VERSION >= 4001) // GCC 4.1 or later
// Atomic read implemented as "add 0" (the __sync family has no plain load).
// const_cast strips const/volatile so the builtin accepts the pointer.
template <> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }

template <> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }

// Atomic store returning the PREVIOUS value. The explicit __sync_synchronize()
// provides a full barrier, because __sync_lock_test_and_set is documented by GCC
// as an acquire barrier only.
template <> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }

template <> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }

// Compare-and-swap: stores n iff the current value equals condition; the builtin
// returns the observed value, so equality with condition means success.
template <> inline
bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }

template <> inline
bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }

// The *_and_fetch builtins return the NEW value, matching the Increment/Decrement/Add contract.
template <> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
{ return __sync_add_and_fetch(&mValue, 1); }

template <> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
{ return __sync_add_and_fetch(&mValue, 1); }

template <> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
{ return __sync_sub_and_fetch(&mValue, 1); }

template <> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
{ return __sync_sub_and_fetch(&mValue, 1); }

template <> inline
AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
{ return __sync_add_and_fetch(&mValue, n); }

template <> inline
AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
{ return __sync_add_and_fetch(&mValue, n); }
// 64 bit versions. Same pattern as the 32 bit GCC/Clang specializations above.

// Atomic read via "add 0"; const_cast strips const/volatile for the builtin.
template <> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }

template <> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }

// Atomic store returning the PREVIOUS value; full barrier added explicitly because
// __sync_lock_test_and_set is an acquire barrier only.
template <> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }

template <> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }

// Compare-and-swap: stores n iff the current value equals condition; true on success.
template <> inline
bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }

template <> inline
bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }

// The *_and_fetch builtins return the NEW value.
template <> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
{ return __sync_add_and_fetch(&mValue, 1); }

template <> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
{ return __sync_add_and_fetch(&mValue, 1); }

template <> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
{ return __sync_sub_and_fetch(&mValue, 1); }

template <> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
{ return __sync_sub_and_fetch(&mValue, 1); }

template <> inline
AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
{ return __sync_add_and_fetch(&mValue, n); }

template <> inline
AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
{ return __sync_add_and_fetch(&mValue, n); }
  322. #endif // GCC 4.1 or later
  323. #endif // GCC
  324. } // namespace Thread
  325. } // namespace EA
  326. #endif // EA_PROCESSOR_X86_64
  327. EA_RESTORE_VC_WARNING()
  328. #endif // EATHREAD_X86_64_EATHREAD_ATOMIC_X86_64_H