// eathread_sync.h
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // Copyright (c) Electronic Arts Inc. All rights reserved.
  3. ///////////////////////////////////////////////////////////////////////////////
  4. /////////////////////////////////////////////////////////////////////////////
  5. // Functionality related to memory and code generation synchronization.
  6. //
  7. // Overview (partially taken from Usenet)
  8. // On all modern hardware, a store instruction does not necessarily result
  9. // in an immediate write to main memory, or even to the (processor specific)
  10. // cache. A store instruction simply places a write request in a request
  11. // queue, and continues. (Future reads in the same processor will check if
  12. // there is a write to the same address in this queue, and fetch it, rather
  13. // than reading from memory. Reads from another processor, however, can't
  14. // see this queue.) Generally, the ordering of requests in this queue is
  15. // not guaranteed, although some hardware offers stricter guarantees.
  16. // Thus, you must do something to ensure that the writes actually occur.
  17. // This is called a write barrier, and generally takes the form of a special
  18. // instruction.
  19. //
  20. // And of course, just because you have written the data to main memory
  21. // doesn't mean that some other processor, executing a different thread,
  22. // doesn't have a stale copy in its cache, and use that for a read. Before
  23. // reading the variables, you need to ensure that the processor has the
  24. // most recent copy in its cache. This is called a read barrier, and
  25. // again, takes the form of a special hardware instruction. A number of
  26. // architectures (e.g. Intel x86-32) still guarantee read consistency --
  27. // all of the processors "listen" on the main memory bus, and if there is
  28. // a write, automatically purge the corresponding data from their cache.
  29. // But not all.
  30. //
// Note that if you are writing data within an operating system-level
  32. // locked mutex, the lock and unlock of the mutex will synchronize memory
  33. // for you, thus eliminating the need for you to execute read and/or write
  34. // barriers. However, mutex locking and its associated thread stalling is
  35. // a potentially inefficient operation when in some cases you could simply
  36. // write the memory from one thread and read it from another without
  37. // using mutexes around the data access. Some systems let you write memory
  38. // from one thread and read it from another (without you using mutexes)
  39. // without using memory barriers, but others (notably SMP) will not let you
  40. // get away with this, even if you put a mutex around the write. In these
  41. // cases you need read/write barriers.
  42. /////////////////////////////////////////////////////////////////////////////
  43. #ifndef EATHREAD_EATHREAD_SYNC_H
  44. #define EATHREAD_EATHREAD_SYNC_H
  45. // Note
  46. // These functions are not placed in a C++ namespace but instead are standalone.
  47. // The reason for this is that these are usually implemented as #defines of
  48. // C or asm code or implemented as compiler intrinsics. We however document
  49. // these functions here as if they are simply functions. The actual platform-
  50. // specific declarations are in the appropriate platform-specific directory.
  51. #include <EABase/eabase.h>
  52. #include <eathread/internal/config.h>
  53. #if !EA_THREADS_AVAILABLE
  54. // Do nothing.
  55. #elif defined(EA_PROCESSOR_X86)
  56. #include <eathread/x86/eathread_sync_x86.h>
  57. #elif defined(EA_PROCESSOR_X86_64)
  58. #include <eathread/x86-64/eathread_sync_x86-64.h>
  59. #elif defined(EA_PROCESSOR_IA64)
  60. #include <eathread/ia64/eathread_sync_ia64.h>
  61. #elif defined(EA_PLATFORM_APPLE)
  62. #include <eathread/apple/eathread_sync_apple.h>
  63. #elif defined(EA_PROCESSOR_ARM)
  64. #include <eathread/arm/eathread_sync_arm.h>
  65. #endif
  66. #if defined(EA_PRAGMA_ONCE_SUPPORTED)
  67. #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
  68. #endif
// EA_THREAD_DO_SPIN
//
// Provides a macro which maps to whatever processor idle functionality the given platform requires.
//
// Example usage:
//     EA_THREAD_DO_SPIN();
//
#ifndef EA_THREAD_DO_SPIN
#ifdef EA_THREAD_COOPERATIVE
// Under a cooperative (non-preemptive) threading model we must yield the
// processor, otherwise the thread we are waiting on could never run.
#define EA_THREAD_DO_SPIN() ThreadSleep()
#else
#define EA_THREAD_DO_SPIN() EAProcessorPause() // We don't check for EA_TARGET_SMP here and instead sleep if not defined because you probably shouldn't be using a spinlock on a pre-emptive system unless it is a multi-processing system.
#endif
#endif
// The above header files would define EA_THREAD_SYNC_IMPLEMENTED.
#if !defined(EA_THREAD_SYNC_IMPLEMENTED)
// No platform-specific implementation was selected above, so the macros below
// expand to nothing (i.e. they are no-ops on this platform).
// Perhaps it should be considered too serious of an error to allow compilation
// to continue. If so, then we should enable the #error below.
// #error EA_THREAD_SYNC_IMPLEMENTED not defined.

/// EAProcessorPause
///
/// \Declaration
///    void EAProcessorPause();
///
/// \Description
///    This statement causes the processor to efficiently (as much as possible)
///    execute a no-op (a.k.a. nop or noop). These are particularly useful in
///    spin-wait loops. Without a proper pause, some processors suffer severe
///    performance penalties while executing spin-wait loops such as those in
///    simple spin locks. Many processors have specialized pause instructions
///    (e.g. Intel x86 P4 'pause' or 'asm rep nop') that can be taken advantage
///    of here.
///
/// \Example
///    while (!flag) {
///        EAProcessorPause();
///    }
#define EAProcessorPause()
/// EAReadBarrier
///
/// \Declaration
///    void EAReadBarrier();
///
/// \Description
///    A read barrier ensures that neither software nor hardware perform a memory
///    read prior to the read barrier and that recent writes to main memory are
///    immediately seen (and not using stale cached data) by the processor executing
///    the read barrier. This generally does not mean a (performance draining)
///    invalidation of the entire cache but does possibly mean invalidating any cache
///    that refers to main memory which has changed. Thus, there is a performance
///    cost but considering the use of this operation, this is the most efficient
///    way of achieving the effect.
///
/// \Example
///    The following function will operate fine on some multiprocessing systems but
///    hang (possibly indefinitely) on other multiprocessing systems unless the
///    EAReadBarrier call is present.
///
///    void ThreadFunction() {
///        extern volatile int gFlag;
///        while (gFlag == 0) { // Wait for a separate thread to write to gFlag.
///            EAProcessorPause();
///            EAReadBarrier();
///        }
///        // Do memory sharing operations with other threads here.
///    }
#define EAReadBarrier()
/// EAWriteBarrier
///
/// \Declaration
///    void EAWriteBarrier();
///
/// \Description
///    A write barrier ensures that neither software nor hardware delay a memory
///    write operation past the barrier. If you want your memory write committed
///    to main memory immediately, this statement will have that effect. As such,
///    this is something like a flush of the current processor's write cache.
///    Note that flushing memory from a processor's cache to main memory like this
///    doesn't cause a second processor to immediately see the changed values in
///    main memory, as the second processor has a read cache between it and main
///    memory. Thus, a second processor would need to execute a read barrier if it
///    wants to see the updates immediately.
#define EAWriteBarrier()
/// EAReadWriteBarrier
///
/// \Declaration
///    void EAReadWriteBarrier();
///
/// \Description
///    A read/write barrier has the same effect as both a read barrier and a write
///    barrier at once. A read barrier ensures that neither software nor hardware
///    perform a memory read prior to the read barrier, while a write barrier
///    ensures that neither software nor hardware delay a memory write operation
///    past the barrier. A ReadWriteBarrier specifically acts like a WriteBarrier
///    followed by a ReadBarrier, despite the name ReadWriteBarrier being the
///    other way around.
///
///    EAReadWriteBarrier synchronizes both reads and writes to system memory
///    between processors and their caches on multiprocessor systems, particularly
///    SMP systems. This can be useful to ensure the state of global variables at
///    a particular point in your code for multithreaded applications. Higher level
///    thread synchronization primitives such as mutexes achieve the same
///    effect (while providing the additional functionality of synchronizing code
///    execution) but at a significantly higher cost.
///
///    A two-processor SMP system has two processors, each with its own instruction
///    and data caches. If the first processor writes to a memory location and the
///    second processor needs to read from that location, the first processor's
///    write may still be in its cache and not committed to main memory, and the
///    second processor thus would not see the newly written value. The value
///    will eventually get written from the first cache to main memory, but if you
///    need to ensure that it is written at a particular time, you would use a
///    ReadWrite barrier.
///
///    This function is similar to the Linux kernel mb() macro (the full memory
///    barrier) and to the Windows kernel KeMemoryBarrier function.
#define EAReadWriteBarrier()
/// EACompilerMemoryBarrier
///
/// \Declaration
///    void EACompilerMemoryBarrier();
///
/// \Description
///    Provides a barrier for compiler optimization. The compiler will not make
///    assumptions about locations across an EACompilerMemoryBarrier statement.
///    For example, if a compiler has memory values temporarily cached in
///    registers but you need them to be written to memory, you can execute the
///    EACompilerMemoryBarrier statement. This is somewhat similar in concept to
///    the C volatile keyword except that it applies to all memory the compiler
///    is currently working with and applies its effect only where you specify
///    and not for every usage as with the volatile keyword.
///
///    Under GCC, this statement is equivalent to the GCC `asm volatile("":::"memory")`
///    statement. Under VC++, this is equivalent to a _ReadWriteBarrier statement
///    (not to be confused with EAReadWriteBarrier above) and equivalent to the Windows
///    kernel function KeMemoryBarrierWithoutFence. This is also known as barrier()
///    under Linux.
///
///    EACompilerMemoryBarrier is a compiler-level statement and not a
///    processor-level statement. For processor-level memory barriers,
///    use EAReadBarrier, etc.
///
/// \Example
///    Without the compiler memory barrier below, an optimizing compiler might
///    never assign 0 to gValue, because gValue is reassigned to 1 later and
///    because gValue is not declared volatile.
///
///    void ThreadFunction() {
///        extern int gValue; // Note that gValue is intentionally not declared volatile.
///        gValue = 0;
///        EACompilerMemoryBarrier();
///        gValue = 1;
///    }
#define EACompilerMemoryBarrier()
#endif // EA_THREAD_SYNC_IMPLEMENTED
  224. #endif // #ifdef EATHREAD_EATHREAD_SYNC_H