atomic_gcc_sync.h

#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
    atomic_memory_order_relaxed,
    atomic_memory_order_acquire,
    atomic_memory_order_release,
    atomic_memory_order_acq_rel,
    atomic_memory_order_seq_cst
} atomic_memory_order_t;

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
    /* Easy cases first: no barrier, and full barrier. */
    if (mo == atomic_memory_order_relaxed) {
        asm volatile("" ::: "memory");
        return;
    }
    if (mo == atomic_memory_order_seq_cst) {
        asm volatile("" ::: "memory");
        __sync_synchronize();
        asm volatile("" ::: "memory");
        return;
    }
    asm volatile("" ::: "memory");
# if defined(__i386__) || defined(__x86_64__)
    /* This is implicit on x86. */
# elif defined(__ppc64__)
    asm volatile("lwsync");
# elif defined(__ppc__)
    asm volatile("sync");
# elif defined(__sparc__) && defined(__arch64__)
    if (mo == atomic_memory_order_acquire) {
        asm volatile("membar #LoadLoad | #LoadStore");
    } else if (mo == atomic_memory_order_release) {
        asm volatile("membar #LoadStore | #StoreStore");
    } else {
        asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
    }
# else
    __sync_synchronize();
# endif
    asm volatile("" ::: "memory");
}
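
/*
 * Illustrative sketch, not part of the original header: how atomic_fence()
 * can sit in a release/acquire publication pattern.  The payload/published
 * variables and the two helper functions below are hypothetical, shown only
 * to clarify where the fences go relative to the plain loads and stores.
 */
#if 0
static int payload;
static int volatile published;

static void
example_publish(int v) {
    payload = v;
    /* Order the payload store before the flag store. */
    atomic_fence(atomic_memory_order_release);
    published = 1;
}

static int
example_consume(void) {
    while (published == 0) {
        /* Spin until the flag is observed. */
    }
    /* Order the flag load before the payload load. */
    atomic_fence(atomic_memory_order_acquire);
    return payload;
}
#endif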

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong-fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a strong
 * fence after seq_cst stores, and have naked loads.  So we want the strong
 * fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this; an illustrative expansion is sketched after the two
 * functions below.
 */
ATOMIC_INLINE void
atomic_pre_sc_load_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
    atomic_fence(atomic_memory_order_relaxed);
# else
    atomic_fence(atomic_memory_order_seq_cst);
# endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
    atomic_fence(atomic_memory_order_seq_cst);
# else
    atomic_fence(atomic_memory_order_relaxed);
# endif
}
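
/*
 * Illustrative sketch, not part of the original header: with the two helpers
 * above, the seq_cst paths generated by JEMALLOC_GENERATE_ATOMICS below
 * effectively reduce to
 *
 *   store: atomic_fence(release); a->repr = val; atomic_post_sc_store_fence();
 *   load:  atomic_pre_sc_load_fence(); result = a->repr; atomic_fence(acquire);
 *
 * so the strong fence lands after the store on x86 and 64-bit SPARC, and
 * before the load on weakly ordered architectures, per strategy 2 in the
 * comment above.
 */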

#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
typedef struct { \
    type volatile repr; \
} atomic_##short_type##_t; \
\
ATOMIC_INLINE type \
atomic_load_##short_type(const atomic_##short_type##_t *a, \
    atomic_memory_order_t mo) { \
    if (mo == atomic_memory_order_seq_cst) { \
        atomic_pre_sc_load_fence(); \
    } \
    type result = a->repr; \
    if (mo != atomic_memory_order_relaxed) { \
        atomic_fence(atomic_memory_order_acquire); \
    } \
    return result; \
} \
\
ATOMIC_INLINE void \
atomic_store_##short_type(atomic_##short_type##_t *a, \
    type val, atomic_memory_order_t mo) { \
    if (mo != atomic_memory_order_relaxed) { \
        atomic_fence(atomic_memory_order_release); \
    } \
    a->repr = val; \
    if (mo == atomic_memory_order_seq_cst) { \
        atomic_post_sc_store_fence(); \
    } \
} \
\
ATOMIC_INLINE type \
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
    /* \
     * Because of FreeBSD, we care about gcc 4.2, which doesn't have \
     * an atomic exchange builtin.  We fake it with a CAS loop. \
     */ \
    while (true) { \
        type old = a->repr; \
        if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \
            return old; \
        } \
    } \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
    type *expected, type desired, \
    atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
    type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
        desired); \
    if (prev == *expected) { \
        return true; \
    } else { \
        *expected = prev; \
        return false; \
    } \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
    type *expected, type desired, \
    atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
    type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
        desired); \
    if (prev == *expected) { \
        return true; \
    } else { \
        *expected = prev; \
        return false; \
    } \
}
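
/*
 * Illustrative sketch, not part of the original header: a hypothetical
 * instantiation and use of JEMALLOC_GENERATE_ATOMICS.  The `b` short type,
 * try_set_flag(), and the stdbool.h include are assumptions for the example;
 * the real instantiations live in the including header, which is also assumed
 * to define ATOMIC_INLINE.
 */
#if 0
#include <stdbool.h>

JEMALLOC_GENERATE_ATOMICS(bool, b, 0)

static bool
try_set_flag(atomic_b_t *flag) {
    bool expected = false;
    /* Both ordering arguments are accepted but unused by this backend. */
    return atomic_compare_exchange_strong_b(flag, &expected, true,
        atomic_memory_order_acq_rel, atomic_memory_order_relaxed);
}
#endif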

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
\
ATOMIC_INLINE type \
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
    return __sync_fetch_and_add(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
    return __sync_fetch_and_sub(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
    return __sync_fetch_and_and(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
    return __sync_fetch_and_or(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
    return __sync_fetch_and_xor(&a->repr, val); \
}
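
/*
 * Illustrative sketch, not part of the original header: a hypothetical
 * instantiation of the integer variant and a relaxed counter increment.
 * The u32 short type and `counter` are examples only; note that the mo
 * argument is ignored here because the __sync fetch-and-op builtins are
 * documented by GCC as full barriers.
 */
#if 0
#include <stdint.h>

JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)

static atomic_u32_t counter = ATOMIC_INIT(0);

static uint32_t
bump_counter(void) {
    /* Returns the pre-increment value. */
    return atomic_fetch_add_u32(&counter, 1, atomic_memory_order_relaxed);
}
#endif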

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */