/*-------------------------------------------------------------------------
 *
 * arch-ppc.h
 *    Atomic operations considerations specific to PowerPC
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * src/include/port/atomics/arch-ppc.h
 *
 *-------------------------------------------------------------------------
 */

#if defined(__GNUC__)

/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But a load can be performed before a subsequent store, so sync must be used
 * for a full memory barrier.
 */
#define pg_memory_barrier_impl()    __asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier_impl()      __asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier_impl()     __asm__ __volatile__ ("lwsync" : : : "memory")
#endif
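
/*
 * Illustrative sketch, not part of this header: how the read and write
 * barriers above are typically paired.  "data" and "flag" are hypothetical
 * shared variables.
 *
 *    Writer:
 *        data = 42;
 *        pg_write_barrier_impl();    (lwsync: data visible before flag)
 *        flag = 1;
 *
 *    Reader:
 *        if (flag)
 *        {
 *            pg_read_barrier_impl(); (lwsync: data read no earlier than flag)
 *            use(data);
 *        }
 *
 * lwsync suffices on both sides because neither side needs to order a store
 * ahead of a later load; only the full "sync" fence used by
 * pg_memory_barrier_impl() provides that ordering.
 */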

#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
    volatile uint32 value;
} pg_atomic_uint32;

/* 64bit atomics are only supported in 64bit mode */
#if SIZEOF_VOID_P >= 8
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
    volatile uint64 value pg_attribute_aligned(8);
} pg_atomic_uint64;

#endif

/*
 * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
 * code generation differs at the end.  __atomic_compare_exchange_n():
 *  100:    isync
 *  104:    mfcr    r3
 *  108:    rlwinm  r3,r3,3,31,31
 *  10c:    bne     120 <.eb+0x10>
 *  110:    clrldi  r3,r3,63
 *  114:    addi    r1,r1,112
 *  118:    blr
 *  11c:    nop
 *  120:    clrldi  r3,r3,63
 *  124:    stw     r9,0(r4)
 *  128:    addi    r1,r1,112
 *  12c:    blr
 *
 * This:
 *   f0:    isync
 *   f4:    mfcr    r9
 *   f8:    rldicl. r3,r9,35,63
 *   fc:    bne     104 <.eb>
 *  100:    stw     r10,0(r4)
 *  104:    addi    r1,r1,112
 *  108:    blr
 *
 * This implementation may or may not have materially different performance.
 * It's not exploiting the fact that cr0 still holds the relevant comparison
 * bits, set during the __asm__.  One could fix that by moving more code into
 * the __asm__.  (That would remove the freedom to eliminate dead stores when
 * the caller ignores "expected", but few callers do.)
 *
 * Recognizing constant "newval" would be superfluous, because there's no
 * immediate-operand version of stwcx.
 */
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
                                    uint32 *expected, uint32 newval)
{
    uint32      found;
    uint32      condition_register;
    bool        ret;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
    if (__builtin_constant_p(*expected) &&
        (int32) *expected <= PG_INT16_MAX &&
        (int32) *expected >= PG_INT16_MIN)
        __asm__ __volatile__(
            "   sync                \n"
            "   lwarx   %0,0,%5     \n"
            "   cmpwi   %0,%3       \n"
            "   bne     $+12        \n" /* branch to isync */
            "   stwcx.  %4,0,%5     \n"
            "   bne     $-16        \n" /* branch to lwarx */
            "   isync               \n"
            "   mfcr    %1          \n"
            : "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
            : "i"(*expected), "r"(newval), "r"(&ptr->value)
            : "memory", "cc");
    else
#endif
        __asm__ __volatile__(
            "   sync                \n"
            "   lwarx   %0,0,%5     \n"
            "   cmpw    %0,%3       \n"
            "   bne     $+12        \n" /* branch to isync */
            "   stwcx.  %4,0,%5     \n"
            "   bne     $-16        \n" /* branch to lwarx */
            "   isync               \n"
            "   mfcr    %1          \n"
            : "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
            : "r"(*expected), "r"(newval), "r"(&ptr->value)
            : "memory", "cc");

    ret = (condition_register >> 29) & 1;   /* test eq bit of cr0 */
    if (!ret)
        *expected = found;
    return ret;
}
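
/*
 * Illustrative sketch, not part of this header: the retry-loop shape in
 * which this primitive is normally used (callers actually go through the
 * pg_atomic_compare_exchange_u32() wrapper in atomics.h).  The helper
 * set_bits_u32() and its arguments are hypothetical.
 *
 *    static void
 *    set_bits_u32(volatile pg_atomic_uint32 *v, uint32 mask)
 *    {
 *        uint32 expected = v->value;
 *
 *        while (!pg_atomic_compare_exchange_u32_impl(v, &expected,
 *                                                    expected | mask))
 *            ;
 *    }
 *
 * No explicit re-read is needed in the loop: on failure the implementation
 * above writes the value it found into *expected, so the next iteration
 * retries against the freshly observed value.
 */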

/*
 * This mirrors gcc __sync_fetch_and_add().
 *
 * Like tas(), use constraint "=&b" to avoid allocating r0.
 */
#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
    uint32      _t;
    uint32      res;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
    if (__builtin_constant_p(add_) &&
        add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
        __asm__ __volatile__(
            "   sync                \n"
            "   lwarx   %1,0,%4     \n"
            "   addi    %0,%1,%3    \n"
            "   stwcx.  %0,0,%4     \n"
            "   bne     $-12        \n" /* branch to lwarx */
            "   isync               \n"
            : "=&r"(_t), "=&b"(res), "+m"(ptr->value)
            : "i"(add_), "r"(&ptr->value)
            : "memory", "cc");
    else
#endif
        __asm__ __volatile__(
            "   sync                \n"
            "   lwarx   %1,0,%4     \n"
            "   add     %0,%1,%3    \n"
            "   stwcx.  %0,0,%4     \n"
            "   bne     $-12        \n" /* branch to lwarx */
            "   isync               \n"
            : "=&r"(_t), "=&r"(res), "+m"(ptr->value)
            : "r"(add_), "r"(&ptr->value)
            : "memory", "cc");

    return res;
}
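
/*
 * Illustrative sketch, not part of this header: a hypothetical counter bump
 * built on this primitive (callers normally use the pg_atomic_fetch_add_u32()
 * wrapper in atomics.h).  Note that the value *before* the addition is
 * returned.
 *
 *    static uint32
 *    bump_counter(volatile pg_atomic_uint32 *counter)
 *    {
 *        return pg_atomic_fetch_add_u32_impl(counter, 1);
 *    }
 *
 * Because the addend 1 is a compile-time constant in the PG_INT16 range,
 * compilers providing HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P take the addi
 * form above; a variable addend falls through to the register-operand add.
 */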

#ifdef PG_HAVE_ATOMIC_U64_SUPPORT

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
                                    uint64 *expected, uint64 newval)
{
    uint64      found;
    uint32      condition_register;
    bool        ret;

    /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
    if (__builtin_constant_p(*expected) &&
        (int64) *expected <= PG_INT16_MAX &&
        (int64) *expected >= PG_INT16_MIN)
        __asm__ __volatile__(
            "   sync                \n"
            "   ldarx   %0,0,%5     \n"
            "   cmpdi   %0,%3       \n"
            "   bne     $+12        \n" /* branch to isync */
            "   stdcx.  %4,0,%5     \n"
            "   bne     $-16        \n" /* branch to ldarx */
            "   isync               \n"
            "   mfcr    %1          \n"
            : "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
            : "i"(*expected), "r"(newval), "r"(&ptr->value)
            : "memory", "cc");
    else
#endif
        __asm__ __volatile__(
            "   sync                \n"
            "   ldarx   %0,0,%5     \n"
            "   cmpd    %0,%3       \n"
            "   bne     $+12        \n" /* branch to isync */
            "   stdcx.  %4,0,%5     \n"
            "   bne     $-16        \n" /* branch to ldarx */
            "   isync               \n"
            "   mfcr    %1          \n"
            : "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
            : "r"(*expected), "r"(newval), "r"(&ptr->value)
            : "memory", "cc");

    ret = (condition_register >> 29) & 1;   /* test eq bit of cr0 */
    if (!ret)
        *expected = found;
    return ret;
}

#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
    uint64      _t;
    uint64      res;

    /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
    if (__builtin_constant_p(add_) &&
        add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
        __asm__ __volatile__(
            "   sync                \n"
            "   ldarx   %1,0,%4     \n"
            "   addi    %0,%1,%3    \n"
            "   stdcx.  %0,0,%4     \n"
            "   bne     $-12        \n" /* branch to ldarx */
            "   isync               \n"
            : "=&r"(_t), "=&b"(res), "+m"(ptr->value)
            : "i"(add_), "r"(&ptr->value)
            : "memory", "cc");
    else
#endif
        __asm__ __volatile__(
            "   sync                \n"
            "   ldarx   %1,0,%4     \n"
            "   add     %0,%1,%3    \n"
            "   stdcx.  %0,0,%4     \n"
            "   bne     $-12        \n" /* branch to ldarx */
            "   isync               \n"
            : "=&r"(_t), "=&r"(res), "+m"(ptr->value)
            : "r"(add_), "r"(&ptr->value)
            : "memory", "cc");

    return res;
}

#endif                          /* PG_HAVE_ATOMIC_U64_SUPPORT */

/* per architecture manual doubleword accesses have single copy atomicity */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
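
/*
 * Illustrative sketch, not part of this header, of what the macro above
 * implies: an aligned 8-byte load or store is a single access on this
 * architecture, so a plain read of a pg_atomic_uint64 can never observe a
 * torn value and needs no compare-and-swap just to read it.
 *
 *    uint64
 *    read_u64_plain(volatile pg_atomic_uint64 *v)
 *    {
 *        return v->value;    (a single ld instruction on ppc64)
 *    }
 *
 * This guarantees only atomicity of the access itself, not any ordering with
 * respect to other loads and stores; the barriers above are still needed for
 * that.
 */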