/*-------------------------------------------------------------------------
 *
 * arch-ppc.h
 *	  Atomic operations considerations specific to PowerPC
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * src/include/port/atomics/arch-ppc.h
 *
 *-------------------------------------------------------------------------
 */

#if defined(__GNUC__)
/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But it does not order a store with respect to a subsequent load, so sync
 * must be used for a full memory barrier.  (See the illustrative sketch
 * below the barrier definitions.)
 */
#define pg_memory_barrier_impl()	__asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
#endif
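
/*
 * Illustrative sketch only, not part of this header: the example_* functions
 * below are hypothetical and are guarded by an illustrative, never-defined
 * macro, so they do not affect compilation.  They show when the lwsync-based
 * barriers above suffice and when the full sync barrier is required,
 * assuming the GCC definitions above.
 */
#ifdef PG_ARCH_PPC_BARRIER_EXAMPLES
/* Producer/consumer handoff: write and read barriers (lwsync) are enough. */
static inline void
example_publish(volatile uint32 *data, volatile uint32 *flag, uint32 v)
{
	*data = v;
	pg_write_barrier_impl();	/* data store ordered before flag store */
	*flag = 1;
}

static inline uint32
example_consume(volatile uint32 *data, volatile uint32 *flag)
{
	while (*flag == 0)
		;
	pg_read_barrier_impl();		/* flag load ordered before data load */
	return *data;
}

/*
 * Store followed by a load of another location: only sync (the full memory
 * barrier) prevents the load from being satisfied before the store becomes
 * visible to other processors.
 */
static inline uint32
example_store_then_load(volatile uint32 *a, volatile uint32 *b)
{
	*a = 1;
	pg_memory_barrier_impl();
	return *b;
}
#endif							/* PG_ARCH_PPC_BARRIER_EXAMPLES */
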
#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
	volatile uint32 value;
} pg_atomic_uint32;

/* 64bit atomics are only supported in 64bit mode */
#if SIZEOF_VOID_P >= 8
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
	volatile uint64 value pg_attribute_aligned(8);
} pg_atomic_uint64;

#endif

/*
 * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
 * code generation differs at the end.  __atomic_compare_exchange_n():
 *  100:	isync
 *  104:	mfcr    r3
 *  108:	rlwinm  r3,r3,3,31,31
 *  10c:	bne     120 <.eb+0x10>
 *  110:	clrldi  r3,r3,63
 *  114:	addi    r1,r1,112
 *  118:	blr
 *  11c:	nop
 *  120:	clrldi  r3,r3,63
 *  124:	stw     r9,0(r4)
 *  128:	addi    r1,r1,112
 *  12c:	blr
 *
 * This:
 *   f0:	isync
 *   f4:	mfcr    r9
 *   f8:	rldicl. r3,r9,35,63
 *   fc:	bne     104 <.eb>
 *  100:	stw     r10,0(r4)
 *  104:	addi    r1,r1,112
 *  108:	blr
 *
 * This implementation may or may not have materially different performance.
 * It's not exploiting the fact that cr0 still holds the relevant comparison
 * bits, set during the __asm__.  One could fix that by moving more code into
 * the __asm__.  (That would remove the freedom to eliminate dead stores when
 * the caller ignores "expected", but few callers do.)
 *
 * Recognizing constant "newval" would be superfluous, because there's no
 * immediate-operand version of stwcx.
 */
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	uint32		found;
	uint32		condition_register;
	bool		ret;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int32) *expected <= PG_INT16_MAX &&
		(int32) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			" sync \n"
			" lwarx %0,0,%5 \n"
			" cmpwi %0,%3 \n"
			" bne $+12 \n"		/* branch to isync */
			" stwcx. %4,0,%5 \n"
			" bne $-16 \n"		/* branch to lwarx */
			" isync \n"
			" mfcr %1 \n"
			: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			: "i"(*expected), "r"(newval), "r"(&ptr->value)
			: "memory", "cc");
	else
#endif
		__asm__ __volatile__(
			" sync \n"
			" lwarx %0,0,%5 \n"
			" cmpw %0,%3 \n"
			" bne $+12 \n"		/* branch to isync */
			" stwcx. %4,0,%5 \n"
			" bne $-16 \n"		/* branch to lwarx */
			" isync \n"
			" mfcr %1 \n"
			: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			: "r"(*expected), "r"(newval), "r"(&ptr->value)
			: "memory", "cc");
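
	/*
	 * mfcr copies the entire condition register, with CR field 0 in the four
	 * most significant bits (LT, GT, EQ, SO), so CR0's EQ bit is bit 29 of
	 * the 32-bit result.  EQ is set by a successful stwcx., and left clear
	 * when cmpw/cmpwi found a mismatch and we branched past the store, so it
	 * indicates whether the exchange actually happened.
	 */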
	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}
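
/*
 * Illustrative sketch of a typical caller (hypothetical, guarded by an
 * illustrative, never-defined macro so it does not affect compilation): a
 * compare-and-exchange retry loop that atomically sets flag bits.  On
 * failure the primitive above stores the value it found into "expected", so
 * the loop recomputes the desired value and retries without re-reading.
 */
#ifdef PG_ARCH_PPC_CAS_EXAMPLE
static inline uint32
example_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 mask)
{
	uint32		old = ptr->value;	/* initial guess; corrected on failure */

	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | mask))
	{
		/* "old" now holds the current value; loop and try again */
	}
	return old;					/* value in place before our update */
}
#endif							/* PG_ARCH_PPC_CAS_EXAMPLE */
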
/*
 * This mirrors gcc __sync_fetch_and_add().
 *
 * Like tas(), use constraint "=&b" to avoid allocating r0.
 */
#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	uint32		_t;
	uint32		res;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			" sync \n"
			" lwarx %1,0,%4 \n"
			" addi %0,%1,%3 \n"
			" stwcx. %0,0,%4 \n"
			" bne $-12 \n"		/* branch to lwarx */
			" isync \n"
			: "=&r"(_t), "=&b"(res), "+m"(ptr->value)
			: "i"(add_), "r"(&ptr->value)
			: "memory", "cc");
	else
#endif
		__asm__ __volatile__(
			" sync \n"
			" lwarx %1,0,%4 \n"
			" add %0,%1,%3 \n"
			" stwcx. %0,0,%4 \n"
			" bne $-12 \n"		/* branch to lwarx */
			" isync \n"
			: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
			: "r"(add_), "r"(&ptr->value)
			: "memory", "cc");

	return res;
}
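
/*
 * Illustrative sketch of a typical caller (hypothetical, guarded by an
 * illustrative, never-defined macro so it does not affect compilation):
 * fetch-and-add returns the value held before the addition, so a shared
 * counter can hand out consecutive values without a lock and without
 * duplicates.
 */
#ifdef PG_ARCH_PPC_FETCH_ADD_EXAMPLE
static inline uint32
example_assign_next_id(volatile pg_atomic_uint32 *counter)
{
	/* each concurrent caller receives the counter's prior value */
	return pg_atomic_fetch_add_u32_impl(counter, 1);
}
#endif							/* PG_ARCH_PPC_FETCH_ADD_EXAMPLE */
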
#ifdef PG_HAVE_ATOMIC_U64_SUPPORT

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
									uint64 *expected, uint64 newval)
{
	uint64		found;
	uint32		condition_register;
	bool		ret;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int64) *expected <= PG_INT16_MAX &&
		(int64) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			" sync \n"
			" ldarx %0,0,%5 \n"
			" cmpdi %0,%3 \n"
			" bne $+12 \n"		/* branch to isync */
			" stdcx. %4,0,%5 \n"
			" bne $-16 \n"		/* branch to ldarx */
			" isync \n"
			" mfcr %1 \n"
			: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			: "i"(*expected), "r"(newval), "r"(&ptr->value)
			: "memory", "cc");
	else
#endif
		__asm__ __volatile__(
			" sync \n"
			" ldarx %0,0,%5 \n"
			" cmpd %0,%3 \n"
			" bne $+12 \n"		/* branch to isync */
			" stdcx. %4,0,%5 \n"
			" bne $-16 \n"		/* branch to ldarx */
			" isync \n"
			" mfcr %1 \n"
			: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			: "r"(*expected), "r"(newval), "r"(&ptr->value)
			: "memory", "cc");

	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}

#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
	uint64		_t;
	uint64		res;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			" sync \n"
			" ldarx %1,0,%4 \n"
			" addi %0,%1,%3 \n"
			" stdcx. %0,0,%4 \n"
			" bne $-12 \n"		/* branch to ldarx */
			" isync \n"
			: "=&r"(_t), "=&b"(res), "+m"(ptr->value)
			: "i"(add_), "r"(&ptr->value)
			: "memory", "cc");
	else
#endif
		__asm__ __volatile__(
			" sync \n"
			" ldarx %1,0,%4 \n"
			" add %0,%1,%3 \n"
			" stdcx. %0,0,%4 \n"
			" bne $-12 \n"		/* branch to ldarx */
			" isync \n"
			: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
			: "r"(add_), "r"(&ptr->value)
			: "memory", "cc");

	return res;
}

#endif							/* PG_HAVE_ATOMIC_U64_SUPPORT */

/*
 * Per the architecture manual, aligned doubleword accesses have single-copy
 * atomicity.
 */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
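
/*
 * Illustrative sketch (hypothetical, guarded by an illustrative,
 * never-defined macro so it does not affect compilation): single-copy
 * atomicity for aligned doublewords means a plain volatile access never
 * yields a torn value mixing halves of two different writes, which is what
 * lets unlocked 64-bit reads be implemented as ordinary loads.
 */
#ifdef PG_ARCH_PPC_SINGLE_COPY_EXAMPLE
static inline uint64
example_unlocked_read_u64(volatile uint64 *ptr)
{
	/* aligned doubleword load: returns some stored value in full */
	return *ptr;
}
#endif							/* PG_ARCH_PPC_SINGLE_COPY_EXAMPLE */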