- /*
- * fast architecture specific locking
- *
- * $Id$
- *
- *
- *
- * Copyright (C) 2001-2003 FhG Fokus
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- /*
- *
- * History:
- * --------
- * 2002-02-05 created by andrei
- * 2003-01-16 added PPC locking code contributed by Dinos Dorkofikis
- * <[email protected]>
- * 2004-09-12 added MIPS locking for ISA>=2 (>r3000) (andrei)
- * 2004-12-16 for now use the same locking code for sparc32 as for sparc64
- * (it will work only if NOSMP is defined) (andrei)
- * 2005-04-27 added alpha locking code (andrei)
- * 2005-05-25 PPC locking code enabled for PPC64; added a lwsync to
- * the tsl part and replaced the sync with a lwsync for the
- * unlock part (andrei)
- * 2006-03-08 mips2 NOSMP (skip sync), optimized x86 & mips clobbers and
- * input/output constraints (andrei)
- * 2006-04-03 optimization: call lock_get memory barrier outside tsl, in the
- * calling function, only if the lock operation succeeded
- * (membar_getlock()) (andrei)
- * added try_lock(); more x86 optimizations, x86 release_lock
- * fix (andrei)
- * 2006-04-04 sparc* optimizations, sparc32 smp support, armv6 no smp support,
- * ppc, mips*, alpha optimizations (andrei)
- * 2006-04-05 ppc fixes (s/stw/stwx/, s/lwz/lwzx/), early clobber added
- * where needed (andrei)
- * 2006-11-22 arm early clobber added: according to the swp instruction
- *            specification the address register must be different from
- *            the other two (Julien Blache <[email protected]>)
- *
- */
- /*
- * WARNING: the code was not tested on the following architectures:
- * - arm6 (cross-compiles ok, no test)
- * - alpha (cross-compiles ok, no test)
- * - mips64 (cross-compiles ok)
- * - ppc64 (compiles ok)
- * - sparc32 (tested on a sparc64)
- */
- #ifndef fastlock_h
- #define fastlock_h
- #include "sched_yield.h"
- #define SPIN_OPTIMIZE /* if defined, optimize spinning on the lock:
-                          first test the lock with cheap non-atomic,
-                          non-bus-locking operations and switch to the
-                          more expensive atomic version only when the
-                          lock appears to be free */
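- /* Illustration (not part of this file's API): SPIN_OPTIMIZE is the classic
-  * test-and-test-and-set pattern. A minimal C11 sketch of the same idea,
-  * assuming a hypothetical plain-C lock instead of the per-architecture
-  * tsl() defined below: */
- #if 0 /* example only, never compiled */
- #include <stdatomic.h>
- static void ttas_lock_sketch(atomic_int* l)
- {
- 	for(;;){
- 		/* cheap spin: plain loads that stay in the local cache */
- 		while(atomic_load_explicit(l, memory_order_relaxed))
- 			;
- 		/* expensive atomic attempt, only when the lock looks free */
- 		if (!atomic_exchange_explicit(l, 1, memory_order_acquire))
- 			return;
- 	}
- }
- #endif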
- typedef volatile int fl_lock_t;
- #define init_lock( l ) (l)=0
- /* what membar to use (if any) after taking a lock. This
- * was separated from the lock code to allow better optimizations.
- * e.g.: use the membar_getlock only after getting the lock and don't use
- * it if lock_get fails / when spinning on tsl.
- * There is no corresponding membar_release_lock (because lock_release
- * must always include the needed memory barrier).
- * WARNING: this is intended only for internal fastlock use */
- #if defined(__CPU_i386) || defined(__CPU_x86_64)
- #define membar_getlock() /* not needed on x86 */
- #elif defined(__CPU_sparc64)
- #ifndef NOSMP
- #define membar_getlock() \
- asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
- /* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
- * since ldstub acts both as a store and as a load */
- #else
- /* no need for a compiler barrier, that is already included in lock_get/tsl */
- #define membar_getlock() /* not needed if no smp*/
- #endif /* NOSMP */
- #elif defined(__CPU_sparc)
- #define membar_getlock() /* no need for a compiler barrier, already included */
- #elif defined __CPU_arm || defined __CPU_arm6
- #ifndef NOSMP
- #warning smp not supported on arm* (no membars), try compiling with -DNOSMP
- #endif /* NOSMP */
- #define membar_getlock()
- #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
- #ifndef NOSMP
- #define membar_getlock() \
- asm volatile("lwsync \n\t" : : : "memory");
- #else
- #define membar_getlock()
- #endif /* NOSMP */
- #elif defined __CPU_mips2 || defined __CPU_mips64
- #ifndef NOSMP
- #define membar_getlock() \
- asm volatile("sync \n\t" : : : "memory");
- #else
- #define membar_getlock()
- #endif /* NOSMP */
- #elif defined __CPU_mips
- #ifndef NOSMP
- #warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
- #endif
- #define membar_getlock()
- #elif defined __CPU_alpha
- #ifndef NOSMP
- #define membar_getlock() \
- asm volatile("mb \n\t" : : : "memory");
- #else
- #define membar_getlock()
- #endif /* NOSMP */
- #else /* __CPU_xxx */
- #error "unknown architecture"
- #endif
- /* test and set the lock; returns non-zero if the lock is held by someone
-  * else, 0 if it was acquired.
-  * WARNING: no memory barriers included; if you use this function directly
-  * (not recommended) and it gets the lock (ret==0), you should call
-  * membar_getlock() after it */
- inline static int tsl(fl_lock_t* lock)
- {
- int val;
- #if defined(__CPU_i386) || defined(__CPU_x86_64)
- #ifdef NOSMP
- asm volatile(
- " xor %0, %0 \n\t"
- " btsl $0, %2 \n\t"
- " setc %b0 \n\t"
- : "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory", "cc"
- );
- #else
- asm volatile(
- #ifdef SPIN_OPTIMIZE
- " cmpb $0, %2 \n\t"
- " mov $1, %0 \n\t"
- " jnz 1f \n\t"
- #else
- " mov $1, %0 \n\t"
- #endif
- " xchgb %2, %b0 \n\t"
- "1: \n\t"
- : "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory"
- #ifdef SPIN_OPTIMIZE
- , "cc"
- #endif
- );
- #endif /*NOSMP*/
- #elif defined(__CPU_sparc64)
- asm volatile(
- #ifdef SPIN_OPTIMIZE
- " ldub [%2], %0 \n\t"
- " brnz,a,pn %0, 1f \n\t"
- " nop \n\t"
- #endif
- " ldstub [%2], %0 \n\t"
- "1: \n\t"
- /* membar_getlock must be called outside this function */
- : "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
- );
- #elif defined(__CPU_sparc)
- asm volatile(
- #ifdef SPIN_OPTIMIZE
- " ldub [%2], %0 \n\t"
- " tst %0 \n\t"
- " bne,a 1f \n\t"
- " nop \n\t"
- #endif
- " ldstub [%2], %0 \n\t"
- "1: \n\t"
- /* membar_getlock must be called outside this function */
- : "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
- #ifdef SPIN_OPTIMIZE
- , "cc"
- #endif
- );
- #elif defined __CPU_arm
- asm volatile(
- "swp %0, %2, [%3] \n\t"
- : "=&r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
- );
- #elif defined __CPU_arm6
- asm volatile(
- " ldrex %0, [%2] \n\t"
- " cmp %0, #0 \n\t"
- " strexeq %0, %3, [%2] \n\t" /* executed only if Z=1 */
- /* if %0!=0 => either the lock was already held (ldrex
- * read 1) or it was free and the strexeq failed because
- * the exclusive monitor was cleared => in both cases the
- * lock must be treated as taken, so returning %0 is safe */
- : "=&r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
- );
- #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
- asm volatile(
- "1: \n\t"
- #ifdef SPIN_OPTIMIZE
- " lwzx %0, 0, %2 \n\t"
- " cmpwi %0, 0 \n\t"
- " bne- 2f \n\t" /* predict: not taken */
- #endif
- " lwarx %0, 0, %2\n\t"
- " cmpwi %0, 0\n\t"
- " bne- 2f\n\t"
- " stwcx. %3, 0, %2\n\t"
- " bne- 1b\n\t"
- /* membar_getlock must be called outside this function */
- "2:\n\t"
- : "=&r" (val), "=m"(*lock) : "r"(lock), "r"(1) : "memory", "cc"
- );
- #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
- || defined __CPU_mips64
- long tmp;
-
- asm volatile(
- ".set push \n\t"
- ".set noreorder\n\t"
- ".set mips2 \n\t"
- #ifdef SPIN_OPTIMIZE
- " lw %1, %2 \n\t"
- " bne %1, $0, 2f \n\t"
- " nop \n\t"
- #endif
- "1: ll %1, %2 \n\t"
- " bne %1, $0, 2f \n\t"
- " li %0, 1 \n\t" /* delay slot */
- " sc %0, %2 \n\t"
- " beqz %0, 1b \n\t"
- " nop \n\t"
- "2: \n\t"
- /* membar_getlock must be called outside this function */
- ".set pop\n\t"
- : "=&r" (tmp), "=&r" (val), "=m" (*lock)
- : "m" (*lock)
- : "memory"
- );
- #elif defined __CPU_alpha
- long tmp;
- tmp=0;
- /* the lock's low bit is set to 1 while the lock is held and to 0 otherwise */
- asm volatile(
- "1: ldl %0, %1 \n\t"
- " blbs %0, 2f \n\t" /* optimization if locked */
- " ldl_l %0, %1 \n\t"
- " blbs %0, 2f \n\t"
- " lda %2, 1 \n\t" /* or: or $31, 1, %2 ??? */
- " stl_c %2, %1 \n\t"
- " beq %2, 3f \n\t" /* back cond. jumps are always predicted to be
- taken => make forward jump */
- /* membar_getlock must be called outside this function */
- "2: \n\t"
- ".subsection 2 \n\t"
- "3: br 1b \n\t"
- ".previous \n\t"
- :"=&r" (val), "=m"(*lock), "=&r"(tmp)
- :"m"(*lock)
- : "memory"
- );
- #else
- #error "unknown architecture"
- #endif
- return val;
- }
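- /* Why membar_getlock() is kept separate (a sketch of the intended pattern,
-  * assuming plain busy waiting): the acquire barrier is paid only once,
-  * after the lock is really taken, never on failed tsl() attempts while
-  * spinning: */
- #if 0 /* example only, never compiled */
- static void spin_then_fence(fl_lock_t* lock)
- {
- 	while(tsl(lock))	/* no barrier cost on failed attempts */
- 		;
- 	membar_getlock();	/* paid once, after the successful acquire */
- }
- #endif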
- /* acquire the lock, waiting until it becomes available; the wait strategy
-  * is chosen at build time: BUSY_WAIT spins, ADAPTIVE_WAIT spins
-  * ADAPTIVE_WAIT_LOOPS iterations and then yields, the default yields
-  * immediately */
- inline static void get_lock(fl_lock_t* lock)
- {
- #ifdef ADAPTIVE_WAIT
- int i=ADAPTIVE_WAIT_LOOPS;
- #endif
-
- while(tsl(lock)){
- #ifdef BUSY_WAIT
- #elif defined ADAPTIVE_WAIT
- if (i>0) i--;
- else sched_yield();
- #else
- sched_yield();
- #endif
- }
- membar_getlock();
- }
- /* like get_lock(), but does not wait: returns 0 if the lock was acquired,
-  * -1 if it is already held */
- inline static int try_lock(fl_lock_t* lock)
- {
- if (tsl(lock)){
- return -1;
- }
- membar_getlock();
- return 0;
- }
- inline static void release_lock(fl_lock_t* lock)
- {
- #if defined(__CPU_i386)
- #ifdef NOSMP
- asm volatile(
- " movb $0, %0 \n\t"
- : "=m"(*lock) : : "memory"
- );
- #else /* ! NOSMP */
- int val;
- /* a simple mov $0, (lock) does not force StoreStore ordering on all
- x86 versions and it doesn't seem to force LoadStore either */
- asm volatile(
- " xchgb %b0, %1 \n\t"
- : "=q" (val), "=m" (*lock) : "0" (0) : "memory"
- );
- #endif /* NOSMP */
- #elif defined(__CPU_x86_64)
- asm volatile(
- " movb $0, %0 \n\t" /* on amd64 membar StoreStore | LoadStore is
- implicit (at least on the same mem. type) */
- : "=m"(*lock) : : "memory"
- );
- #elif defined(__CPU_sparc64) || defined(__CPU_sparc)
- asm volatile(
- #ifndef NOSMP
- #ifdef __CPU_sparc64
- "membar #LoadStore | #StoreStore \n\t"
- #else /* __CPU_sparc */
- "stbar \n\t"
- #endif /* __CPU_sparc64 */
- #endif
- "stb %%g0, [%1] \n\t"
- : "=m"(*lock) : "r" (lock) : "memory"
- );
- #elif defined __CPU_arm || defined __CPU_arm6
- #ifndef NOSMP
- #warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
- #endif
- asm volatile(
- " str %1, [%2] \n\r"
- : "=m"(*lock) : "r"(0), "r"(lock) : "memory"
- );
- #elif defined(__CPU_ppc) || defined(__CPU_ppc64)
- asm volatile(
- /* "sync\n\t" lwsync is faster and will work
- * here too
- * [IBM Prgramming Environments Manual, D.4.2.2]
- */
- "lwsync\n\t"
- "stwx %1, 0, %2\n\t"
- : "=m"(*lock) : "r"(0), "r"(lock) : "memory"
- );
- #elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
- || defined __CPU_mips64
- asm volatile(
- ".set push \n\t"
- ".set noreorder \n\t"
- ".set mips2 \n\t"
- #ifndef NOSMP
- #ifdef __CPU_mips
- #warning mips1 smp mode not supported (no membars), try compiling with -DNOSMP
- #else
- " sync \n\t"
- #endif
- #endif
- " sw $0, %0 \n\t"
- ".set pop \n\t"
- : "=m" (*lock) : /* no input */ : "memory"
- );
- #elif defined __CPU_alpha
- asm volatile(
- #ifndef NOSMP
- " mb \n\t"
- #endif
- " stl $31, %0 \n\t"
- : "=m"(*lock) :/* no input*/ : "memory" /* because of the mb */
- );
- #else
- #error "unknown architecture"
- #endif
- }
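- /* Usage sketch (names are illustrative, not part of this file): a counter
-  * protected by this API, with both blocking and non-blocking acquisition: */
- #if 0 /* example only, never compiled */
- static fl_lock_t demo_lock;
- static int demo_counter;
- 
- static void demo_init(void)
- {
- 	init_lock(demo_lock);		/* macro takes the lvalue, not a pointer */
- }
- 
- static void demo_add(int n)
- {
- 	get_lock(&demo_lock);		/* blocks (spins/yields) until taken */
- 	demo_counter+=n;		/* critical section */
- 	release_lock(&demo_lock);	/* includes the release barrier */
- }
- 
- static int demo_try_add(int n)
- {
- 	if (try_lock(&demo_lock))
- 		return -1;		/* contended: caller may retry later */
- 	demo_counter+=n;
- 	release_lock(&demo_lock);
- 	return 0;
- }
- #endif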
- #endif /* fastlock_h */