fastlock.h

/*
 * fast architecture specific locking
 *
 * $Id$
 *
 *
 * Copyright (C) 2001-2003 FhG Fokus
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 * History:
 * --------
 *  2002-02-05  created by andrei
 *  2003-01-16  added PPC locking code contributed by Dinos Dorkofikis
 *              <[email protected]>
 *  2004-09-12  added MIPS locking for ISA>=2 (>r3000) (andrei)
 *  2004-12-16  for now use the same locking code for sparc32 as for sparc64
 *              (it will work only if NOSMP is defined) (andrei)
 *  2005-04-27  added alpha locking code (andrei)
 *  2005-05-25  PPC locking code enabled for PPC64; added a lwsync to
 *              the tsl part and replaced the sync with a lwsync for the
 *              unlock part (andrei)
 *  2006-03-08  mips2 NOSMP (skip sync), optimized x86 & mips clobbers and
 *              input/output constraints (andrei)
 *  2006-04-03  optimization: call lock_get's memory barrier outside tsl, in
 *              the calling function, only if the lock operation succeeded
 *              (membar_getlock()) (andrei)
 *              added try_lock(); more x86 optimizations, x86 release_lock
 *              fix (andrei)
 *  2006-04-04  sparc* optimizations, sparc32 smp support, armv6 no smp
 *              support, ppc, mips*, alpha optimizations (andrei)
 *  2006-04-05  ppc fixes (s/stw/stwx/, s/lwz/lwzx/), early clobber added
 *              where needed (andrei)
 *  2006-11-22  arm early clobber added: according to the swp instruction
 *              specification the address register must be != from the other 2
 *              (Julien Blache <[email protected]>)
 *
 */
/*
 * WARNING: the code was not tested on the following architectures:
 *           - arm6   (cross-compiles ok, no test)
 *           - alpha  (cross-compiles ok, no test)
 *           - mips64 (cross-compiles ok)
 *           - ppc64  (compiles ok)
 *           - sparc32 (tested on a sparc64)
 */

#ifndef fastlock_h
#define fastlock_h

#include "sched_yield.h"


#define SPIN_OPTIMIZE /* if defined, optimize spinning on the lock:
                         try first the lock with non-atomic/non memory locking
                         operations, and only if the lock appears to be free
                         switch to the more expensive version */
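
/* Illustrative sketch (not part of the original header): with SPIN_OPTIMIZE
 * the tsl() implementations below follow the classic "test and test-and-set"
 * idiom. Written in portable C, using the GCC __sync_lock_test_and_set()
 * builtin purely for comparison (unlike tsl(), the builtin also implies an
 * acquire barrier), the idea is roughly:
 *
 *   static inline int tts(volatile int* lock)
 *   {
 *       if (*lock != 0)
 *           return 1;  // plain, non-atomic read: lock looks busy, fail cheaply
 *       // lock appears free => pay for the atomic exchange (returns old value)
 *       return __sync_lock_test_and_set(lock, 1);
 *   }
 */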

typedef volatile int fl_lock_t;


#define init_lock( l ) (l)=0


/* what membar to use (if any) after taking a lock. This
 * was separated from the lock code to allow better optimizations.
 * e.g.: use the membar_getlock only after getting the lock and don't use
 * it if lock_get fails / when spinning on tsl.
 * There is no corresponding membar_release_lock (because lock_release
 * must always include the needed memory barrier).
 * WARNING: this is intended only for internal fastlock use
 * (an illustrative calling pattern is sketched after the definitions below) */
#if defined(__CPU_i386) || defined(__CPU_x86_64)
#define membar_getlock()   /* not needed on x86 */

#elif defined(__CPU_sparc64)
#ifndef NOSMP
#define membar_getlock() \
	asm volatile ("membar #StoreStore | #StoreLoad \n\t" : : : "memory");
	/* can be either StoreStore|StoreLoad or LoadStore|LoadLoad
	 * since ldstub acts both as a store and as a load */
#else
/* no need for a compiler barrier, that is already included in lock_get/tsl */
#define membar_getlock() /* not needed if no smp */
#endif /* NOSMP */

#elif defined(__CPU_sparc)
#define membar_getlock() /* no need for a compiler barrier, already included */

#elif defined __CPU_arm || defined __CPU_arm6
#ifndef NOSMP
#warning smp not supported on arm* (no membars), try compiling with -DNOSMP
#endif /* NOSMP */
#define membar_getlock()

#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
#ifndef NOSMP
#define membar_getlock() \
	asm volatile("lwsync \n\t" : : : "memory");
#else
#define membar_getlock()
#endif /* NOSMP */

#elif defined __CPU_mips2 || defined __CPU_mips64
#ifndef NOSMP
#define membar_getlock() \
	asm volatile("sync \n\t" : : : "memory");
#else
#define membar_getlock()
#endif /* NOSMP */

#elif defined __CPU_mips
#ifndef NOSMP
#warning smp not supported on mips1 (no membars), try compiling with -DNOSMP
#endif
#define membar_getlock()

#elif defined __CPU_alpha
#ifndef NOSMP
#define membar_getlock() \
	asm volatile("mb \n\t" : : : "memory");
#else
#define membar_getlock()
#endif /* NOSMP */

#else /* __CPU_xxx */
#error "unknown architecture"
#endif
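
/* Illustrative calling pattern (not from the original authors): the barrier
 * is meant to be issued only once the lock has actually been acquired, which
 * is exactly what get_lock() and try_lock() below do:
 *
 *   while (tsl(&l))
 *       sched_yield();   // no barrier needed while tsl() keeps failing
 *   membar_getlock();    // acquire-style barrier, paid only on success
 *
 * Thinking of membar_getlock() as roughly a C11
 * atomic_thread_fence(memory_order_acquire) is an approximation made here
 * for illustration only, not a guarantee given by this header. */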

/* test and set lock: returns !=0 if the lock is held by someone else,
 * 0 otherwise.
 * WARNING: no memory barriers included; if you use this function directly
 * (not recommended) and it gets the lock (ret==0), you should call
 * membar_getlock() after it */
inline static int tsl(fl_lock_t* lock)
{
	int val;

#if defined(__CPU_i386) || defined(__CPU_x86_64)

#ifdef NOSMP
	asm volatile(
		" xor %0, %0 \n\t"
		" btsl $0, %2 \n\t"
		" setc %b0 \n\t"
		: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory", "cc"
	);
#else
	asm volatile(
#ifdef SPIN_OPTIMIZE
		" cmpb $0, %2 \n\t"
		" mov $1, %0 \n\t"
		" jnz 1f \n\t"
#else
		" mov $1, %0 \n\t"
#endif
		" xchgb %2, %b0 \n\t"
		"1: \n\t"
		: "=&q" (val), "=m" (*lock) : "m"(*lock) : "memory"
#ifdef SPIN_OPTIMIZE
			, "cc"
#endif
	);
#endif /* NOSMP */

#elif defined(__CPU_sparc64)
	asm volatile(
#ifdef SPIN_OPTIMIZE
		"   ldub [%2], %0 \n\t"
		"   brnz,a,pn %0, 1f \n\t"
		"   nop \n\t"
#endif
		"   ldstub [%2], %0 \n\t"
		"1: \n\t"
		/* membar_getlock must be called outside this function */
		: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
	);
#elif defined(__CPU_sparc)
	asm volatile(
#ifdef SPIN_OPTIMIZE
		"   ldub [%2], %0 \n\t"
		"   tst %0 \n\t"
		"   bne,a 1f \n\t"
		"   nop \n\t"
#endif
		"   ldstub [%2], %0 \n\t"
		"1: \n\t"
		/* membar_getlock must be called outside this function */
		: "=&r"(val), "=m"(*lock) : "r"(lock): "memory"
#ifdef SPIN_OPTIMIZE
			, "cc"
#endif
	);
#elif defined __CPU_arm
	asm volatile(
		"swp %0, %2, [%3] \n\t"
		: "=&r" (val), "=m"(*lock) : "r"(1), "r" (lock) : "memory"
	);
#elif defined __CPU_arm6
	asm volatile(
		"   ldrex %0, [%2] \n\t"
		"   cmp %0, #0 \n\t"
		"   strexeq %0, %3, [%2] \n\t" /* executed only if Z=1 */
		/* if %0!=0 => either it was 1 initially or it was 0
		 * and somebody changed it just before the strexeq (so the
		 * lock is taken) => it's safe to return %0 */
		: "=&r"(val), "=m"(*lock) : "r"(lock), "r"(1) : "cc"
	);
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
	asm volatile(
		"1: \n\t"
#ifdef SPIN_OPTIMIZE
		"   lwzx %0, 0, %2 \n\t"
		"   cmpwi %0, 0 \n\t"
		"   bne- 2f \n\t" /* predict: not taken */
#endif
		"   lwarx  %0, 0, %2\n\t"
		"   cmpwi  %0, 0\n\t"
		"   bne-   2f\n\t"
		"   stwcx. %3, 0, %2\n\t"
		"   bne-   1b\n\t"
		/* membar_getlock must be called outside this function */
		"2:\n\t"
		: "=&r" (val), "=m"(*lock) : "r"(lock), "r"(1) : "memory", "cc"
	);
#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
	|| defined __CPU_mips64
	long tmp;

	asm volatile(
		".set push \n\t"
		".set noreorder\n\t"
		".set mips2 \n\t"
#ifdef SPIN_OPTIMIZE
		"    lw %1, %2 \n\t"
		"    bne %1, $0, 2f \n\t"
		"    nop \n\t"
#endif
		"1:  ll %1, %2 \n\t"
		"    bne %1, $0, 2f \n\t"
		"    li %0, 1 \n\t"  /* delay slot */
		"    sc %0, %2 \n\t"
		"    beqz %0, 1b \n\t"
		"    nop \n\t"
		"2: \n\t"
		/* membar_getlock must be called outside this function */
		".set pop\n\t"
		: "=&r" (tmp), "=&r" (val), "=m" (*lock)
		: "m" (*lock)
		: "memory"
	);
#elif defined __CPU_alpha
	long tmp;
	tmp=0;
	/* lock low bit set to 1 when the lock is held and to 0 otherwise */
	asm volatile(
		"1:  ldl %0, %1   \n\t"
		"    blbs %0, 2f  \n\t"  /* optimization if locked */
		"    ldl_l %0, %1 \n\t"
		"    blbs %0, 2f  \n\t"
		"    lda %2, 1    \n\t"  /* or: or $31, 1, %2 ??? */
		"    stl_c %2, %1 \n\t"
		"    beq %2, 3f   \n\t"  /* backward cond. jumps are always predicted to
		                            be taken => make a forward jump */
		/* membar_getlock must be called outside this function */
		"2:               \n\t"
		".subsection 2 \n\t"
		"3:  br 1b \n\t"
		".previous \n\t"
		: "=&r" (val), "=m"(*lock), "=&r"(tmp)
		: "m"(*lock)
		: "memory"
	);
#else
#error "unknown architecture"
#endif
	return val;
}


/* gets the lock; blocks (spinning and/or yielding, depending on the wait
 * mode selected at compile time) until the lock is acquired */
inline static void get_lock(fl_lock_t* lock)
{
#ifdef ADAPTIVE_WAIT
	int i=ADAPTIVE_WAIT_LOOPS;
#endif

	while(tsl(lock)){
#ifdef BUSY_WAIT
#elif defined ADAPTIVE_WAIT
		if (i>0) i--;
		else sched_yield();
#else
		sched_yield();
#endif
	}
	membar_getlock();
}
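
/* Illustrative build configuration (hypothetical flag values, not from the
 * original header): the waiting strategy in get_lock() is selected at compile
 * time; ADAPTIVE_WAIT_LOOPS must be supplied by the build when ADAPTIVE_WAIT
 * is used:
 *
 *   cc ... -DBUSY_WAIT                                  spin, never yield
 *   cc ... -DADAPTIVE_WAIT -DADAPTIVE_WAIT_LOOPS=1024   spin ~1024 times, then yield
 *   cc ...                                              default: yield after every failed tsl()
 */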

/* like get_lock, but it does not wait: returns 0 if it got the lock,
 * <0 (-1) otherwise */
inline static int try_lock(fl_lock_t* lock)
{
	if (tsl(lock)){
		return -1;
	}
	membar_getlock();
	return 0;
}
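
/* Illustrative usage sketch (not part of the original header):
 *
 *   if (try_lock(&l) == 0) {
 *       ... critical section ...
 *       release_lock(&l);
 *   } else {
 *       ... lock busy: do something else instead of blocking ...
 *   }
 */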

/* releases the lock; always includes the needed memory barrier */
inline static void release_lock(fl_lock_t* lock)
{
#if defined(__CPU_i386)
#ifdef NOSMP
	asm volatile(
		" movb $0, %0 \n\t"
		: "=m"(*lock) : : "memory"
	);
#else /* ! NOSMP */
	int val;
	/* a simple mov $0, (lock) does not force StoreStore ordering on all
	   x86 versions and it doesn't seem to force LoadStore either */
	asm volatile(
		" xchgb %b0, %1 \n\t"
		: "=q" (val), "=m" (*lock) : "0" (0) : "memory"
	);
#endif /* NOSMP */
#elif defined(__CPU_x86_64)
	asm volatile(
		" movb $0, %0 \n\t" /* on amd64 membar StoreStore | LoadStore is
		                       implicit (at least on the same mem. type) */
		: "=m"(*lock) : : "memory"
	);
#elif defined(__CPU_sparc64) || defined(__CPU_sparc)
	asm volatile(
#ifndef NOSMP
#ifdef __CPU_sparc64
		"membar #LoadStore | #StoreStore \n\t"
#else /* __CPU_sparc */
		"stbar \n\t"
#endif /* __CPU_sparc64 */
#endif
		"stb %%g0, [%1] \n\t"
		: "=m"(*lock) : "r" (lock) : "memory"
	);
#elif defined __CPU_arm || defined __CPU_arm6
#ifndef NOSMP
#warning arm* smp mode not supported (no membars), try compiling with -DNOSMP
#endif
	asm volatile(
		" str %1, [%2] \n\t"
		: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
	);
#elif defined(__CPU_ppc) || defined(__CPU_ppc64)
	asm volatile(
		/* "sync\n\t"  lwsync is faster and will work
		 *             here too
		 *             [IBM Programming Environments Manual, D.4.2.2]
		 */
		"lwsync\n\t"
		"stwx %1, 0, %2\n\t"
		: "=m"(*lock) : "r"(0), "r"(lock) : "memory"
	);
#elif defined __CPU_mips2 || ( defined __CPU_mips && defined MIPS_HAS_LLSC ) \
	|| defined __CPU_mips64
	asm volatile(
		".set push \n\t"
		".set noreorder \n\t"
		".set mips2 \n\t"
#ifndef NOSMP
#ifdef __CPU_mips
#warning mips1 smp mode not supported (no membars), try compiling with -DNOSMP
#else
		"    sync \n\t"
#endif
#endif
		"    sw $0, %0 \n\t"
		".set pop \n\t"
		: "=m" (*lock) : /* no input */ : "memory"
	);
#elif defined __CPU_alpha
	asm volatile(
#ifndef NOSMP
		"    mb          \n\t"
#endif
		"    stl $31, %0 \n\t"
		: "=m"(*lock) : /* no input */ : "memory"  /* because of the mb */
	);
#else
#error "unknown architecture"
#endif
}
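
/* Illustrative end-to-end usage sketch (not part of the original header).
 * The lock normally lives in memory shared by all processes; shm_malloc() is
 * assumed here only as an example allocator:
 *
 *   fl_lock_t* l;
 *
 *   l = shm_malloc(sizeof(fl_lock_t));  // must be visible to every process
 *   if (l == 0)
 *       goto error;
 *   init_lock(*l);                      // init_lock() takes the lvalue itself
 *   ...
 *   get_lock(l);                        // blocks (spins/yields) until acquired
 *   ... critical section ...
 *   release_lock(l);                    // includes the needed memory barrier
 */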

#endif