atomic_ops_stack.c 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. /*
  2. * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
  3. * Original Author: Hans Boehm
  4. *
  5. * This file may be redistributed and/or modified under the
  6. * terms of the GNU General Public License as published by the Free Software
  7. * Foundation; either version 2, or (at your option) any later version.
  8. *
  9. * It is distributed in the hope that it will be useful, but WITHOUT ANY
  10. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. * FOR A PARTICULAR PURPOSE. See the GNU General Public License in the
  12. * file doc/COPYING for more details.
  13. */
  14. #if defined(HAVE_CONFIG_H)
  15. # include "config.h"
  16. #endif
  17. #include <string.h>
  18. #include <stdlib.h>
  19. #include <assert.h>
  20. #define AO_REQUIRE_CAS
  21. #include "atomic_ops_stack.h"
  22. #if defined(_MSC_VER) \
  23. || defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__CYGWIN__)
  24. /* AO_pause not defined elsewhere */
  25. /* FIXME: At least AO_spin should be factored out. */
  26. #include <windows.h>
  27. AO_t dummy;
  28. /* Spin for 2**n units. */
  29. static void AO_spin(int n)
  30. {
  31. int i;
  32. AO_T j = AO_load(&dummy);
  33. for (i = 0; i < (2 << n); ++i)
  34. {
  35. j *= 5;
  36. j -= 4;
  37. }
  38. AO_store(&dummy, j);
  39. }
  40. void AO_pause(int n)
  41. {
  42. if (n < 12)
  43. AO_spin(n);
  44. else
  45. {
  46. DWORD msecs;
  47. /* Short async-signal-safe sleep. */
  48. msecs = (n > 18? 100 : (1 << (n - 12)));
  49. Sleep(msecs);
  50. }
  51. }
  52. #else
  53. /* AO_pause is available elsewhere */
  54. extern void AO_pause(int);
  55. #endif
  56. #ifdef AO_USE_ALMOST_LOCK_FREE
  57. /* LIFO linked lists based on compare-and-swap. We need to avoid */
/* the case of a node deletion and reinsertion while I'm deleting */
/* it, since that may cause my CAS to succeed even though the next */
  60. /* pointer is now wrong. Our solution is not fully lock-free, but it */
  61. /* is good enough for signal handlers, provided we have a suitably low */
  62. /* bound on the number of recursive signal handler reentries. */
  63. /* A list consists of a first pointer and a blacklist */
  64. /* of pointer values that are currently being removed. No list element */
  65. /* on the blacklist may be inserted. If we would otherwise do so, we */
  66. /* are allowed to insert a variant that differs only in the least */
  67. /* significant, ignored, bits. If the list is full, we wait. */
  68. /* Crucial observation: A particular padded pointer x (i.e. pointer */
  69. /* plus arbitrary low order bits) can never be newly inserted into */
  70. /* a list while it's in the corresponding auxiliary data structure. */
  71. /* The second argument is a pointer to the link field of the element */
  72. /* to be inserted. */
  73. /* Both list headers and link fields contain "perturbed" pointers, i.e. */
  74. /* pointers with extra bits "or"ed into the low order bits. */
/* Push the element whose link field is at address x onto the list  */
/* headed at *list, using auxiliary structure a as the blacklist of */
/* padded-pointer values currently being removed.  If our padded    */
/* value collides with a blacklist entry, we perturb the low-order  */
/* (ignored) bits and retry; thus the value actually stored may     */
/* differ from (AO_t)x in its AO_BIT_MASK bits.                     */
void
AO_stack_push_explicit_aux_release(volatile AO_t *list, AO_t *x,
                                   AO_stack_aux *a)
{
  int i;
  AO_t x_bits = (AO_t)x;
  AO_t next;

  /* No deletions of x can start here, since x is not currently in the  */
  /* list.                                                              */
 retry:
# if AO_BL_SIZE == 2
  {
    /* Start all loads as close to concurrently as possible.  (Special- */
    /* cased so both blacklist slots are read back-to-back rather than  */
    /* through a loop.)                                                 */
    AO_t entry1 = AO_load(a -> AO_stack_bl);
    AO_t entry2 = AO_load(a -> AO_stack_bl + 1);
    if (entry1 == x_bits || entry2 == x_bits)
      {
        /* Entry is currently being removed.  Change it a little.       */
        ++x_bits;
        if ((x_bits & AO_BIT_MASK) == 0)
          /* Version count overflowed; restart from the unperturbed     */
          /* value.  EXTREMELY unlikely, but possible.                  */
          x_bits = (AO_t)x;
        goto retry;
      }
  }
# else
  /* General case: scan every blacklist slot for a collision.           */
  for (i = 0; i < AO_BL_SIZE; ++i)
    {
      if (AO_load(a -> AO_stack_bl + i) == x_bits)
        {
          /* Entry is currently being removed.  Change it a little.     */
          ++x_bits;
          if ((x_bits & AO_BIT_MASK) == 0)
            /* Version count overflowed; restart from the unperturbed   */
            /* value.  EXTREMELY unlikely, but possible.                */
            x_bits = (AO_t)x;
          goto retry;
        }
    }
# endif
  /* x_bits is not currently being deleted.  Standard Treiber-style     */
  /* push: link the new element to the current head, then CAS the       */
  /* head with release semantics.                                       */
  do
    {
      next = AO_load(list);
      *x = next;
    }
  while (!AO_compare_and_swap_release(list, next, x_bits));
}
/*
 * I concluded experimentally that checking a value first before
 * performing a compare-and-swap is usually beneficial on X86, but
 * slows things down appreciably with contention on Itanium.
 * Since the Itanium behavior makes more sense to me (more cache line
 * movement unless we're mostly reading, but back-off should guard
 * against that), we take Itanium as the default.  Measurements on
 * other multiprocessor architectures would be useful.  (On a uniprocessor,
 * the initial check is almost certainly a very small loss.) - HB
 */
  134. #ifdef __i386__
  135. # define PRECHECK(a) (a) == 0 &&
  136. #else
  137. # define PRECHECK(a)
  138. #endif
/* Pop the first element from the list headed at *list, returning a */
/* pointer to its link field, or 0 if the list is empty.  The head  */
/* value is placed on blacklist a while we work on it, so that no   */
/* concurrent reinsertion of the same padded pointer can make our   */
/* CAS succeed spuriously (the ABA problem).                        */
AO_t *
AO_stack_pop_explicit_aux_acquire(volatile AO_t *list, AO_stack_aux * a)
{
  unsigned i;
  int j = 0;                    /* back-off argument for AO_pause   */
  AO_t first;                   /* padded head pointer              */
  AO_t * first_ptr;             /* real (unpadded) head pointer     */
  AO_t next;

 retry:
  first = AO_load(list);
  if (0 == first) return 0;     /* empty stack                      */
  /* Insert first into aux black list.                              */
  /* This may spin if more than AO_BL_SIZE removals using auxiliary */
  /* structure a are currently in progress.                         */
  for (i = 0; ; )
    {
      /* Claim an empty blacklist slot with a 0 -> first CAS.       */
      /* PRECHECK is a cheap read-before-CAS on x86 only.           */
      if (PRECHECK(a -> AO_stack_bl[i])
          AO_compare_and_swap_acquire(a->AO_stack_bl+i, 0, first))
        break;
      ++i;
      if ( i >= AO_BL_SIZE )
        {
          /* All slots busy: back off increasingly long, then rescan. */
          i = 0;
          AO_pause(++j);
        }
    }
  assert(i >= 0 && i < AO_BL_SIZE);
  assert(a -> AO_stack_bl[i] == first);
  /* First is on the auxiliary black list.  It may be removed by    */
  /* another thread before we get to it, but a new insertion of x   */
  /* cannot be started here.                                        */
  /* Only we can remove it from the black list.                     */
  /* We need to make sure that first is still the first entry on the */
  /* list.  Otherwise it's possible that a reinsertion of it was    */
  /* already started before we added the black list entry.          */
  if (first != AO_load(list)) {
    /* Head changed under us: release the blacklist slot and retry. */
    AO_store_release(a->AO_stack_bl+i, 0);
    goto retry;
  }
  first_ptr = AO_REAL_NEXT_PTR(first);
  next = AO_load(first_ptr);
  if (!AO_compare_and_swap_release(list, first, next)) {
    /* Someone else popped (or pushed) first; undo and retry.       */
    AO_store_release(a->AO_stack_bl+i, 0);
    goto retry;
  }
  assert(*list != first);
  /* Since we never insert an entry on the black list, this cannot have */
  /* succeeded unless first remained on the list while we were running. */
  /* Thus its next link cannot have changed out from under us, and we   */
  /* removed exactly one entry and preserved the rest of the list.      */
  /* Note that it is quite possible that an additional entry was        */
  /* inserted and removed while we were running; this is OK since the   */
  /* part of the list following first must have remained unchanged, and */
  /* first must again have been at the head of the list when the        */
  /* compare_and_swap succeeded.                                        */
  AO_store_release(a->AO_stack_bl+i, 0);
  return first_ptr;
}
  197. #else /* ! USE_ALMOST_LOCK_FREE */
  198. /* Better names for fields in AO_stack_t */
  199. #define ptr AO_val2
  200. #define version AO_val1
  201. #if defined(AO_HAVE_compare_double_and_swap_double)
/* Push element onto the stack: link it to the current head, then   */
/* swing the head pointer to it with a release CAS.                 */
void AO_stack_push_release(AO_stack_t *list, AO_t *element)
{
  AO_t next;

  do {
    next = AO_load(&(list -> ptr));
    *element = next;
  } while (!AO_compare_and_swap_release
           ( &(list -> ptr), next, (AO_t) element));
  /* This uses a narrow CAS here, an old optimization suggested       */
  /* by Treiber.  Pop is still safe, since we run into the ABA        */
  /* problem only if there were both intervening "pop"s and "push"es. */
  /* In that case we still see a change in the version number.        */
}
/* Pop and return the head element, or 0 if the stack is empty.     */
/* Uses a double-width CAS on (version, ptr): bumping the version   */
/* on every successful pop defeats the ABA problem even though the  */
/* pointer value may recur.                                         */
AO_t *AO_stack_pop_acquire(AO_stack_t *list)
{
  AO_t *cptr;
  AO_t next;
  AO_t cversion;

  do {
    /* Version must be loaded first.  */
    cversion = AO_load_acquire(&(list -> version));
    cptr = (AO_t *)AO_load(&(list -> ptr));
    if (cptr == 0) return 0;
    /* NOTE(review): this dereference presumes popped nodes are not  */
    /* freed/unmapped while another pop may still be reading them —  */
    /* confirm the client's memory-management contract.              */
    next = *cptr;
  } while (!AO_compare_double_and_swap_double_release
           (list, cversion, (AO_t) cptr, cversion+1, (AO_t) next));
  return cptr;
}
  230. #elif defined(AO_HAVE_compare_and_swap_double)
  231. /* Needed for future IA64 processors. No current clients? */
  232. #error Untested! Probably doesnt work.
  233. /* We have a wide CAS, but only does an AO_t-wide comparison. */
  234. /* We can't use the Treiber optimization, since we only check */
  235. /* for an unchanged version number, not an unchanged pointer. */
/* Push element onto the stack.  Unlike the narrow-CAS variant      */
/* above, this must use the wide CAS and bump the version on push,  */
/* because this CAS flavor compares only the version word, not the  */
/* pointer (see the comment preceding this branch).                 */
void AO_stack_push_release(AO_stack_t *list, AO_t *element)
{
  AO_t version;
  AO_t next_ptr;

  do {
    /* Again version must be loaded first, for different reason. */
    version = AO_load_acquire(&(list -> version));
    next_ptr = AO_load(&(list -> ptr));
    *element = next_ptr;
  } while (!AO_compare_and_swap_double_release(
                       list, version,
                       version+1, (AO_t) element));
}
/* Pop and return the head element, or 0 if the stack is empty.     */
/* Same versioned double-width-CAS scheme as the variant above.     */
AO_t *AO_stack_pop_acquire(AO_stack_t *list)
{
  AO_t *cptr;
  AO_t next;
  AO_t cversion;

  do {
    /* Version must be loaded before the pointer.  */
    cversion = AO_load_acquire(&(list -> version));
    cptr = (AO_t *)AO_load(&(list -> ptr));
    if (cptr == 0) return 0;
    /* NOTE(review): assumes the node is still readable even if      */
    /* concurrently popped — confirm client memory management.       */
    next = *cptr;
  } while (!AO_compare_double_and_swap_double_release
           (list, cversion, (AO_t) cptr, cversion+1, next));
  return cptr;
}
  263. #endif /* AO_HAVE_compare_and_swap_double */
  264. #endif /* ! USE_ALMOST_LOCK_FREE */