threading.cpp
#if defined(GB_SYSTEM_LINUX)
#include <signal.h>
#if __has_include(<valgrind/helgrind.h>)
#include <valgrind/helgrind.h>
#define HAS_VALGRIND
#endif
#endif

#if defined(GB_SYSTEM_WINDOWS)
#pragma warning(push)
#pragma warning(disable: 4505)
#endif

#if defined(HAS_VALGRIND)
#define ANNOTATE_LOCK_PRE(m, t)   VALGRIND_HG_MUTEX_LOCK_PRE(m, t)
#define ANNOTATE_LOCK_POST(m)     VALGRIND_HG_MUTEX_LOCK_POST(m)
#define ANNOTATE_UNLOCK_PRE(m)    VALGRIND_HG_MUTEX_UNLOCK_PRE(m)
#define ANNOTATE_UNLOCK_POST(m)   VALGRIND_HG_MUTEX_UNLOCK_POST(m)
#define ANNOTATE_SEM_WAIT_POST(s) VALGRIND_HG_SEM_WAIT_POST(s)
#define ANNOTATE_SEM_POST_PRE(s)  VALGRIND_HG_SEM_POST_PRE(s)
#else
#define ANNOTATE_LOCK_PRE(m, t)
#define ANNOTATE_LOCK_POST(m)
#define ANNOTATE_UNLOCK_PRE(m)
#define ANNOTATE_UNLOCK_POST(m)
#define ANNOTATE_SEM_WAIT_POST(s)
#define ANNOTATE_SEM_POST_PRE(s)
#endif

struct BlockingMutex;
struct RecursiveMutex;
struct RwMutex;
struct Semaphore;
struct Condition;
struct Thread;
struct ThreadPool;
struct Parker;

#define THREAD_PROC(name) isize name(struct Thread *thread)
gb_internal THREAD_PROC(thread_pool_thread_proc);

#define WORKER_TASK_PROC(name) isize name(void *data)
typedef WORKER_TASK_PROC(WorkerTaskProc);

typedef struct WorkerTask {
	WorkerTaskProc *do_work;
	void           *data;
} WorkerTask;

typedef struct TaskRingBuffer {
	std::atomic<isize>        size;
	std::atomic<WorkerTask *> buffer;
} TaskRingBuffer;

typedef struct TaskQueue {
	std::atomic<isize>            top;
	std::atomic<isize>            bottom;
	std::atomic<TaskRingBuffer *> ring;
} TaskQueue;
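// The TaskRingBuffer/TaskQueue pair above is (presumably) the per-thread
// work-stealing deque used by the thread pool: the owning thread pushes and
// pops at `bottom`, other workers steal from `top`, and `ring` points at the
// current power-of-two backing buffer. The push/pop/steal operations
// themselves live with the thread pool code, not in this file.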
struct Thread {
#if defined(GB_SYSTEM_WINDOWS)
	void *win32_handle;
#else
	pthread_t posix_handle;
#endif

	isize idx;
	isize stack_size;

	struct TaskQueue   queue;
	struct ThreadPool *pool;
};

typedef std::atomic<i32> Futex;
typedef volatile i32     Footex;

gb_internal void futex_wait(Futex *addr, Footex val);
gb_internal void futex_signal(Futex *addr);
gb_internal void futex_broadcast(Futex *addr);

gb_internal void mutex_lock    (BlockingMutex *m);
gb_internal bool mutex_try_lock(BlockingMutex *m);
gb_internal void mutex_unlock  (BlockingMutex *m);

gb_internal void mutex_lock    (RecursiveMutex *m);
gb_internal bool mutex_try_lock(RecursiveMutex *m);
gb_internal void mutex_unlock  (RecursiveMutex *m);

gb_internal void rw_mutex_lock           (RwMutex *m);
gb_internal bool rw_mutex_try_lock       (RwMutex *m);
gb_internal void rw_mutex_unlock         (RwMutex *m);
gb_internal void rw_mutex_shared_lock    (RwMutex *m);
gb_internal bool rw_mutex_try_shared_lock(RwMutex *m);
gb_internal void rw_mutex_shared_unlock  (RwMutex *m);

gb_internal void semaphore_post(Semaphore *s, i32 count);
gb_internal void semaphore_wait(Semaphore *s);

gb_internal void condition_broadcast(Condition *c);
gb_internal void condition_signal(Condition *c);
gb_internal void condition_wait(Condition *c, BlockingMutex *m);

gb_internal void park(Parker *p);
gb_internal void unpark_one(Parker *p);
gb_internal void unpark_all(Parker *p);

gb_internal u32 thread_current_id(void);

gb_internal void thread_init            (ThreadPool *pool, Thread *t, isize idx);
gb_internal void thread_init_and_start  (ThreadPool *pool, Thread *t, isize idx);
gb_internal void thread_join_and_destroy(Thread *t);
gb_internal void thread_set_name        (Thread *t, char const *name);

gb_internal void yield_thread(void);
gb_internal void yield_process(void);

struct Wait_Signal {
	Futex futex;
};

gb_internal void wait_signal_until_available(Wait_Signal *ws) {
	if (ws->futex.load() == 0) {
		futex_wait(&ws->futex, 0);
	}
}

gb_internal void wait_signal_set(Wait_Signal *ws) {
	ws->futex.store(1);
	futex_broadcast(&ws->futex);
}
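// Illustrative usage (assuming a zero-initialized `Wait_Signal ws` shared
// between threads): any number of threads may block until the signal is set
// once by some other thread.
//
//     // waiting thread(s):
//     wait_signal_until_available(&ws); // blocks while ws.futex == 0
//
//     // signalling thread:
//     wait_signal_set(&ws);             // stores 1 and wakes all waiters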
struct MutexGuard {
	MutexGuard()                   = delete;
	MutexGuard(MutexGuard const &) = delete;
	MutexGuard(MutexGuard &&)      = delete;

	explicit MutexGuard(BlockingMutex *bm) noexcept : bm{bm} {
		mutex_lock(this->bm);
	}
	explicit MutexGuard(RecursiveMutex *rm) noexcept : rm{rm} {
		mutex_lock(this->rm);
	}
	explicit MutexGuard(RwMutex *rwm) noexcept : rwm{rwm} {
		rw_mutex_lock(this->rwm);
	}
	explicit MutexGuard(BlockingMutex &bm) noexcept : bm{&bm} {
		mutex_lock(this->bm);
	}
	explicit MutexGuard(RecursiveMutex &rm) noexcept : rm{&rm} {
		mutex_lock(this->rm);
	}
	explicit MutexGuard(RwMutex &rwm) noexcept : rwm{&rwm} {
		rw_mutex_lock(this->rwm);
	}
	~MutexGuard() noexcept {
		if (this->bm) {
			mutex_unlock(this->bm);
		} else if (this->rm) {
			mutex_unlock(this->rm);
		} else if (this->rwm) {
			rw_mutex_unlock(this->rwm);
		}
	}

	operator bool() const noexcept { return true; }

	// Default to nullptr so the destructor only unlocks the mutex that was
	// actually acquired; each constructor sets exactly one of these.
	BlockingMutex  *bm  = nullptr;
	RecursiveMutex *rm  = nullptr;
	RwMutex        *rwm = nullptr;
};

#define MUTEX_GUARD_BLOCK(m) if (MutexGuard GB_DEFER_3(_mutex_guard_){m})
#define MUTEX_GUARD(m)    mutex_lock(m);    defer (mutex_unlock(m))
#define RW_MUTEX_GUARD(m) rw_mutex_lock(m); defer (rw_mutex_unlock(m))
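// Illustrative usage (assuming some `BlockingMutex m` guarding shared state):
//
//     MUTEX_GUARD_BLOCK(&m) {
//         // critical section; the guard unlocks when this block is exited
//     }
//
//     void touch_shared_state(BlockingMutex *m) {
//         MUTEX_GUARD(m); // locks now, unlocks via defer() at scope exit
//         // critical section for the remainder of the scope
//     }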
struct RecursiveMutex {
	Futex owner;
	i32   recursion;
};

gb_internal void mutex_lock(RecursiveMutex *m) {
	Futex tid;
	tid.store(cast(i32)thread_current_id());
	for (;;) {
		i32 prev_owner = 0;
		m->owner.compare_exchange_strong(prev_owner, tid, std::memory_order_acquire, std::memory_order_acquire);
		if (prev_owner == 0 || prev_owner == tid) {
			m->recursion++;
			// inside the lock
			return;
		}
		futex_wait(&m->owner, prev_owner);
	}
}
gb_internal bool mutex_try_lock(RecursiveMutex *m) {
	Futex tid;
	tid.store(cast(i32)thread_current_id());

	i32 prev_owner = 0;
	m->owner.compare_exchange_strong(prev_owner, tid, std::memory_order_acquire, std::memory_order_acquire);
	if (prev_owner == 0 || prev_owner == tid) {
		m->recursion++;
		// inside the lock
		return true;
	}
	return false;
}
gb_internal void mutex_unlock(RecursiveMutex *m) {
	m->recursion--;
	if (m->recursion != 0) {
		return;
	}
	m->owner.exchange(0, std::memory_order_release);
	futex_signal(&m->owner);
	// outside the lock
}
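// Illustrative usage (assuming a zero-initialized `RecursiveMutex rm`): the
// owning thread may re-acquire a mutex it already holds, as long as every
// mutex_lock() is balanced by a mutex_unlock():
//
//     mutex_lock(&rm);
//     mutex_lock(&rm);   // same thread: recursion == 2, does not block
//     mutex_unlock(&rm);
//     mutex_unlock(&rm); // recursion == 0: ownership released, a waiter is woken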
struct Semaphore {
	Footex count_;

	Futex &count() noexcept {
		return *(Futex *)&this->count_;
	}
	Futex const &count() const noexcept {
		return *(Futex *)&this->count_;
	}
};

gb_internal void semaphore_post(Semaphore *s, i32 count) {
	s->count().fetch_add(count, std::memory_order_release);
	if (s->count().load() == 1) {
		futex_signal(&s->count());
	} else {
		futex_broadcast(&s->count());
	}
}

gb_internal void semaphore_wait(Semaphore *s) {
	for (;;) {
		i32 original_count = s->count().load(std::memory_order_relaxed);
		while (original_count == 0) {
			futex_wait(&s->count(), original_count);
			original_count = s->count().load(std::memory_order_relaxed);
		}

		if (s->count().compare_exchange_strong(original_count, original_count-1, std::memory_order_acquire, std::memory_order_acquire)) {
			return;
		}
	}
}
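// Illustrative usage (assuming a zero-initialized `Semaphore items`): a
// producer posts once per unit of work and each consumer waits before taking
// one, so the count never goes below zero.
//
//     // producer:
//     semaphore_post(&items, 1); // one more item available, wakes a waiter
//
//     // consumer:
//     semaphore_wait(&items);    // blocks while the count is 0, then decrements it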
#if defined(GB_SYSTEM_WINDOWS)
struct BlockingMutex {
	SRWLOCK srwlock;
};

gb_internal void mutex_lock(BlockingMutex *m) {
	AcquireSRWLockExclusive(&m->srwlock);
}
gb_internal bool mutex_try_lock(BlockingMutex *m) {
	return !!TryAcquireSRWLockExclusive(&m->srwlock);
}
gb_internal void mutex_unlock(BlockingMutex *m) {
	ReleaseSRWLockExclusive(&m->srwlock);
}

struct Condition {
	CONDITION_VARIABLE cond;
};

gb_internal void condition_broadcast(Condition *c) {
	WakeAllConditionVariable(&c->cond);
}
gb_internal void condition_signal(Condition *c) {
	WakeConditionVariable(&c->cond);
}
gb_internal void condition_wait(Condition *c, BlockingMutex *m) {
	SleepConditionVariableSRW(&c->cond, &m->srwlock, INFINITE, 0);
}
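// Illustrative usage (on any platform, assuming a `BlockingMutex m`, a
// `Condition c`, and a flag `ready` that m protects): as with any condition
// variable, wait in a loop and re-check the predicate, since wake-ups may be
// spurious.
//
//     mutex_lock(&m);
//     while (!ready) {
//         condition_wait(&c, &m); // releases m while sleeping, re-locks before returning
//     }
//     mutex_unlock(&m);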
struct RwMutex {
	SRWLOCK srwlock;
};

gb_internal void rw_mutex_lock(RwMutex *m) {
	AcquireSRWLockExclusive(&m->srwlock);
}
gb_internal bool rw_mutex_try_lock(RwMutex *m) {
	return !!TryAcquireSRWLockExclusive(&m->srwlock);
}
gb_internal void rw_mutex_unlock(RwMutex *m) {
	ReleaseSRWLockExclusive(&m->srwlock);
}

gb_internal void rw_mutex_shared_lock(RwMutex *m) {
	AcquireSRWLockShared(&m->srwlock);
}
gb_internal bool rw_mutex_try_shared_lock(RwMutex *m) {
	return !!TryAcquireSRWLockShared(&m->srwlock);
}
gb_internal void rw_mutex_shared_unlock(RwMutex *m) {
	ReleaseSRWLockShared(&m->srwlock);
}

#else

enum Internal_Mutex_State : i32 {
	Internal_Mutex_State_Unlocked = 0,
	Internal_Mutex_State_Locked   = 1,
	Internal_Mutex_State_Waiting  = 2,
};
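// The futex-based BlockingMutex below uses the usual three-state protocol:
// an uncontended acquire moves Unlocked -> Locked; once a thread has to
// sleep, the state is forced to Waiting, so unlock only needs to issue a
// futex wake when there may actually be sleepers.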
struct BlockingMutex {
#if defined(HAS_VALGRIND)
	// BlockingMutex() {
	// 	VALGRIND_HG_MUTEX_INIT_POST(this, 0);
	// }
	// ~BlockingMutex() {
	// 	VALGRIND_HG_MUTEX_DESTROY_PRE(this);
	// }
#endif
	i32 state_;

	Futex &state() {
		return *(Futex *)&this->state_;
	}
	Futex const &state() const {
		return *(Futex const *)&this->state_;
	}
};

gb_no_inline gb_internal void mutex_lock_slow(BlockingMutex *m, i32 curr_state) {
	i32 new_state = curr_state;
	for (i32 spin = 0; spin < 100; spin++) {
		i32 state = Internal_Mutex_State_Unlocked;
		bool ok = m->state().compare_exchange_weak(state, new_state, std::memory_order_acquire, std::memory_order_consume);
		if (ok) {
			return;
		}
		if (state == Internal_Mutex_State_Waiting) {
			break;
		}
		for (i32 i = gb_min(spin+1, 32); i > 0; i--) {
			yield_thread();
		}
	}

	// Set just in case 100 iterations did not do it
	new_state = Internal_Mutex_State_Waiting;

	for (;;) {
		if (m->state().exchange(Internal_Mutex_State_Waiting, std::memory_order_acquire) == Internal_Mutex_State_Unlocked) {
			return;
		}
		futex_wait(&m->state(), new_state);
		yield_thread();
	}
}

gb_internal void mutex_lock(BlockingMutex *m) {
	ANNOTATE_LOCK_PRE(m, 0);
	i32 v = m->state().exchange(Internal_Mutex_State_Locked, std::memory_order_acquire);
	if (v != Internal_Mutex_State_Unlocked) {
		mutex_lock_slow(m, v);
	}
	ANNOTATE_LOCK_POST(m);
}
gb_internal bool mutex_try_lock(BlockingMutex *m) {
	ANNOTATE_LOCK_PRE(m, 1);
	i32 v = m->state().exchange(Internal_Mutex_State_Locked, std::memory_order_acquire);
	if (v == Internal_Mutex_State_Unlocked) {
		ANNOTATE_LOCK_POST(m);
		return true;
	}
	return false;
}

gb_no_inline gb_internal void mutex_unlock_slow(BlockingMutex *m) {
	futex_signal(&m->state());
}

gb_internal void mutex_unlock(BlockingMutex *m) {
	ANNOTATE_UNLOCK_PRE(m);
	i32 v = m->state().exchange(Internal_Mutex_State_Unlocked, std::memory_order_release);
	switch (v) {
	case Internal_Mutex_State_Unlocked:
		GB_PANIC("Unreachable");
		break;
	case Internal_Mutex_State_Locked:
		// Okay
		break;
	case Internal_Mutex_State_Waiting:
		mutex_unlock_slow(m);
		break;
	}
	ANNOTATE_UNLOCK_POST(m);
}
struct Condition {
	i32 state_;

	Futex &state() {
		return *(Futex *)&this->state_;
	}
	Futex const &state() const {
		return *(Futex const *)&this->state_;
	}
};

gb_internal void condition_broadcast(Condition *c) {
	c->state().fetch_add(1, std::memory_order_release);
	futex_broadcast(&c->state());
}
gb_internal void condition_signal(Condition *c) {
	c->state().fetch_add(1, std::memory_order_release);
	futex_signal(&c->state());
}
gb_internal void condition_wait(Condition *c, BlockingMutex *m) {
	i32 state = c->state().load(std::memory_order_relaxed);
	mutex_unlock(m);
	futex_wait(&c->state(), state);
	mutex_lock(m);
}

struct RwMutex {
	// TODO(bill): make this a proper RW mutex
	BlockingMutex mutex;
};

gb_internal void rw_mutex_lock(RwMutex *m) {
	mutex_lock(&m->mutex);
}
gb_internal bool rw_mutex_try_lock(RwMutex *m) {
	return mutex_try_lock(&m->mutex);
}
gb_internal void rw_mutex_unlock(RwMutex *m) {
	mutex_unlock(&m->mutex);
}

gb_internal void rw_mutex_shared_lock(RwMutex *m) {
	mutex_lock(&m->mutex);
}
gb_internal bool rw_mutex_try_shared_lock(RwMutex *m) {
	return mutex_try_lock(&m->mutex);
}
gb_internal void rw_mutex_shared_unlock(RwMutex *m) {
	mutex_unlock(&m->mutex);
}

#endif
struct Parker {
	Futex state;
};
enum ParkerState : u32 {
	ParkerState_Empty    = 0,
	ParkerState_Notified = 1,
	ParkerState_Parked   = UINT32_MAX,
};

gb_internal void park(Parker *p) {
	if (p->state.fetch_sub(1, std::memory_order_acquire) == ParkerState_Notified) {
		return;
	}
	for (;;) {
		futex_wait(&p->state, ParkerState_Parked);
		// Consume a pending notification; any other value is a spurious wake-up
		// and we go back to waiting.
		i32 notified = ParkerState_Notified;
		if (p->state.compare_exchange_strong(notified, ParkerState_Empty, std::memory_order_acquire, std::memory_order_acquire)) {
			return;
		}
	}
}

gb_internal void unpark_one(Parker *p) {
	if (p->state.exchange(ParkerState_Notified, std::memory_order_release) == ParkerState_Parked) {
		futex_signal(&p->state);
	}
}

gb_internal void unpark_all(Parker *p) {
	if (p->state.exchange(ParkerState_Notified, std::memory_order_release) == ParkerState_Parked) {
		futex_broadcast(&p->state);
	}
}
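// Illustrative usage (assuming a zero-initialized `Parker p` shared by two
// threads): park() blocks the calling thread until it is unparked; a
// notification delivered before park() is not lost, the next park() simply
// returns immediately.
//
//     // waiting thread:
//     park(&p);
//
//     // notifying thread:
//     unpark_one(&p);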
gb_internal u32 thread_current_id(void) {
	u32 thread_id;
#if defined(GB_SYSTEM_WINDOWS)
	#if defined(GB_ARCH_32_BIT) && defined(GB_CPU_X86)
		thread_id = (cast(u32 *)__readfsdword(24))[9];
	#elif defined(GB_ARCH_64_BIT) && defined(GB_CPU_X86)
		thread_id = (cast(u32 *)__readgsqword(48))[18];
	#else
		thread_id = GetCurrentThreadId();
	#endif
#elif defined(GB_SYSTEM_OSX) && defined(GB_ARCH_64_BIT)
	thread_id = pthread_mach_thread_np(pthread_self());
#elif defined(GB_ARCH_32_BIT) && defined(GB_CPU_X86)
	__asm__("mov %%gs:0x08,%0" : "=r"(thread_id));
#elif defined(GB_ARCH_64_BIT) && defined(GB_CPU_X86)
	__asm__("mov %%fs:0x10,%0" : "=r"(thread_id));
#elif defined(GB_SYSTEM_LINUX)
	thread_id = gettid();
#elif defined(GB_SYSTEM_HAIKU)
	thread_id = find_thread(NULL);
#elif defined(GB_SYSTEM_FREEBSD)
	thread_id = pthread_getthreadid_np();
#elif defined(GB_SYSTEM_NETBSD)
	thread_id = (u32)_lwp_self();
#else
	#error Unsupported architecture for thread_current_id()
#endif

	return thread_id;
}

gb_internal gb_inline void yield_thread(void) {
#if defined(GB_SYSTEM_WINDOWS)
	_mm_pause();
#elif defined(GB_SYSTEM_OSX)
	#if defined(GB_CPU_X86)
		__asm__ volatile ("" : : : "memory");
	#elif defined(GB_CPU_ARM)
		__asm__ volatile ("yield" : : : "memory");
	#endif
#elif defined(GB_CPU_X86)
	_mm_pause();
#elif defined(GB_CPU_ARM)
	__asm__ volatile ("yield" : : : "memory");
#else
	#error Unknown architecture
#endif
}

gb_internal gb_inline void yield_process(void) {
#if defined(GB_SYSTEM_WINDOWS)
	YieldProcessor();
#else
	sched_yield();
#endif
}
#if defined(GB_SYSTEM_WINDOWS)
gb_internal DWORD __stdcall internal_thread_proc(void *arg) {
	Thread *t = cast(Thread *)arg;
	thread_pool_thread_proc(t);
	return 0;
}
#else
gb_internal void *internal_thread_proc(void *arg) {
#if defined(GB_SYSTEM_LINUX)
	// NOTE: Don't permit any signal delivery to threads on Linux.
	sigset_t mask = {};
	sigfillset(&mask);
	GB_ASSERT_MSG(pthread_sigmask(SIG_BLOCK, &mask, nullptr) == 0, "failed to block signals");
#endif

	Thread *t = cast(Thread *)arg;
	thread_pool_thread_proc(t);
	return NULL;
}
#endif

TaskRingBuffer *taskring_init(isize size) {
	TaskRingBuffer *ring = (TaskRingBuffer *)gb_alloc(heap_allocator(), sizeof(TaskRingBuffer));
	ring->size = size;
	ring->buffer = (WorkerTask *)gb_alloc_array(heap_allocator(), WorkerTask, ring->size);
	return ring;
}

void thread_queue_destroy(TaskQueue *q) {
	gb_free(heap_allocator(), (*q->ring).buffer);
	gb_free(heap_allocator(), q->ring);
}
gb_internal void thread_init(ThreadPool *pool, Thread *t, isize idx) {
	gb_zero_item(t);
#if defined(GB_SYSTEM_WINDOWS)
	t->win32_handle = INVALID_HANDLE_VALUE;
#else
	t->posix_handle = 0;
#endif

	// Size must be a power of 2
	t->queue.ring = taskring_init(1 << 14);

	t->pool = pool;
	t->idx  = idx;
}

gb_internal void thread_init_and_start(ThreadPool *pool, Thread *t, isize idx) {
	thread_init(pool, t, idx);
	isize stack_size = 0;

#if defined(GB_SYSTEM_WINDOWS)
	t->win32_handle = CreateThread(NULL, stack_size, internal_thread_proc, t, 0, NULL);
	GB_ASSERT_MSG(t->win32_handle != NULL, "CreateThread: GetLastError");
#else
	{
		pthread_attr_t attr;
		pthread_attr_init(&attr);
		defer (pthread_attr_destroy(&attr));
		pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
		if (stack_size != 0) {
			pthread_attr_setstacksize(&attr, stack_size);
		}

		pthread_create(&t->posix_handle, &attr, internal_thread_proc, t);
	}
#endif
}

gb_internal void thread_join_and_destroy(Thread *t) {
#if defined(GB_SYSTEM_WINDOWS)
	WaitForSingleObject(t->win32_handle, INFINITE);
	CloseHandle(t->win32_handle);
	t->win32_handle = INVALID_HANDLE_VALUE;
#else
	pthread_join(t->posix_handle, NULL);
	t->posix_handle = 0;
#endif

	thread_queue_destroy(&t->queue);
}
gb_internal void thread_set_name(Thread *t, char const *name) {
#if defined(GB_COMPILER_MSVC)
	#pragma pack(push, 8)
		typedef struct {
			DWORD       type;
			char const *name;
			DWORD       id;
			DWORD       flags;
		} gbprivThreadName;
	#pragma pack(pop)

	gbprivThreadName tn;
	tn.type  = 0x1000;
	tn.name  = name;
	tn.id    = GetThreadId(cast(HANDLE)t->win32_handle);
	tn.flags = 0;

	__try {
		RaiseException(0x406d1388, 0, gb_size_of(tn)/4, cast(ULONG_PTR *)&tn);
	} __except(1 /*EXCEPTION_EXECUTE_HANDLER*/) {
	}
#elif defined(GB_SYSTEM_WINDOWS) && !defined(GB_COMPILER_MSVC)
	// IMPORTANT TODO(bill): Set thread name for GCC/Clang on windows
	return;
#elif defined(GB_SYSTEM_OSX)
	// TODO(bill): Test if this works
	pthread_setname_np(name);
#elif defined(GB_SYSTEM_FREEBSD) || defined(GB_SYSTEM_OPENBSD)
	pthread_set_name_np(t->posix_handle, name);
#elif defined(GB_SYSTEM_NETBSD)
	pthread_setname_np(t->posix_handle, "%s", (void *)name);
#else
	// TODO(bill): Test if this works
	pthread_setname_np(t->posix_handle, name);
#endif
}
#if defined(GB_SYSTEM_LINUX) || defined(GB_SYSTEM_NETBSD)
#include <sys/syscall.h>

#ifdef GB_SYSTEM_LINUX
#include <linux/futex.h>
#else
#include <sys/futex.h>
#define SYS_futex SYS___futex
#endif

gb_internal void futex_signal(Futex *addr) {
	int ret = syscall(SYS_futex, addr, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1, NULL, NULL, 0);
	if (ret == -1) {
		perror("Futex wake");
		GB_PANIC("Failed in futex wake!\n");
	}
}

gb_internal void futex_broadcast(Futex *addr) {
	int ret = syscall(SYS_futex, addr, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, INT32_MAX, NULL, NULL, 0);
	if (ret == -1) {
		perror("Futex wake");
		GB_PANIC("Failed in futex wake!\n");
	}
}

gb_internal void futex_wait(Futex *addr, Footex val) {
	for (;;) {
		int ret = syscall(SYS_futex, addr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, val, NULL, NULL, 0);
		if (ret == -1) {
			if (errno != EAGAIN) {
				perror("Futex wait");
				GB_PANIC("Failed in futex wait!\n");
			} else {
				return;
			}
		} else if (ret == 0) {
			if (*addr != val) {
				return;
			}
		}
	}
}
#elif defined(GB_SYSTEM_FREEBSD)
#include <sys/types.h>
#include <sys/umtx.h>

gb_internal void futex_signal(Futex *addr) {
	_umtx_op(addr, UMTX_OP_WAKE, 1, 0, 0);
}

gb_internal void futex_broadcast(Futex *addr) {
	_umtx_op(addr, UMTX_OP_WAKE, INT32_MAX, 0, 0);
}

gb_internal void futex_wait(Futex *addr, Footex val) {
	for (;;) {
		int ret = _umtx_op(addr, UMTX_OP_WAIT_UINT, val, 0, NULL);
		if (ret == -1) {
			if (errno == ETIMEDOUT || errno == EINTR) {
				continue;
			}
			perror("Futex wait");
			GB_PANIC("Failed in futex wait!\n");
		} else if (ret == 0) {
			if (*addr != val) {
				return;
			}
		}
	}
}
#elif defined(GB_SYSTEM_OPENBSD)
#include <sys/futex.h>

gb_internal void futex_signal(Futex *f) {
	for (;;) {
		int ret = futex((volatile uint32_t *)f, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1, NULL, NULL);
		if (ret == -1) {
			if (errno == ETIMEDOUT || errno == EINTR) {
				continue;
			}
			perror("Futex wake");
			GB_PANIC("futex wake fail");
		} else if (ret == 1) {
			return;
		}
	}
}

gb_internal void futex_broadcast(Futex *f) {
	for (;;) {
		int ret = futex((volatile uint32_t *)f, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, INT32_MAX, NULL, NULL);
		if (ret == -1) {
			if (errno == ETIMEDOUT || errno == EINTR) {
				continue;
			}
			perror("Futex wake");
			GB_PANIC("futex wake fail");
		} else if (ret == 1) {
			return;
		}
	}
}

gb_internal void futex_wait(Futex *f, Footex val) {
	for (;;) {
		int ret = futex((volatile uint32_t *)f, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, val, NULL, NULL);
		if (ret == -1) {
			if (*f != val) {
				return;
			}
			if (errno == ETIMEDOUT || errno == EINTR) {
				continue;
			}
			perror("Futex wait");
			GB_PANIC("Failed in futex wait!\n");
		}
	}
}
#elif defined(GB_SYSTEM_OSX)

#if __has_include(<os/os_sync_wait_on_address.h>)
#define DARWIN_WAIT_ON_ADDRESS_AVAILABLE
#include <os/os_sync_wait_on_address.h>
#endif

#define UL_COMPARE_AND_WAIT 0x00000001
#define ULF_NO_ERRNO        0x01000000

extern "C" int __ulock_wait(uint32_t operation, void *addr, uint64_t value, uint32_t timeout); /* timeout is specified in microseconds */
extern "C" int __ulock_wake(uint32_t operation, void *addr, uint64_t wake_value);

gb_internal void futex_signal(Futex *f) {
#ifdef DARWIN_WAIT_ON_ADDRESS_AVAILABLE
	if (__builtin_available(macOS 14.4, *)) {
		for (;;) {
			int ret = os_sync_wake_by_address_any(f, sizeof(Futex), OS_SYNC_WAKE_BY_ADDRESS_NONE);
			if (ret >= 0) {
				return;
			}
			if (errno == EINTR || errno == EFAULT) {
				continue;
			}
			if (errno == ENOENT) {
				return;
			}
			GB_PANIC("Failed in futex wake %d %d!\n", ret, errno);
		}
	} else {
#endif
		for (;;) {
			int ret = __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, 0);
			if (ret >= 0) {
				return;
			}
			if (ret == -EINTR || ret == -EFAULT) {
				continue;
			}
			if (ret == -ENOENT) {
				return;
			}
			GB_PANIC("Failed in futex wake!\n");
		}
#ifdef DARWIN_WAIT_ON_ADDRESS_AVAILABLE
	}
#endif
}

gb_internal void futex_broadcast(Futex *f) {
#ifdef DARWIN_WAIT_ON_ADDRESS_AVAILABLE
	if (__builtin_available(macOS 14.4, *)) {
		for (;;) {
			int ret = os_sync_wake_by_address_all(f, sizeof(Footex), OS_SYNC_WAKE_BY_ADDRESS_NONE);
			if (ret >= 0) {
				return;
			}
			if (errno == EINTR || errno == EFAULT) {
				continue;
			}
			if (errno == ENOENT) {
				return;
			}
			GB_PANIC("Failed in futex wake %d %d!\n", ret, errno);
		}
	} else {
#endif
		for (;;) {
			enum { ULF_WAKE_ALL = 0x00000100 };
			int ret = __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO | ULF_WAKE_ALL, f, 0);
			if (ret == 0) {
				return;
			}
			if (ret == -EINTR || ret == -EFAULT) {
				continue;
			}
			if (ret == -ENOENT) {
				return;
			}
			GB_PANIC("Failed in futex wake!\n");
		}
#ifdef DARWIN_WAIT_ON_ADDRESS_AVAILABLE
	}
#endif
}
gb_internal void futex_wait(Futex *f, Footex val) {
#ifdef DARWIN_WAIT_ON_ADDRESS_AVAILABLE
	if (__builtin_available(macOS 14.4, *)) {
		for (;;) {
			int ret = os_sync_wait_on_address(f, cast(uint64_t)(val), sizeof(Footex), OS_SYNC_WAIT_ON_ADDRESS_NONE);
			if (ret >= 0) {
				if (*f != val) {
					return;
				}
				continue;
			}
			if (errno == EINTR || errno == EFAULT) {
				continue;
			}
			if (errno == ENOENT) {
				return;
			}
			GB_PANIC("Failed in futex wait %d %d!\n", ret, errno);
		}
	} else {
#endif
		for (;;) {
			int ret = __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, val, 0);
			if (ret >= 0) {
				if (*f != val) {
					return;
				}
				continue;
			}
			if (ret == -EINTR || ret == -EFAULT) {
				continue;
			}
			if (ret == -ENOENT) {
				return;
			}
			GB_PANIC("Failed in futex wait!\n");
		}
#ifdef DARWIN_WAIT_ON_ADDRESS_AVAILABLE
	}
#endif
}
#elif defined(GB_SYSTEM_WINDOWS)

gb_internal void futex_signal(Futex *f) {
	WakeByAddressSingle(f);
}

gb_internal void futex_broadcast(Futex *f) {
	WakeByAddressAll(f);
}

gb_internal void futex_wait(Futex *f, Footex val) {
	do {
		WaitOnAddress(f, (void *)&val, sizeof(val), INFINITE);
	} while (f->load() == val);
}
#elif defined(GB_SYSTEM_HAIKU)
// Futex implementation taken from https://tavianator.com/2023/futex.html
#include <pthread.h>
#include <atomic>

struct _Spinlock {
	std::atomic_flag state;

	void init() {
		state.clear();
	}
	void lock() {
		while (state.test_and_set(std::memory_order_acquire)) {
#if defined(GB_CPU_X86)
			_mm_pause();
#else
			(void)0; // spin...
#endif
		}
	}
	void unlock() {
		state.clear(std::memory_order_release);
	}
};

struct Futex_Waitq;

struct Futex_Waiter {
	_Spinlock     lock;
	pthread_t     thread;
	Futex        *futex;
	Futex_Waitq  *waitq;
	Futex_Waiter *prev, *next;
};

struct Futex_Waitq {
	_Spinlock    lock;
	Futex_Waiter list;

	void init() {
		auto head = &list;
		head->prev = head->next = head;
	}
};

// FIXME: This approach may scale badly in the future,
// possible solution - hash map (leads to deadlocks now).
Futex_Waitq g_waitq = {
	.lock = ATOMIC_FLAG_INIT,
	.list = {
		.prev = &g_waitq.list,
		.next = &g_waitq.list,
	},
};

Futex_Waitq *get_waitq(Futex *f) {
	// Future hash map method...
	return &g_waitq;
}

void futex_signal(Futex *f) {
	auto waitq = get_waitq(f);
	waitq->lock.lock();

	auto head = &waitq->list;
	for (auto waiter = head->next; waiter != head; waiter = waiter->next) {
		if (waiter->futex != f) {
			continue;
		}
		waitq->lock.unlock();
		pthread_kill(waiter->thread, SIGCONT);
		return;
	}

	waitq->lock.unlock();
}

void futex_broadcast(Futex *f) {
	auto waitq = get_waitq(f);
	waitq->lock.lock();

	auto head = &waitq->list;
	for (auto waiter = head->next; waiter != head; waiter = waiter->next) {
		if (waiter->futex != f) {
			continue;
		}
		if (waiter->next == head) {
			waitq->lock.unlock();
			pthread_kill(waiter->thread, SIGCONT);
			return;
		} else {
			pthread_kill(waiter->thread, SIGCONT);
		}
	}

	waitq->lock.unlock();
}
void futex_wait(Futex *f, Footex val) {
	Futex_Waiter waiter;
	waiter.thread = pthread_self();
	waiter.futex = f;

	auto waitq = get_waitq(f);
	while (waitq->lock.state.test_and_set(std::memory_order_acquire)) {
		if (f->load(std::memory_order_relaxed) != val) {
			return;
		}
#if defined(GB_CPU_X86)
		_mm_pause();
#else
		(void)0; // spin...
#endif
	}

	waiter.waitq = waitq;
	waiter.lock.init();
	waiter.lock.lock();

	// Link the waiter onto the tail of the wait queue.
	auto head = &waitq->list;
	waiter.prev = head->prev;
	waiter.next = head;
	waiter.prev->next = &waiter;
	waiter.next->prev = &waiter;

	sigset_t old_mask, mask;
	sigemptyset(&mask);
	sigaddset(&mask, SIGCONT);
	pthread_sigmask(SIG_BLOCK, &mask, &old_mask);

	if (f->load(std::memory_order_relaxed) == val) {
		waiter.lock.unlock();
		waitq->lock.unlock();

		int sig;
		sigwait(&mask, &sig);

		waitq->lock.lock();
		waiter.lock.lock();

		while (waitq != waiter.waitq) {
			auto req = waiter.waitq;
			waiter.lock.unlock();
			waitq->lock.unlock();
			waitq = req;
			waitq->lock.lock();
			waiter.lock.lock();
		}
	}

	waiter.prev->next = waiter.next;
	waiter.next->prev = waiter.prev;

	pthread_sigmask(SIG_SETMASK, &old_mask, NULL);
	waiter.lock.unlock();
	waitq->lock.unlock();
}
#endif
#if defined(GB_SYSTEM_WINDOWS)
#pragma warning(pop)
#endif