#if defined(GB_SYSTEM_LINUX)
#include <signal.h>
#if __has_include(<valgrind/helgrind.h>)
#include <valgrind/helgrind.h>
#define HAS_VALGRIND
#endif
#endif

#if defined(GB_SYSTEM_WINDOWS)
#pragma warning(push)
#pragma warning(disable: 4505)
#endif

#if defined(HAS_VALGRIND)
#define ANNOTATE_LOCK_PRE(m, t)   VALGRIND_HG_MUTEX_LOCK_PRE(m, t)
#define ANNOTATE_LOCK_POST(m)     VALGRIND_HG_MUTEX_LOCK_POST(m)
#define ANNOTATE_UNLOCK_PRE(m)    VALGRIND_HG_MUTEX_UNLOCK_PRE(m)
#define ANNOTATE_UNLOCK_POST(m)   VALGRIND_HG_MUTEX_UNLOCK_POST(m)
#define ANNOTATE_SEM_WAIT_POST(s) VALGRIND_HG_SEM_WAIT_POST(s)
#define ANNOTATE_SEM_POST_PRE(s)  VALGRIND_HG_SEM_POST_PRE(s)
#else
#define ANNOTATE_LOCK_PRE(m, t)
#define ANNOTATE_LOCK_POST(m)
#define ANNOTATE_UNLOCK_PRE(m)
#define ANNOTATE_UNLOCK_POST(m)
#define ANNOTATE_SEM_WAIT_POST(s)
#define ANNOTATE_SEM_POST_PRE(s)
#endif

struct BlockingMutex;
struct RecursiveMutex;
struct RwMutex;
struct Semaphore;
struct Condition;
struct Thread;
struct ThreadPool;
struct Parker;

#define THREAD_PROC(name) isize name(struct Thread *thread)
gb_internal THREAD_PROC(thread_pool_thread_proc);

#define WORKER_TASK_PROC(name) isize name(void *data)
typedef WORKER_TASK_PROC(WorkerTaskProc);

typedef struct WorkerTask {
	WorkerTaskProc *do_work;
	void           *data;
} WorkerTask;
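// A WorkerTask simply pairs a callback with its argument. Illustrative sketch
// (hypothetical names, not part of this file):
//
//     WORKER_TASK_PROC(example_task) {
//         ExampleJob *job = cast(ExampleJob *)data;
//         // ... do the work ...
//         return 0;
//     }
//
//     WorkerTask task = {};
//     task.do_work = example_task;
//     task.data    = job;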
struct Thread {
#if defined(GB_SYSTEM_WINDOWS)
	void *win32_handle;
#else
	pthread_t posix_handle;
#endif

	isize idx;

	WorkerTask *queue;
	size_t capacity;
	std::atomic<uint64_t> head_and_tail;

	isize stack_size;
	struct ThreadPool *pool;
};
typedef std::atomic<i32> Futex;
typedef volatile i32     Footex;

gb_internal void futex_wait(Futex *addr, Footex val);
gb_internal void futex_signal(Futex *addr);
gb_internal void futex_broadcast(Futex *addr);
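// Futex semantics used throughout this file:
//   futex_wait(addr, val)  blocks the caller for as long as *addr == val
//                          (spurious returns are fine; callers re-check their condition).
//   futex_signal(addr)     wakes at most one waiter blocked on addr.
//   futex_broadcast(addr)  wakes every waiter blocked on addr.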
gb_internal void mutex_lock    (BlockingMutex *m);
gb_internal bool mutex_try_lock(BlockingMutex *m);
gb_internal void mutex_unlock  (BlockingMutex *m);

gb_internal void mutex_lock    (RecursiveMutex *m);
gb_internal bool mutex_try_lock(RecursiveMutex *m);
gb_internal void mutex_unlock  (RecursiveMutex *m);

gb_internal void rw_mutex_lock           (RwMutex *m);
gb_internal bool rw_mutex_try_lock       (RwMutex *m);
gb_internal void rw_mutex_unlock         (RwMutex *m);
gb_internal void rw_mutex_shared_lock    (RwMutex *m);
gb_internal bool rw_mutex_try_shared_lock(RwMutex *m);
gb_internal void rw_mutex_shared_unlock  (RwMutex *m);

gb_internal void semaphore_post(Semaphore *s, i32 count);
gb_internal void semaphore_wait(Semaphore *s);

gb_internal void condition_broadcast(Condition *c);
gb_internal void condition_signal(Condition *c);
gb_internal void condition_wait(Condition *c, BlockingMutex *m);

gb_internal void park(Parker *p);
gb_internal void unpark_one(Parker *p);
gb_internal void unpark_all(Parker *p);

gb_internal u32 thread_current_id(void);

gb_internal void thread_init            (ThreadPool *pool, Thread *t, isize idx);
gb_internal void thread_init_and_start  (ThreadPool *pool, Thread *t, isize idx);
gb_internal void thread_join_and_destroy(Thread *t);
gb_internal void thread_set_name        (Thread *t, char const *name);

gb_internal void yield_thread(void);
gb_internal void yield(void);
struct Wait_Signal {
	Futex futex;
};

gb_internal void wait_signal_until_available(Wait_Signal *ws) {
	// Block while the signal is still unset (futex value 0).
	if (ws->futex.load() == 0) {
		futex_wait(&ws->futex, 0);
	}
}

gb_internal void wait_signal_set(Wait_Signal *ws) {
	ws->futex.store(1);
	futex_broadcast(&ws->futex);
}
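// Usage sketch (illustrative): one thread calls wait_signal_set(&ws) exactly
// once to publish "ready"; any number of other threads may call
// wait_signal_until_available(&ws) and will return once the flag is set.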
struct MutexGuard {
	MutexGuard()                   = delete;
	MutexGuard(MutexGuard const &) = delete;
	MutexGuard(MutexGuard &&)      = delete;

	explicit MutexGuard(BlockingMutex *bm) noexcept : bm{bm} {
		mutex_lock(this->bm);
	}
	explicit MutexGuard(RecursiveMutex *rm) noexcept : rm{rm} {
		mutex_lock(this->rm);
	}
	explicit MutexGuard(RwMutex *rwm) noexcept : rwm{rwm} {
		rw_mutex_lock(this->rwm);
	}
	explicit MutexGuard(BlockingMutex &bm) noexcept : bm{&bm} {
		mutex_lock(this->bm);
	}
	explicit MutexGuard(RecursiveMutex &rm) noexcept : rm{&rm} {
		mutex_lock(this->rm);
	}
	explicit MutexGuard(RwMutex &rwm) noexcept : rwm{&rwm} {
		rw_mutex_lock(this->rwm);
	}
	~MutexGuard() noexcept {
		if (this->bm) {
			mutex_unlock(this->bm);
		} else if (this->rm) {
			mutex_unlock(this->rm);
		} else if (this->rwm) {
			rw_mutex_unlock(this->rwm);
		}
	}

	operator bool() const noexcept { return true; }

	// Default these to nullptr: each constructor sets only one of them, and
	// the destructor inspects all three to decide which mutex to unlock.
	BlockingMutex  *bm  = nullptr;
	RecursiveMutex *rm  = nullptr;
	RwMutex        *rwm = nullptr;
};
#define MUTEX_GUARD_BLOCK(m) if (MutexGuard GB_DEFER_3(_mutex_guard_){m})
#define MUTEX_GUARD(m)    mutex_lock(m);    defer (mutex_unlock(m))
#define RW_MUTEX_GUARD(m) rw_mutex_lock(m); defer (rw_mutex_unlock(m))
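// Usage sketch (illustrative; example_mutex is hypothetical):
//
//     gb_global BlockingMutex example_mutex;
//
//     MUTEX_GUARD_BLOCK(&example_mutex) {
//         // critical section; unlocked when the block's MutexGuard is destroyed
//     }
//
//     void example(void) {
//         MUTEX_GUARD(&example_mutex); // held until the end of the enclosing scope
//         // ...
//     }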
struct RecursiveMutex {
	Futex owner;
	i32   recursion;
};

gb_internal void mutex_lock(RecursiveMutex *m) {
	i32 tid = cast(i32)thread_current_id();
	for (;;) {
		i32 prev_owner = 0;
		m->owner.compare_exchange_strong(prev_owner, tid, std::memory_order_acquire, std::memory_order_acquire);
		if (prev_owner == 0 || prev_owner == tid) {
			m->recursion++;
			// inside the lock
			return;
		}
		futex_wait(&m->owner, prev_owner);
	}
}
gb_internal bool mutex_try_lock(RecursiveMutex *m) {
	i32 tid = cast(i32)thread_current_id();
	i32 prev_owner = 0;
	m->owner.compare_exchange_strong(prev_owner, tid, std::memory_order_acquire, std::memory_order_acquire);
	if (prev_owner == 0 || prev_owner == tid) {
		m->recursion++;
		// inside the lock
		return true;
	}
	return false;
}
gb_internal void mutex_unlock(RecursiveMutex *m) {
	m->recursion--;
	if (m->recursion != 0) {
		return;
	}
	m->owner.exchange(0, std::memory_order_release);
	futex_signal(&m->owner);
	// outside the lock
}
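// Illustrative: the owning thread may re-enter the lock and must unlock once
// per lock call before other threads can acquire it.
//
//     mutex_lock(&rm);
//     mutex_lock(&rm);   // same thread: recursion == 2, does not block
//     mutex_unlock(&rm);
//     mutex_unlock(&rm); // owner cleared, a waiter is woken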
struct Semaphore {
	Footex count_;

	Futex &count() noexcept {
		return *(Futex *)&this->count_;
	}
	Futex const &count() const noexcept {
		return *(Futex const *)&this->count_;
	}
};

gb_internal void semaphore_post(Semaphore *s, i32 count) {
	s->count().fetch_add(count, std::memory_order_release);
	if (s->count().load() == 1) {
		futex_signal(&s->count());
	} else {
		futex_broadcast(&s->count());
	}
}

gb_internal void semaphore_wait(Semaphore *s) {
	for (;;) {
		i32 original_count = s->count().load(std::memory_order_relaxed);
		while (original_count == 0) {
			futex_wait(&s->count(), original_count);
			original_count = s->count().load(std::memory_order_relaxed);
		}

		if (s->count().compare_exchange_strong(original_count, original_count-1, std::memory_order_acquire, std::memory_order_acquire)) {
			return;
		}
	}
}
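// semaphore_post(s, n) makes n "slots" available; each semaphore_wait(s)
// consumes one slot, blocking while the count is zero. For example, a producer
// may post once per queued item while consumers wait before dequeuing.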
#if defined(GB_SYSTEM_WINDOWS)

struct BlockingMutex {
	SRWLOCK srwlock;
};
gb_internal void mutex_lock(BlockingMutex *m) {
	AcquireSRWLockExclusive(&m->srwlock);
}
gb_internal bool mutex_try_lock(BlockingMutex *m) {
	return !!TryAcquireSRWLockExclusive(&m->srwlock);
}
gb_internal void mutex_unlock(BlockingMutex *m) {
	ReleaseSRWLockExclusive(&m->srwlock);
}

struct Condition {
	CONDITION_VARIABLE cond;
};
gb_internal void condition_broadcast(Condition *c) {
	WakeAllConditionVariable(&c->cond);
}
gb_internal void condition_signal(Condition *c) {
	WakeConditionVariable(&c->cond);
}
gb_internal void condition_wait(Condition *c, BlockingMutex *m) {
	SleepConditionVariableSRW(&c->cond, &m->srwlock, INFINITE, 0);
}

struct RwMutex {
	SRWLOCK srwlock;
};
gb_internal void rw_mutex_lock(RwMutex *m) {
	AcquireSRWLockExclusive(&m->srwlock);
}
gb_internal bool rw_mutex_try_lock(RwMutex *m) {
	return !!TryAcquireSRWLockExclusive(&m->srwlock);
}
gb_internal void rw_mutex_unlock(RwMutex *m) {
	ReleaseSRWLockExclusive(&m->srwlock);
}
gb_internal void rw_mutex_shared_lock(RwMutex *m) {
	AcquireSRWLockShared(&m->srwlock);
}
gb_internal bool rw_mutex_try_shared_lock(RwMutex *m) {
	return !!TryAcquireSRWLockShared(&m->srwlock);
}
gb_internal void rw_mutex_shared_unlock(RwMutex *m) {
	ReleaseSRWLockShared(&m->srwlock);
}

#else

enum Internal_Mutex_State : i32 {
	Internal_Mutex_State_Unlocked = 0,
	Internal_Mutex_State_Locked   = 1,
	Internal_Mutex_State_Waiting  = 2,
};
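// Three-state futex mutex: an uncontended acquire moves Unlocked -> Locked;
// a contended acquire moves the state to Waiting and sleeps on the futex, so
// unlock only needs to issue a wake when the state it replaced was Waiting.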
struct BlockingMutex {
#if defined(HAS_VALGRIND)
	// BlockingMutex() {
	// 	VALGRIND_HG_MUTEX_INIT_POST(this, 0);
	// }
	// ~BlockingMutex() {
	// 	VALGRIND_HG_MUTEX_DESTROY_PRE(this);
	// }
#endif
	i32 state_;

	Futex &state() {
		return *(Futex *)&this->state_;
	}
	Futex const &state() const {
		return *(Futex const *)&this->state_;
	}
};

gb_no_inline gb_internal void mutex_lock_slow(BlockingMutex *m, i32 curr_state) {
	i32 new_state = curr_state;
	for (i32 spin = 0; spin < 100; spin++) {
		i32 state = Internal_Mutex_State_Unlocked;
		bool ok = m->state().compare_exchange_weak(state, new_state, std::memory_order_acquire, std::memory_order_consume);
		if (ok) {
			return;
		}
		if (state == Internal_Mutex_State_Waiting) {
			break;
		}
		for (i32 i = gb_min(spin+1, 32); i > 0; i--) {
			yield_thread();
		}
	}

	// Set just in case 100 iterations did not do it
	new_state = Internal_Mutex_State_Waiting;

	for (;;) {
		if (m->state().exchange(Internal_Mutex_State_Waiting, std::memory_order_acquire) == Internal_Mutex_State_Unlocked) {
			return;
		}
		futex_wait(&m->state(), new_state);
		yield_thread();
	}
}

gb_internal void mutex_lock(BlockingMutex *m) {
	ANNOTATE_LOCK_PRE(m, 0);
	i32 v = m->state().exchange(Internal_Mutex_State_Locked, std::memory_order_acquire);
	if (v != Internal_Mutex_State_Unlocked) {
		mutex_lock_slow(m, v);
	}
	ANNOTATE_LOCK_POST(m);
}
gb_internal bool mutex_try_lock(BlockingMutex *m) {
	ANNOTATE_LOCK_PRE(m, 1);
	i32 v = m->state().exchange(Internal_Mutex_State_Locked, std::memory_order_acquire);
	if (v == Internal_Mutex_State_Unlocked) {
		ANNOTATE_LOCK_POST(m);
		return true;
	}
	return false;
}

gb_no_inline gb_internal void mutex_unlock_slow(BlockingMutex *m) {
	futex_signal(&m->state());
}

gb_internal void mutex_unlock(BlockingMutex *m) {
	ANNOTATE_UNLOCK_PRE(m);
	i32 v = m->state().exchange(Internal_Mutex_State_Unlocked, std::memory_order_release);
	switch (v) {
	case Internal_Mutex_State_Unlocked:
		GB_PANIC("Unreachable");
		break;
	case Internal_Mutex_State_Locked:
		// Okay
		break;
	case Internal_Mutex_State_Waiting:
		mutex_unlock_slow(m);
		break;
	}
	ANNOTATE_UNLOCK_POST(m);
}
struct Condition {
	i32 state_;

	Futex &state() {
		return *(Futex *)&this->state_;
	}
	Futex const &state() const {
		return *(Futex const *)&this->state_;
	}
};

gb_internal void condition_broadcast(Condition *c) {
	c->state().fetch_add(1, std::memory_order_release);
	futex_broadcast(&c->state());
}
gb_internal void condition_signal(Condition *c) {
	c->state().fetch_add(1, std::memory_order_release);
	futex_signal(&c->state());
}
gb_internal void condition_wait(Condition *c, BlockingMutex *m) {
	i32 state = c->state().load(std::memory_order_relaxed);
	mutex_unlock(m);
	futex_wait(&c->state(), state);
	mutex_lock(m);
}

struct RwMutex {
	// TODO(bill): make this a proper RW mutex
	BlockingMutex mutex;
};

gb_internal void rw_mutex_lock(RwMutex *m) {
	mutex_lock(&m->mutex);
}
gb_internal bool rw_mutex_try_lock(RwMutex *m) {
	return mutex_try_lock(&m->mutex);
}
gb_internal void rw_mutex_unlock(RwMutex *m) {
	mutex_unlock(&m->mutex);
}
gb_internal void rw_mutex_shared_lock(RwMutex *m) {
	mutex_lock(&m->mutex);
}
gb_internal bool rw_mutex_try_shared_lock(RwMutex *m) {
	return mutex_try_lock(&m->mutex);
}
gb_internal void rw_mutex_shared_unlock(RwMutex *m) {
	mutex_unlock(&m->mutex);
}

#endif

struct Parker {
	Futex state;
};
enum ParkerState : u32 {
	ParkerState_Empty    = 0,
	ParkerState_Notified = 1,
	ParkerState_Parked   = UINT32_MAX,
};
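// Parker protocol (resembles the futex-based thread parker in Rust's standard
// library): park() moves Empty -> Parked (i32 -1, i.e. UINT32_MAX) and sleeps
// on the futex; unpark_* moves any state -> Notified and wakes the futex; a
// notification that arrives before park() makes the next park() return
// immediately, consuming the Notified state back to Empty.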
gb_internal void park(Parker *p) {
	if (p->state.fetch_sub(1, std::memory_order_acquire) == ParkerState_Notified) {
		return;
	}
	for (;;) {
		futex_wait(&p->state, ParkerState_Parked);
		// Consume a pending notification: Notified -> Empty.
		i32 notified = ParkerState_Notified;
		if (p->state.compare_exchange_strong(notified, ParkerState_Empty, std::memory_order_acquire, std::memory_order_acquire)) {
			return;
		}
	}
}
gb_internal void unpark_one(Parker *p) {
	if (p->state.exchange(ParkerState_Notified, std::memory_order_release) == ParkerState_Parked) {
		futex_signal(&p->state);
	}
}
gb_internal void unpark_all(Parker *p) {
	if (p->state.exchange(ParkerState_Notified, std::memory_order_release) == ParkerState_Parked) {
		futex_broadcast(&p->state);
	}
}
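// Usage sketch (illustrative, hypothetical helpers): a worker parks itself
// when it runs out of work and a producer unparks it when more work arrives.
//
//     // worker thread
//     while (!try_take_task(&task)) {
//         park(&p);
//     }
//
//     // producer thread
//     enqueue_task(task);
//     unpark_one(&p);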
gb_internal u32 thread_current_id(void) {
	u32 thread_id;
#if defined(GB_SYSTEM_WINDOWS)
#if defined(GB_ARCH_32_BIT) && defined(GB_CPU_X86)
	thread_id = (cast(u32 *)__readfsdword(24))[9];
#elif defined(GB_ARCH_64_BIT) && defined(GB_CPU_X86)
	thread_id = (cast(u32 *)__readgsqword(48))[18];
#else
	thread_id = GetCurrentThreadId();
#endif
#elif defined(GB_SYSTEM_OSX) && defined(GB_ARCH_64_BIT)
	thread_id = pthread_mach_thread_np(pthread_self());
#elif defined(GB_ARCH_32_BIT) && defined(GB_CPU_X86)
	__asm__("mov %%gs:0x08,%0" : "=r"(thread_id));
#elif defined(GB_ARCH_64_BIT) && defined(GB_CPU_X86)
	__asm__("mov %%fs:0x10,%0" : "=r"(thread_id));
#elif defined(GB_SYSTEM_LINUX)
	thread_id = gettid();
#elif defined(GB_SYSTEM_HAIKU)
	thread_id = find_thread(NULL);
#else
#error Unsupported architecture for thread_current_id()
#endif
	return thread_id;
}

gb_internal gb_inline void yield_thread(void) {
#if defined(GB_SYSTEM_WINDOWS)
	_mm_pause();
#elif defined(GB_SYSTEM_OSX)
#if defined(GB_CPU_X86)
	__asm__ volatile ("" : : : "memory");
#elif defined(GB_CPU_ARM)
	__asm__ volatile ("yield" : : : "memory");
#endif
#elif defined(GB_CPU_X86)
	_mm_pause();
#elif defined(GB_CPU_ARM)
	__asm__ volatile ("yield" : : : "memory");
#else
#error Unknown architecture
#endif
}

gb_internal gb_inline void yield(void) {
#if defined(GB_SYSTEM_WINDOWS)
	YieldProcessor();
#else
	sched_yield();
#endif
}
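// yield_thread() is a CPU-level pause hint for tight spin loops; yield() is a
// coarser back-off (a processor yield on Windows, sched_yield() elsewhere).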
#if defined(GB_SYSTEM_WINDOWS)
gb_internal DWORD __stdcall internal_thread_proc(void *arg) {
	Thread *t = cast(Thread *)arg;
	thread_pool_thread_proc(t);
	return 0;
}
#else
gb_internal void *internal_thread_proc(void *arg) {
#if defined(GB_SYSTEM_LINUX)
	// NOTE: Don't permit any signal delivery to threads on Linux.
	sigset_t mask = {};
	sigfillset(&mask);
	GB_ASSERT_MSG(pthread_sigmask(SIG_BLOCK, &mask, nullptr) == 0, "failed to block signals");
#endif

	Thread *t = cast(Thread *)arg;
	thread_pool_thread_proc(t);
	return NULL;
}
#endif

gb_internal void thread_init(ThreadPool *pool, Thread *t, isize idx) {
	gb_zero_item(t);
#if defined(GB_SYSTEM_WINDOWS)
	t->win32_handle = INVALID_HANDLE_VALUE;
#else
	t->posix_handle = 0;
#endif

	t->capacity = 1 << 14; // must be a power of 2
	t->queue = gb_alloc_array(heap_allocator(), WorkerTask, t->capacity);
	t->head_and_tail = 0;
	t->pool = pool;
	t->idx = idx;
}

gb_internal void thread_init_and_start(ThreadPool *pool, Thread *t, isize idx) {
	thread_init(pool, t, idx);
	isize stack_size = 0;

#if defined(GB_SYSTEM_WINDOWS)
	t->win32_handle = CreateThread(NULL, stack_size, internal_thread_proc, t, 0, NULL);
	GB_ASSERT_MSG(t->win32_handle != NULL, "CreateThread: GetLastError");
#else
	{
		pthread_attr_t attr;
		pthread_attr_init(&attr);
		defer (pthread_attr_destroy(&attr));
		pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
		if (stack_size != 0) {
			pthread_attr_setstacksize(&attr, stack_size);
		}
		pthread_create(&t->posix_handle, &attr, internal_thread_proc, t);
	}
#endif
}

gb_internal void thread_join_and_destroy(Thread *t) {
#if defined(GB_SYSTEM_WINDOWS)
	WaitForSingleObject(t->win32_handle, INFINITE);
	CloseHandle(t->win32_handle);
	t->win32_handle = INVALID_HANDLE_VALUE;
#else
	pthread_join(t->posix_handle, NULL);
	t->posix_handle = 0;
#endif
	gb_free(heap_allocator(), t->queue);
}

gb_internal void thread_set_name(Thread *t, char const *name) {
#if defined(GB_COMPILER_MSVC)
#pragma pack(push, 8)
	typedef struct {
		DWORD       type;
		char const *name;
		DWORD       id;
		DWORD       flags;
	} gbprivThreadName;
#pragma pack(pop)

	gbprivThreadName tn;
	tn.type  = 0x1000;
	tn.name  = name;
	tn.id    = GetThreadId(cast(HANDLE)t->win32_handle);
	tn.flags = 0;

	__try {
		RaiseException(0x406d1388, 0, gb_size_of(tn)/4, cast(ULONG_PTR *)&tn);
	} __except(1 /*EXCEPTION_EXECUTE_HANDLER*/) {
	}
#elif defined(GB_SYSTEM_WINDOWS) && !defined(GB_COMPILER_MSVC)
	// IMPORTANT TODO(bill): Set thread name for GCC/Clang on windows
	return;
#elif defined(GB_SYSTEM_OSX)
	// TODO(bill): Test if this works
	pthread_setname_np(name);
#elif defined(GB_SYSTEM_FREEBSD) || defined(GB_SYSTEM_OPENBSD)
	pthread_set_name_np(t->posix_handle, name);
#else
	// TODO(bill): Test if this works
	pthread_setname_np(t->posix_handle, name);
#endif
}
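// Platform futex backends. Each one implements futex_wait/futex_signal/
// futex_broadcast on top of the OS primitive: the futex syscall on Linux,
// _umtx_op on FreeBSD, futex(2) on OpenBSD, __ulock_{wait,wake} on macOS,
// WaitOnAddress/WakeByAddress on Windows, and a signal-based fallback on Haiku.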
#if defined(GB_SYSTEM_LINUX)

#include <linux/futex.h>
#include <sys/syscall.h>

gb_internal void futex_signal(Futex *addr) {
	int ret = syscall(SYS_futex, addr, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1, NULL, NULL, 0);
	if (ret == -1) {
		perror("Futex wake");
		GB_PANIC("Failed in futex wake!\n");
	}
}

gb_internal void futex_broadcast(Futex *addr) {
	int ret = syscall(SYS_futex, addr, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, INT32_MAX, NULL, NULL, 0);
	if (ret == -1) {
		perror("Futex wake");
		GB_PANIC("Failed in futex wake!\n");
	}
}

gb_internal void futex_wait(Futex *addr, Footex val) {
	for (;;) {
		int ret = syscall(SYS_futex, addr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, val, NULL, NULL, 0);
		if (ret == -1) {
			if (errno != EAGAIN) {
				perror("Futex wait");
				GB_PANIC("Failed in futex wait!\n");
			} else {
				return;
			}
		} else if (ret == 0) {
			if (*addr != val) {
				return;
			}
		}
	}
}
#elif defined(GB_SYSTEM_FREEBSD)

#include <sys/types.h>
#include <sys/umtx.h>

gb_internal void futex_signal(Futex *addr) {
	_umtx_op(addr, UMTX_OP_WAKE, 1, 0, 0);
}

gb_internal void futex_broadcast(Futex *addr) {
	_umtx_op(addr, UMTX_OP_WAKE, INT32_MAX, 0, 0);
}

gb_internal void futex_wait(Futex *addr, Footex val) {
	for (;;) {
		int ret = _umtx_op(addr, UMTX_OP_WAIT_UINT, val, 0, NULL);
		if (ret == -1) {
			if (errno == ETIMEDOUT || errno == EINTR) {
				continue;
			}
			perror("Futex wait");
			GB_PANIC("Failed in futex wait!\n");
		} else if (ret == 0) {
			if (*addr != val) {
				return;
			}
		}
	}
}
#elif defined(GB_SYSTEM_OPENBSD)

#include <sys/futex.h>

gb_internal void futex_signal(Futex *f) {
	for (;;) {
		int ret = futex((volatile uint32_t *)f, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1, NULL, NULL);
		if (ret == -1) {
			if (errno == ETIMEDOUT || errno == EINTR) {
				continue;
			}
			perror("Futex wake");
			GB_PANIC("futex wake fail");
		} else if (ret == 1) {
			return;
		}
	}
}

gb_internal void futex_broadcast(Futex *f) {
	for (;;) {
		int ret = futex((volatile uint32_t *)f, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, INT32_MAX, NULL, NULL);
		if (ret == -1) {
			if (errno == ETIMEDOUT || errno == EINTR) {
				continue;
			}
			perror("Futex wake");
			GB_PANIC("futex wake fail");
		} else if (ret == 1) {
			return;
		}
	}
}

gb_internal void futex_wait(Futex *f, Footex val) {
	for (;;) {
		int ret = futex((volatile uint32_t *)f, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, val, NULL, NULL);
		if (ret == -1) {
			if (*f != val) {
				return;
			}
			if (errno == ETIMEDOUT || errno == EINTR) {
				continue;
			}
			perror("Futex wait");
			GB_PANIC("Failed in futex wait!\n");
		}
	}
}
#elif defined(GB_SYSTEM_OSX)

#define UL_COMPARE_AND_WAIT 0x00000001
#define ULF_NO_ERRNO        0x01000000

extern "C" int __ulock_wait(uint32_t operation, void *addr, uint64_t value, uint32_t timeout); /* timeout is specified in microseconds */
extern "C" int __ulock_wake(uint32_t operation, void *addr, uint64_t wake_value);

gb_internal void futex_signal(Futex *f) {
	for (;;) {
		int ret = __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, 0);
		if (ret >= 0) {
			return;
		}
		if (ret == -EINTR || ret == -EFAULT) {
			continue;
		}
		if (ret == -ENOENT) {
			return;
		}
		GB_PANIC("Failed in futex wake!\n");
	}
}

gb_internal void futex_broadcast(Futex *f) {
	for (;;) {
		enum { ULF_WAKE_ALL = 0x00000100 };
		int ret = __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO | ULF_WAKE_ALL, f, 0);
		if (ret == 0) {
			return;
		}
		if (ret == -EINTR || ret == -EFAULT) {
			continue;
		}
		if (ret == -ENOENT) {
			return;
		}
		GB_PANIC("Failed in futex wake!\n");
	}
}
gb_internal void futex_wait(Futex *f, Footex val) {
	for (;;) {
		int ret = __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, val, 0);
		if (ret >= 0) {
			if (*f != val) {
				return;
			}
			continue;
		}
		if (ret == -EINTR || ret == -EFAULT) {
			continue;
		}
		if (ret == -ENOENT) {
			return;
		}
		GB_PANIC("Failed in futex wait!\n");
	}
}
#elif defined(GB_SYSTEM_WINDOWS)

gb_internal void futex_signal(Futex *f) {
	WakeByAddressSingle(f);
}

gb_internal void futex_broadcast(Futex *f) {
	WakeByAddressAll(f);
}

gb_internal void futex_wait(Futex *f, Footex val) {
	do {
		WaitOnAddress(f, (void *)&val, sizeof(val), INFINITE);
	} while (f->load() == val);
}
#elif defined(GB_SYSTEM_HAIKU)

// Futex implementation taken from https://tavianator.com/2023/futex.html

#include <pthread.h>
#include <atomic>

struct _Spinlock {
	std::atomic_flag state;

	void init() {
		state.clear();
	}

	void lock() {
		while (state.test_and_set(std::memory_order_acquire)) {
#if defined(GB_CPU_X86)
			_mm_pause();
#else
			(void)0; // spin...
#endif
		}
	}

	void unlock() {
		state.clear(std::memory_order_release);
	}
};

struct Futex_Waitq;

struct Futex_Waiter {
	_Spinlock lock;
	pthread_t thread;
	Futex *futex;
	Futex_Waitq *waitq;
	Futex_Waiter *prev, *next;
};

struct Futex_Waitq {
	_Spinlock lock;
	Futex_Waiter list;

	void init() {
		auto head = &list;
		head->prev = head->next = head;
	}
};

// FIXME: This approach may scale badly in the future,
// possible solution - hash map (leads to deadlocks now).
Futex_Waitq g_waitq = {
	.lock = ATOMIC_FLAG_INIT,
	.list = {
		.prev = &g_waitq.list,
		.next = &g_waitq.list,
	},
};

Futex_Waitq *get_waitq(Futex *f) {
	// Future hash map method...
	return &g_waitq;
}

void futex_signal(Futex *f) {
	auto waitq = get_waitq(f);
	waitq->lock.lock();
	auto head = &waitq->list;
	for (auto waiter = head->next; waiter != head; waiter = waiter->next) {
		if (waiter->futex != f) {
			continue;
		}
		waitq->lock.unlock();
		pthread_kill(waiter->thread, SIGCONT);
		return;
	}
	waitq->lock.unlock();
}

void futex_broadcast(Futex *f) {
	auto waitq = get_waitq(f);
	waitq->lock.lock();
	auto head = &waitq->list;
	for (auto waiter = head->next; waiter != head; waiter = waiter->next) {
		if (waiter->futex != f) {
			continue;
		}
		if (waiter->next == head) {
			waitq->lock.unlock();
			pthread_kill(waiter->thread, SIGCONT);
			return;
		} else {
			pthread_kill(waiter->thread, SIGCONT);
		}
	}
	waitq->lock.unlock();
}
void futex_wait(Futex *f, Footex val) {
	Futex_Waiter waiter;
	waiter.thread = pthread_self();
	waiter.futex = f;

	auto waitq = get_waitq(f);
	while (waitq->lock.state.test_and_set(std::memory_order_acquire)) {
		if (f->load(std::memory_order_relaxed) != val) {
			return;
		}
#if defined(GB_CPU_X86)
		_mm_pause();
#else
		(void)0; // spin...
#endif
	}
	waiter.waitq = waitq;
	waiter.lock.init();
	waiter.lock.lock();

	// Link the waiter into the queue's circular doubly-linked list.
	auto head = &waitq->list;
	waiter.prev = head->prev;
	waiter.next = head;
	waiter.prev->next = &waiter;
	waiter.next->prev = &waiter;

	sigset_t old_mask, mask;
	sigemptyset(&mask);
	sigaddset(&mask, SIGCONT);
	pthread_sigmask(SIG_BLOCK, &mask, &old_mask);

	if (f->load(std::memory_order_relaxed) == val) {
		waiter.lock.unlock();
		waitq->lock.unlock();

		int sig;
		sigwait(&mask, &sig);

		waitq->lock.lock();
		waiter.lock.lock();
		while (waitq != waiter.waitq) {
			auto req = waiter.waitq;
			waiter.lock.unlock();
			waitq->lock.unlock();
			waitq = req;
			waitq->lock.lock();
			waiter.lock.lock();
		}
	}

	// Unlink the waiter and restore the previous signal mask.
	waiter.prev->next = waiter.next;
	waiter.next->prev = waiter.prev;
	pthread_sigmask(SIG_SETMASK, &old_mask, NULL);

	waiter.lock.unlock();
	waitq->lock.unlock();
}
#endif

#if defined(GB_SYSTEM_WINDOWS)
#pragma warning(pop)
#endif