lzham_pthreads_threading.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  // File: lzham_pthreads_threading.h
  2. // See Copyright Notice and license at the end of include/lzham.h
  3. #pragma once
  4. #if LZHAM_USE_PTHREADS_API
  5. #if LZHAM_NO_ATOMICS
  6. #error No atomic operations defined in lzham_platform.h!
  7. #endif
  8. #ifdef __APPLE__
  9. #include <libkern/OSAtomic.h>
  10. #endif
  #include <errno.h>
  #include <fcntl.h>
  #include <pthread.h>
  #include <semaphore.h>
  #include <time.h>
  #include <unistd.h>
  #include <sys/stat.h>
  #include <sys/time.h>
  // One step of a linear congruential generator: returns 69069 * jcong + 1234567
  // (unsigned arithmetic, wraps on overflow). The argument is parenthesized so
  // that compound expressions such as LZHAM_RND_CONG(a + b) expand correctly.
  #define LZHAM_RND_CONG(jcong) (69069U * (jcong) + 1234567U)
  16. namespace lzham
  17. {
  18. // semaphore
  19. #ifdef __APPLE__
  20. class semaphore
  21. {
  22. LZHAM_NO_COPY_OR_ASSIGNMENT_OP(semaphore);
  23. public:
  24. inline semaphore(long initialCount, long maximumCount)
  25. {
  26. LZHAM_NOTE_UNUSED(maximumCount);
  27. LZHAM_ASSERT(maximumCount >= initialCount);
  28. for (uint tries = 0; tries < 16; tries++)
  29. {
  30. struct timeval tp;
  31. struct timezone tzp;
  32. gettimeofday(&tp, &tzp);
  33. uint x = tp.tv_usec;
  34. // Argh this stinks. Try to choose a name that won't conflict with anything the calling process uses.
  35. for (uint i = 0; i < sizeof(m_name) - 1; i++)
  36. {
  37. x = LZHAM_RND_CONG(x);
  38. char c = 'A' + (static_cast<uint8>(x ^ (x >> 20)) % 26);
  39. m_name[i] = c;
  40. }
  41. m_name[sizeof(m_name) - 1] = '\0';
  42. m_pSem = sem_open(m_name, O_CREAT | O_EXCL, S_IRWXU, initialCount);
  43. if (m_pSem != SEM_FAILED)
  44. break;
  45. }
  46. if (m_pSem == SEM_FAILED)
  47. {
  48. LZHAM_FAIL("semaphore: sem_init() failed");
  49. }
  50. }
  51. inline ~semaphore()
  52. {
  53. sem_close(m_pSem);
  54. sem_unlink(m_name);
  55. }
  56. inline void release(long releaseCount = 1)
  57. {
  58. LZHAM_ASSERT(releaseCount >= 1);
  59. int status = 0;
  60. #ifdef WIN32
  61. if (1 == releaseCount)
  62. status = sem_post(m_pSem);
  63. else
  64. status = sem_post_multiple(m_pSem, releaseCount);
  65. #else
  66. while (releaseCount > 0)
  67. {
  68. status = sem_post(m_pSem);
  69. if (status)
  70. break;
  71. releaseCount--;
  72. }
  73. #endif
  74. if (status)
  75. {
  76. LZHAM_FAIL("semaphore: sem_post() or sem_post_multiple() failed");
  77. }
  78. }
  79. inline bool wait()
  80. {
  81. int status = sem_wait(m_pSem);
  82. if (status)
  83. {
  84. if (errno != ETIMEDOUT)
  85. {
  86. LZHAM_FAIL("semaphore: sem_wait() or sem_timedwait() failed");
  87. }
  88. return false;
  89. }
  90. return true;
  91. }
  92. private:
  93. sem_t *m_pSem;
  94. char m_name[16];
  95. };
  96. #else
  97. class semaphore
  98. {
  99. LZHAM_NO_COPY_OR_ASSIGNMENT_OP(semaphore);
  100. public:
  101. inline semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL)
  102. {
  103. LZHAM_NOTE_UNUSED(maximumCount), LZHAM_NOTE_UNUSED(pName);
  104. LZHAM_ASSERT(maximumCount >= initialCount);
  105. if (sem_init(&m_sem, 0, initialCount))
  106. {
  107. LZHAM_FAIL("semaphore: sem_init() failed");
  108. }
  109. }
  110. inline ~semaphore()
  111. {
  112. sem_destroy(&m_sem);
  113. }
  114. inline void release(long releaseCount = 1)
  115. {
  116. LZHAM_ASSERT(releaseCount >= 1);
  117. int status = 0;
  118. #ifdef WIN32
  119. if (1 == releaseCount)
  120. status = sem_post(&m_sem);
  121. else
  122. status = sem_post_multiple(&m_sem, releaseCount);
  123. #else
  124. while (releaseCount > 0)
  125. {
  126. status = sem_post(&m_sem);
  127. if (status)
  128. break;
  129. releaseCount--;
  130. }
  131. #endif
  132. if (status)
  133. {
  134. LZHAM_FAIL("semaphore: sem_post() or sem_post_multiple() failed");
  135. }
  136. }
  137. inline bool wait(uint32 milliseconds = UINT32_MAX)
  138. {
  139. int status;
  140. if (milliseconds == UINT32_MAX)
  141. {
  142. status = sem_wait(&m_sem);
  143. }
  144. else
  145. {
  146. struct timespec interval;
  147. interval.tv_sec = milliseconds / 1000;
  148. interval.tv_nsec = (milliseconds % 1000) * 1000000L;
  149. status = sem_timedwait(&m_sem, &interval);
  150. }
  151. if (status)
  152. {
  153. if (errno != ETIMEDOUT)
  154. {
  155. LZHAM_FAIL("semaphore: sem_wait() or sem_timedwait() failed");
  156. }
  157. return false;
  158. }
  159. return true;
  160. }
  161. private:
  162. sem_t m_sem;
  163. };
  164. #endif
  165. // spinlock
  166. #ifdef __APPLE__
  167. class spinlock
  168. {
  169. public:
  170. inline spinlock() : m_lock(0)
  171. {
  172. }
  173. inline ~spinlock()
  174. {
  175. }
  176. inline void lock()
  177. {
  178. OSSpinLockLock(&m_lock);
  179. }
  180. inline void unlock()
  181. {
  182. OSSpinLockUnlock(&m_lock);
  183. }
  184. private:
  185. OSSpinLock m_lock;
  186. };
  187. #else
  188. class spinlock
  189. {
  190. public:
  191. inline spinlock()
  192. {
  193. if (pthread_spin_init(&m_spinlock, 0))
  194. {
  195. LZHAM_FAIL("spinlock: pthread_spin_init() failed");
  196. }
  197. }
  198. inline ~spinlock()
  199. {
  200. pthread_spin_destroy(&m_spinlock);
  201. }
  202. inline void lock()
  203. {
  204. if (pthread_spin_lock(&m_spinlock))
  205. {
  206. LZHAM_FAIL("spinlock: pthread_spin_lock() failed");
  207. }
  208. }
  209. inline void unlock()
  210. {
  211. if (pthread_spin_unlock(&m_spinlock))
  212. {
  213. LZHAM_FAIL("spinlock: pthread_spin_unlock() failed");
  214. }
  215. }
  216. private:
  217. pthread_spinlock_t m_spinlock;
  218. };
  219. #endif // __APPLE__
  220. // Thread safe stack
  221. template<typename T, uint cMaxSize>
  222. class tsstack
  223. {
  224. public:
  225. inline tsstack() : m_top(0)
  226. {
  227. }
  228. inline ~tsstack()
  229. {
  230. }
  231. inline void clear()
  232. {
  233. m_spinlock.lock();
  234. m_top = 0;
  235. m_spinlock.unlock();
  236. }
  237. inline bool try_push(const T& obj)
  238. {
  239. bool result = false;
  240. m_spinlock.lock();
  241. if (m_top < (int)cMaxSize)
  242. {
  243. m_stack[m_top++] = obj;
  244. result = true;
  245. }
  246. m_spinlock.unlock();
  247. return result;
  248. }
  249. inline bool pop(T& obj)
  250. {
  251. bool result = false;
  252. m_spinlock.lock();
  253. if (m_top > 0)
  254. {
  255. obj = m_stack[--m_top];
  256. result = true;
  257. }
  258. m_spinlock.unlock();
  259. return result;
  260. }
  261. private:
  262. spinlock m_spinlock;
  263. T m_stack[cMaxSize];
  264. int m_top;
  265. };
  266. // Simple task pool
  267. class task_pool
  268. {
  269. public:
  270. task_pool();
  271. task_pool(uint num_threads);
  272. ~task_pool();
  273. enum { cMaxThreads = LZHAM_MAX_HELPER_THREADS };
  274. bool init(uint num_threads);
  275. void deinit();
  276. inline uint get_num_threads() const { return m_num_threads; }
  277. inline uint get_num_outstanding_tasks() const { return static_cast<uint>(m_num_outstanding_tasks); }
  278. // C-style task callback
  279. typedef void (*task_callback_func)(uint64 data, void* pData_ptr);
  280. bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL);
  281. class executable_task
  282. {
  283. public:
  284. virtual void execute_task(uint64 data, void* pData_ptr) = 0;
  285. };
  286. // It's the caller's responsibility to delete pObj within the execute_task() method, if needed!
  287. bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL);
  288. template<typename S, typename T>
  289. inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL);
  290. template<typename S, typename T>
  291. inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL);
  292. void join();
  293. private:
  294. struct task
  295. {
  296. inline task() : m_data(0), m_pData_ptr(NULL), m_pObj(NULL), m_flags(0) { }
  297. uint64 m_data;
  298. void* m_pData_ptr;
  299. union
  300. {
  301. task_callback_func m_callback;
  302. executable_task* m_pObj;
  303. };
  304. uint m_flags;
  305. };
  306. tsstack<task, cMaxThreads> m_task_stack;
  307. uint m_num_threads;
  308. pthread_t m_threads[cMaxThreads];
  309. semaphore m_tasks_available;
  310. enum task_flags
  311. {
  312. cTaskFlagObject = 1
  313. };
  314. volatile atomic32_t m_num_outstanding_tasks;
  315. volatile atomic32_t m_exit_flag;
  316. void process_task(task& tsk);
  317. static void* thread_func(void *pContext);
  318. };
  // Bit flags stored in object_task::m_flags.
  enum object_task_flags
  {
    // No special handling after the task has run.
    cObjectTaskFlagDefault = 0x0,

    // The object_task deletes itself once execute_task() has called the method.
    cObjectTaskFlagDeleteAfterExecution = 0x1
  };
  324. template<typename T>
  325. class object_task : public task_pool::executable_task
  326. {
  327. public:
  328. object_task(uint flags = cObjectTaskFlagDefault) :
  329. m_pObject(NULL),
  330. m_pMethod(NULL),
  331. m_flags(flags)
  332. {
  333. }
  334. typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr);
  335. object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) :
  336. m_pObject(pObject),
  337. m_pMethod(pMethod),
  338. m_flags(flags)
  339. {
  340. LZHAM_ASSERT(pObject && pMethod);
  341. }
  342. void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault)
  343. {
  344. LZHAM_ASSERT(pObject && pMethod);
  345. m_pObject = pObject;
  346. m_pMethod = pMethod;
  347. m_flags = flags;
  348. }
  349. T* get_object() const { return m_pObject; }
  350. object_method_ptr get_method() const { return m_pMethod; }
  351. virtual void execute_task(uint64 data, void* pData_ptr)
  352. {
  353. (m_pObject->*m_pMethod)(data, pData_ptr);
  354. if (m_flags & cObjectTaskFlagDeleteAfterExecution)
  355. lzham_delete(this);
  356. }
  357. protected:
  358. T* m_pObject;
  359. object_method_ptr m_pMethod;
  360. uint m_flags;
  361. };
  362. template<typename S, typename T>
  363. inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr)
  364. {
  365. object_task<S> *pTask = lzham_new< object_task<S> >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution);
  366. if (!pTask)
  367. return false;
  368. return queue_task(pTask, data, pData_ptr);
  369. }
  370. template<typename S, typename T>
  371. inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr)
  372. {
  373. LZHAM_ASSERT(m_num_threads);
  374. LZHAM_ASSERT(pObject);
  375. LZHAM_ASSERT(num_tasks);
  376. if (!num_tasks)
  377. return true;
  378. bool status = true;
  379. uint i;
  380. for (i = 0; i < num_tasks; i++)
  381. {
  382. task tsk;
  383. tsk.m_pObj = lzham_new< object_task<S> >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution);
  384. if (!tsk.m_pObj)
  385. {
  386. status = false;
  387. break;
  388. }
  389. tsk.m_data = first_data + i;
  390. tsk.m_pData_ptr = pData_ptr;
  391. tsk.m_flags = cTaskFlagObject;
  392. if (!m_task_stack.try_push(tsk))
  393. {
  394. status = false;
  395. break;
  396. }
  397. }
  398. if (i)
  399. {
  400. atomic_add32(&m_num_outstanding_tasks, i);
  401. m_tasks_available.release(i);
  402. }
  403. return status;
  404. }
  405. // Sleep
  // Suspends the calling thread for approximately the given number of milliseconds.
  inline void lzham_sleep(unsigned int milliseconds)
  {
  #ifdef WIN32
    struct timespec interval;
    interval.tv_sec = milliseconds / 1000;
    interval.tv_nsec = (milliseconds % 1000) * 1000000L;
    pthread_delay_np(&interval);
  #else
    // POSIX allows usleep() to fail with EINVAL when asked to sleep for
    // 1,000,000 microseconds or more, so sleep in chunks of at most 999 ms.
    const unsigned int cMaxChunkMs = 999;
    while (milliseconds)
    {
      const unsigned int chunk_ms = (milliseconds < cMaxChunkMs) ? milliseconds : cMaxChunkMs;
      usleep(chunk_ms * 1000);
      milliseconds -= chunk_ms;
    }
  #endif
  }
  422. // Returns number of helper threads we can add to the process on the current system (i.e. for a 4 CPU system this returns 3).
  423. uint lzham_get_max_helper_threads();
  424. } // namespace lzham
  425. #endif // LZHAM_USE_PTHREADS_API