thread.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #include "thread.h"
  4. #include "sysinfo.h"
  5. #include "estring.h"
  6. #include <iostream>
  7. #if defined(__ARM_NEON)
  8. #include "../simd/arm/emulation.h"
  9. #else
  10. #include <xmmintrin.h>
  11. #if defined(__EMSCRIPTEN__)
  12. #include "../simd/wasm/emulation.h"
  13. #endif
  14. #endif
  15. #if defined(PTHREADS_WIN32)
  16. #pragma comment (lib, "pthreadVC.lib")
  17. #endif
  18. ////////////////////////////////////////////////////////////////////////////////
  19. /// Windows Platform
  20. ////////////////////////////////////////////////////////////////////////////////
  21. #if defined(__WIN32__)
  22. #define WIN32_LEAN_AND_MEAN
  23. #include <windows.h>
  24. namespace embree
  25. {
  26. /*! set the affinity of a given thread */
  27. void setAffinity(HANDLE thread, ssize_t affinity)
  28. {
  29. typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
  30. typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
  31. typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
  32. typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
  33. HMODULE hlib = LoadLibrary("Kernel32");
  34. GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
  35. GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
  36. SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
  37. SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
  38. if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
  39. {
  40. int groups = pGetActiveProcessorGroupCount();
  41. int totalProcessors = 0, group = 0, number = 0;
  42. for (int i = 0; i<groups; i++) {
  43. int processors = pGetActiveProcessorCount(i);
  44. if (totalProcessors + processors > affinity) {
  45. group = i;
  46. number = (int)affinity - totalProcessors;
  47. break;
  48. }
  49. totalProcessors += processors;
  50. }
  51. GROUP_AFFINITY groupAffinity;
  52. groupAffinity.Group = (WORD)group;
  53. groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
  54. groupAffinity.Reserved[0] = 0;
  55. groupAffinity.Reserved[1] = 0;
  56. groupAffinity.Reserved[2] = 0;
  57. if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
  58. WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning
  59. PROCESSOR_NUMBER processorNumber;
  60. processorNumber.Group = group;
  61. processorNumber.Number = number;
  62. processorNumber.Reserved = 0;
  63. if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
  64. WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
  65. }
  66. else
  67. {
  68. if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
  69. WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
  70. if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
  71. WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
  72. }
  73. }
  74. /*! set affinity of the calling thread */
  75. void setAffinity(ssize_t affinity) {
  76. setAffinity(GetCurrentThread(), affinity);
  77. }
  78. struct ThreadStartupData
  79. {
  80. public:
  81. ThreadStartupData (thread_func f, void* arg)
  82. : f(f), arg(arg) {}
  83. public:
  84. thread_func f;
  85. void* arg;
  86. };
  87. DWORD WINAPI threadStartup(LPVOID ptr)
  88. {
  89. ThreadStartupData* parg = (ThreadStartupData*) ptr;
  90. _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
  91. parg->f(parg->arg);
  92. delete parg;
  93. return 0;
  94. }
  95. #if !defined(PTHREADS_WIN32)
  96. /*! creates a hardware thread running on specific core */
  97. thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
  98. {
  99. HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);
  100. if (thread == nullptr) FATAL("CreateThread failed");
  101. if (threadID >= 0) setAffinity(thread, threadID);
  102. return thread_t(thread);
  103. }
  104. /*! the thread calling this function gets yielded */
  105. void yield() {
  106. SwitchToThread();
  107. }
  108. /*! waits until the given thread has terminated */
  109. void join(thread_t tid) {
  110. WaitForSingleObject(HANDLE(tid), INFINITE);
  111. CloseHandle(HANDLE(tid));
  112. }
  113. /*! destroy a hardware thread by its handle */
  114. void destroyThread(thread_t tid) {
  115. TerminateThread(HANDLE(tid),0);
  116. CloseHandle(HANDLE(tid));
  117. }
  118. /*! creates thread local storage */
  119. tls_t createTls() {
  120. return tls_t(size_t(TlsAlloc()));
  121. }
  122. /*! set the thread local storage pointer */
  123. void setTls(tls_t tls, void* const ptr) {
  124. TlsSetValue(DWORD(size_t(tls)), ptr);
  125. }
  126. /*! return the thread local storage pointer */
  127. void* getTls(tls_t tls) {
  128. return TlsGetValue(DWORD(size_t(tls)));
  129. }
  130. /*! destroys thread local storage identifier */
  131. void destroyTls(tls_t tls) {
  132. TlsFree(DWORD(size_t(tls)));
  133. }
  134. #endif
  135. }
  136. #endif
  137. ////////////////////////////////////////////////////////////////////////////////
  138. /// Linux Platform
  139. ////////////////////////////////////////////////////////////////////////////////
  140. #if defined(__LINUX__) && !defined(__ANDROID__)
  141. #include <fstream>
  142. #include <sstream>
  143. #include <algorithm>
  144. namespace embree
  145. {
  146. static MutexSys mutex;
  147. static std::vector<size_t> threadIDs;
  148. /* changes thread ID mapping such that we first fill up all thread on one core */
  149. size_t mapThreadID(size_t threadID)
  150. {
  151. Lock<MutexSys> lock(mutex);
  152. if (threadIDs.size() == 0)
  153. {
  154. /* parse thread/CPU topology */
  155. for (size_t cpuID=0;;cpuID++)
  156. {
  157. std::fstream fs;
  158. std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list");
  159. fs.open (cpu.c_str(), std::fstream::in);
  160. if (fs.fail()) break;
  161. int i;
  162. while (fs >> i)
  163. {
  164. if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; }))
  165. threadIDs.push_back(i);
  166. if (fs.peek() == ',')
  167. fs.ignore();
  168. }
  169. fs.close();
  170. }
  171. #if 0
  172. for (size_t i=0;i<threadIDs.size();i++)
  173. std::cout << i << " -> " << threadIDs[i] << std::endl;
  174. #endif
  175. /* verify the mapping and do not use it if the mapping has errors */
  176. for (size_t i=0;i<threadIDs.size();i++) {
  177. for (size_t j=0;j<threadIDs.size();j++) {
  178. if (i != j && threadIDs[i] == threadIDs[j]) {
  179. threadIDs.clear();
  180. }
  181. }
  182. }
  183. }
  184. /* re-map threadIDs if mapping is available */
  185. size_t ID = threadID;
  186. if (threadID < threadIDs.size())
  187. ID = threadIDs[threadID];
  188. /* find correct thread to affinitize to */
  189. cpu_set_t set;
  190. CPU_ZERO(&set);
  191. if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
  192. {
  193. for (int i=0, j=0; i<CPU_SETSIZE; i++)
  194. {
  195. if (!CPU_ISSET(i,&set)) continue;
  196. if (j == ID) {
  197. ID = i;
  198. break;
  199. }
  200. j++;
  201. }
  202. }
  203. return ID;
  204. }
  /*! Pins the calling thread to the CPU with number \p affinity. The result
   *  of pthread_setaffinity_np is not inspected. */
  void setAffinity(ssize_t affinity)
  {
    /* The index is used as is instead of going through mapThreadID: that
       mapping is not working properly in LXC containers when some processors
       are disabled. */
    const size_t cpu = affinity;

    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
    pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
  }
  215. }
  216. #endif
  217. ////////////////////////////////////////////////////////////////////////////////
  218. /// Android Platform
  219. ////////////////////////////////////////////////////////////////////////////////
  220. #if defined(__ANDROID__)
  221. namespace embree
  222. {
  /*! Pins the calling thread to the CPU with number \p affinity. The result
   *  of sched_setaffinity is not inspected. */
  void setAffinity(ssize_t affinity)
  {
    const pid_t callingThread = 0; // same literal 0 the original passed as the pid argument

    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(affinity, &mask);
    sched_setaffinity(callingThread, sizeof(mask), &mask);
  }
  231. }
  232. #endif
  233. ////////////////////////////////////////////////////////////////////////////////
  234. /// FreeBSD Platform
  235. ////////////////////////////////////////////////////////////////////////////////
  236. #if defined(__FreeBSD__)
  237. #include <pthread_np.h>
  238. namespace embree
  239. {
  240. /*! set affinity of the calling thread */
  241. void setAffinity(ssize_t affinity)
  242. {
  243. cpuset_t cset;
  244. CPU_ZERO(&cset);
  245. CPU_SET(affinity, &cset);
  246. pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
  247. }
  248. }
  249. #endif
  250. ////////////////////////////////////////////////////////////////////////////////
  251. /// WebAssembly Platform
  252. ////////////////////////////////////////////////////////////////////////////////
  253. #if defined(__EMSCRIPTEN__)
  254. namespace embree
  255. {
  /*! No-op: setting thread affinity is not supported in WASM. The parameter
   *  is accepted only to keep the signature of the other platforms. */
  void setAffinity(ssize_t affinity)
  {
    (void)affinity;
  }
  261. }
  262. #endif
  263. ////////////////////////////////////////////////////////////////////////////////
  264. /// MacOSX Platform
  265. ////////////////////////////////////////////////////////////////////////////////
  266. #if defined(__MACOSX__)
  267. #include <mach/thread_act.h>
  268. #include <mach/thread_policy.h>
  269. #include <mach/mach_init.h>
  270. namespace embree
  271. {
  272. /*! set affinity of the calling thread */
  273. void setAffinity(ssize_t affinity)
  274. {
  275. #if !defined(__ARM_NEON) // affinity seems not supported on M1 chip
  276. thread_affinity_policy ap;
  277. ap.affinity_tag = affinity;
  278. if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
  279. WARNING("setting thread affinity failed"); // on purpose only a warning
  280. #endif
  281. }
  282. }
  283. #endif
  284. ////////////////////////////////////////////////////////////////////////////////
  285. /// Unix Platform
  286. ////////////////////////////////////////////////////////////////////////////////
  287. #if defined(__UNIX__) || defined(PTHREADS_WIN32)
  288. #include <pthread.h>
  289. #include <sched.h>
  290. #if defined(__USE_NUMA__)
  291. #include <numa.h>
  292. #endif
  293. namespace embree
  294. {
  295. struct ThreadStartupData
  296. {
  297. public:
  298. ThreadStartupData (thread_func f, void* arg, int affinity)
  299. : f(f), arg(arg), affinity(affinity) {}
  300. public:
  301. thread_func f;
  302. void* arg;
  303. ssize_t affinity;
  304. };
  305. static void* threadStartup(ThreadStartupData* parg)
  306. {
  307. _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
  308. /*! Mac OS X does not support setting affinity at thread creation time */
  309. #if defined(__MACOSX__)
  310. if (parg->affinity >= 0)
  311. setAffinity(parg->affinity);
  312. #endif
  313. parg->f(parg->arg);
  314. delete parg;
  315. return nullptr;
  316. }
  317. /*! creates a hardware thread running on specific core */
  318. thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
  319. {
  320. /* set stack size */
  321. pthread_attr_t attr;
  322. pthread_attr_init(&attr);
  323. if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size);
  324. /* create thread */
  325. pthread_t* tid = new pthread_t;
  326. if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) {
  327. pthread_attr_destroy(&attr);
  328. delete tid;
  329. FATAL("pthread_create failed");
  330. }
  331. pthread_attr_destroy(&attr);
  332. /* set affinity */
  333. #if defined(__LINUX__) && !defined(__ANDROID__)
  334. if (threadID >= 0) {
  335. cpu_set_t cset;
  336. CPU_ZERO(&cset);
  337. threadID = mapThreadID(threadID);
  338. CPU_SET(threadID, &cset);
  339. pthread_setaffinity_np(*tid, sizeof(cset), &cset);
  340. }
  341. #elif defined(__FreeBSD__)
  342. if (threadID >= 0) {
  343. cpuset_t cset;
  344. CPU_ZERO(&cset);
  345. CPU_SET(threadID, &cset);
  346. pthread_setaffinity_np(*tid, sizeof(cset), &cset);
  347. }
  348. #elif defined(__ANDROID__)
  349. if (threadID >= 0) {
  350. cpu_set_t cset;
  351. CPU_ZERO(&cset);
  352. CPU_SET(threadID, &cset);
  353. sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset);
  354. }
  355. #endif
  356. return thread_t(tid);
  357. }
  /*! Yields the calling thread via sched_yield. The result of the call is
   *  not inspected. */
  void yield()
  {
    (void)sched_yield();
  }
  362. /*! waits until the given thread has terminated */
  363. void join(thread_t tid) {
  364. if (pthread_join(*(pthread_t*)tid, nullptr) != 0)
  365. FATAL("pthread_join failed");
  366. delete (pthread_t*)tid;
  367. }
  368. /*! destroy a hardware thread by its handle */
  369. void destroyThread(thread_t tid) {
  370. #if defined(__ANDROID__)
  371. FATAL("Can't destroy threads on Android."); // pthread_cancel not implemented.
  372. #else
  373. pthread_cancel(*(pthread_t*)tid);
  374. delete (pthread_t*)tid;
  375. #endif
  376. }
  377. /*! creates thread local storage */
  378. tls_t createTls()
  379. {
  380. pthread_key_t* key = new pthread_key_t;
  381. if (pthread_key_create(key,nullptr) != 0) {
  382. delete key;
  383. FATAL("pthread_key_create failed");
  384. }
  385. return tls_t(key);
  386. }
  387. /*! return the thread local storage pointer */
  388. void* getTls(tls_t tls)
  389. {
  390. assert(tls);
  391. return pthread_getspecific(*(pthread_key_t*)tls);
  392. }
  393. /*! set the thread local storage pointer */
  394. void setTls(tls_t tls, void* const ptr)
  395. {
  396. assert(tls);
  397. if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0)
  398. FATAL("pthread_setspecific failed");
  399. }
  400. /*! destroys thread local storage identifier */
  401. void destroyTls(tls_t tls)
  402. {
  403. assert(tls);
  404. if (pthread_key_delete(*(pthread_key_t*)tls) != 0)
  405. FATAL("pthread_key_delete failed");
  406. delete (pthread_key_t*)tls;
  407. }
  408. }
  409. #endif