thread.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #include "thread.h"
  4. #include "sysinfo.h"
  5. #include "string.h"
  6. #include <iostream>
  7. #if defined(__ARM_NEON)
  8. #include "../simd/arm/emulation.h"
  9. #else
  10. #include <xmmintrin.h>
  11. #endif
  12. #if defined(PTHREADS_WIN32)
  13. #pragma comment (lib, "pthreadVC.lib")
  14. #endif
  15. ////////////////////////////////////////////////////////////////////////////////
  16. /// Windows Platform
  17. ////////////////////////////////////////////////////////////////////////////////
  18. #if defined(__WIN32__)
  19. #define WIN32_LEAN_AND_MEAN
  20. #include <windows.h>
  21. namespace embree
  22. {
  23. /*! set the affinity of a given thread */
  24. void setAffinity(HANDLE thread, ssize_t affinity)
  25. {
  26. typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
  27. typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
  28. typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
  29. typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
  30. HMODULE hlib = LoadLibrary("Kernel32");
  31. GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
  32. GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
  33. SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
  34. SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
  35. if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
  36. {
  37. int groups = pGetActiveProcessorGroupCount();
  38. int totalProcessors = 0, group = 0, number = 0;
  39. for (int i = 0; i<groups; i++) {
  40. int processors = pGetActiveProcessorCount(i);
  41. if (totalProcessors + processors > affinity) {
  42. group = i;
  43. number = (int)affinity - totalProcessors;
  44. break;
  45. }
  46. totalProcessors += processors;
  47. }
  48. GROUP_AFFINITY groupAffinity;
  49. groupAffinity.Group = (WORD)group;
  50. groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
  51. groupAffinity.Reserved[0] = 0;
  52. groupAffinity.Reserved[1] = 0;
  53. groupAffinity.Reserved[2] = 0;
  54. if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
  55. WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning
  56. PROCESSOR_NUMBER processorNumber;
  57. processorNumber.Group = group;
  58. processorNumber.Number = number;
  59. processorNumber.Reserved = 0;
  60. if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
  61. WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
  62. }
  63. else
  64. {
  65. if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
  66. WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
  67. if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
  68. WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
  69. }
  70. }
  71. /*! set affinity of the calling thread */
  72. void setAffinity(ssize_t affinity) {
  73. setAffinity(GetCurrentThread(), affinity);
  74. }
  75. struct ThreadStartupData
  76. {
  77. public:
  78. ThreadStartupData (thread_func f, void* arg)
  79. : f(f), arg(arg) {}
  80. public:
  81. thread_func f;
  82. void* arg;
  83. };
  84. DWORD WINAPI threadStartup(LPVOID ptr)
  85. {
  86. ThreadStartupData* parg = (ThreadStartupData*) ptr;
  87. _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
  88. parg->f(parg->arg);
  89. delete parg;
  90. return 0;
  91. }
  92. #if !defined(PTHREADS_WIN32)
  93. /*! creates a hardware thread running on specific core */
  94. thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
  95. {
  96. HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);
  97. if (thread == nullptr) FATAL("CreateThread failed");
  98. if (threadID >= 0) setAffinity(thread, threadID);
  99. return thread_t(thread);
  100. }
  101. /*! the thread calling this function gets yielded */
  102. void yield() {
  103. SwitchToThread();
  104. }
  105. /*! waits until the given thread has terminated */
  106. void join(thread_t tid) {
  107. WaitForSingleObject(HANDLE(tid), INFINITE);
  108. CloseHandle(HANDLE(tid));
  109. }
  110. /*! destroy a hardware thread by its handle */
  111. void destroyThread(thread_t tid) {
  112. TerminateThread(HANDLE(tid),0);
  113. CloseHandle(HANDLE(tid));
  114. }
  115. /*! creates thread local storage */
  116. tls_t createTls() {
  117. return tls_t(size_t(TlsAlloc()));
  118. }
  119. /*! set the thread local storage pointer */
  120. void setTls(tls_t tls, void* const ptr) {
  121. TlsSetValue(DWORD(size_t(tls)), ptr);
  122. }
  123. /*! return the thread local storage pointer */
  124. void* getTls(tls_t tls) {
  125. return TlsGetValue(DWORD(size_t(tls)));
  126. }
  127. /*! destroys thread local storage identifier */
  128. void destroyTls(tls_t tls) {
  129. TlsFree(DWORD(size_t(tls)));
  130. }
  131. #endif
  132. }
  133. #endif
  134. ////////////////////////////////////////////////////////////////////////////////
  135. /// Linux Platform
  136. ////////////////////////////////////////////////////////////////////////////////
  137. // -- GODOT start --
  138. #if defined(__LINUX__) && !defined(__ANDROID__)
  139. // -- GODOT end --
  140. #include <fstream>
  141. #include <sstream>
  142. #include <algorithm>
  143. namespace embree
  144. {
  145. static MutexSys mutex;
  146. static std::vector<size_t> threadIDs;
  147. /* changes thread ID mapping such that we first fill up all thread on one core */
  148. size_t mapThreadID(size_t threadID)
  149. {
  150. Lock<MutexSys> lock(mutex);
  151. if (threadIDs.size() == 0)
  152. {
  153. /* parse thread/CPU topology */
  154. for (size_t cpuID=0;;cpuID++)
  155. {
  156. std::fstream fs;
  157. std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list");
  158. fs.open (cpu.c_str(), std::fstream::in);
  159. if (fs.fail()) break;
  160. int i;
  161. while (fs >> i)
  162. {
  163. if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; }))
  164. threadIDs.push_back(i);
  165. if (fs.peek() == ',')
  166. fs.ignore();
  167. }
  168. fs.close();
  169. }
  170. #if 0
  171. for (size_t i=0;i<threadIDs.size();i++)
  172. std::cout << i << " -> " << threadIDs[i] << std::endl;
  173. #endif
  174. /* verify the mapping and do not use it if the mapping has errors */
  175. for (size_t i=0;i<threadIDs.size();i++) {
  176. for (size_t j=0;j<threadIDs.size();j++) {
  177. if (i != j && threadIDs[i] == threadIDs[j]) {
  178. threadIDs.clear();
  179. }
  180. }
  181. }
  182. }
  183. /* re-map threadIDs if mapping is available */
  184. size_t ID = threadID;
  185. if (threadID < threadIDs.size())
  186. ID = threadIDs[threadID];
  187. /* find correct thread to affinitize to */
  188. cpu_set_t set;
  189. if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
  190. {
  191. for (int i=0, j=0; i<CPU_SETSIZE; i++)
  192. {
  193. if (!CPU_ISSET(i,&set)) continue;
  194. if (j == ID) {
  195. ID = i;
  196. break;
  197. }
  198. j++;
  199. }
  200. }
  201. return ID;
  202. }
  203. /*! set affinity of the calling thread */
  204. void setAffinity(ssize_t affinity)
  205. {
  206. cpu_set_t cset;
  207. CPU_ZERO(&cset);
  208. size_t threadID = mapThreadID(affinity);
  209. CPU_SET(threadID, &cset);
  210. pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
  211. }
  212. }
  213. #endif
  214. // -- GODOT start --
  215. ////////////////////////////////////////////////////////////////////////////////
  216. /// Android Platform
  217. ////////////////////////////////////////////////////////////////////////////////
  218. #if defined(__ANDROID__)
  219. namespace embree
  220. {
  /*! Restricts the calling thread to the CPU with the given index. The index
   *  is used as is and the result of sched_setaffinity is not inspected. */
  void setAffinity(ssize_t affinity)
  {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(affinity, &mask);

    const size_t maskSize = sizeof(mask);
    sched_setaffinity(0, maskSize, &mask);
  }
  229. }
  230. #endif
  231. // -- GODOT end --
  232. ////////////////////////////////////////////////////////////////////////////////
  233. /// FreeBSD Platform
  234. ////////////////////////////////////////////////////////////////////////////////
  235. #if defined(__FreeBSD__)
  236. #include <pthread_np.h>
  237. namespace embree
  238. {
  239. /*! set affinity of the calling thread */
  240. void setAffinity(ssize_t affinity)
  241. {
  242. cpuset_t cset;
  243. CPU_ZERO(&cset);
  244. CPU_SET(affinity, &cset);
  245. pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
  246. }
  247. }
  248. #endif
  249. ////////////////////////////////////////////////////////////////////////////////
  250. /// MacOSX Platform
  251. ////////////////////////////////////////////////////////////////////////////////
  252. #if defined(__MACOSX__)
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
#include <mach/mach_init.h>
#include <mach/mach_port.h>
  256. namespace embree
  257. {
  258. /*! set affinity of the calling thread */
  259. void setAffinity(ssize_t affinity)
  260. {
  261. #if !defined(__ARM_NEON) // affinity seems not supported on M1 chip
  262. thread_affinity_policy ap;
  263. ap.affinity_tag = affinity;
  264. if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
  265. WARNING("setting thread affinity failed"); // on purpose only a warning
  266. #endif
  267. }
  268. }
  269. #endif
  270. ////////////////////////////////////////////////////////////////////////////////
  271. /// Unix Platform
  272. ////////////////////////////////////////////////////////////////////////////////
  273. #if defined(__UNIX__) || defined(PTHREADS_WIN32)
  274. #include <pthread.h>
  275. #include <sched.h>
  276. #if defined(__USE_NUMA__)
  277. #include <numa.h>
  278. #endif
  279. namespace embree
  280. {
  281. struct ThreadStartupData
  282. {
  283. public:
  284. ThreadStartupData (thread_func f, void* arg, int affinity)
  285. : f(f), arg(arg), affinity(affinity) {}
  286. public:
  287. thread_func f;
  288. void* arg;
  289. ssize_t affinity;
  290. };
  291. static void* threadStartup(ThreadStartupData* parg)
  292. {
  293. _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
  294. /*! Mac OS X does not support setting affinity at thread creation time */
  295. #if defined(__MACOSX__)
  296. if (parg->affinity >= 0)
  297. setAffinity(parg->affinity);
  298. #endif
  299. parg->f(parg->arg);
  300. delete parg;
  301. return nullptr;
  302. }
  303. /*! creates a hardware thread running on specific core */
  304. thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
  305. {
  306. /* set stack size */
  307. pthread_attr_t attr;
  308. pthread_attr_init(&attr);
  309. if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size);
  310. /* create thread */
  311. pthread_t* tid = new pthread_t;
  312. if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) {
  313. pthread_attr_destroy(&attr);
  314. delete tid;
  315. FATAL("pthread_create failed");
  316. }
  317. pthread_attr_destroy(&attr);
  318. /* set affinity */
  319. // -- GODOT start --
  320. #if defined(__LINUX__) && !defined(__ANDROID__)
  321. // -- GODOT end --
  322. if (threadID >= 0) {
  323. cpu_set_t cset;
  324. CPU_ZERO(&cset);
  325. threadID = mapThreadID(threadID);
  326. CPU_SET(threadID, &cset);
  327. pthread_setaffinity_np(*tid, sizeof(cset), &cset);
  328. }
  329. #elif defined(__FreeBSD__)
  330. if (threadID >= 0) {
  331. cpuset_t cset;
  332. CPU_ZERO(&cset);
  333. CPU_SET(threadID, &cset);
  334. pthread_setaffinity_np(*tid, sizeof(cset), &cset);
  335. }
  336. // -- GODOT start --
  337. #elif defined(__ANDROID__)
  338. if (threadID >= 0) {
  339. cpu_set_t cset;
  340. CPU_ZERO(&cset);
  341. CPU_SET(threadID, &cset);
  342. sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset);
  343. }
  344. #endif
  345. // -- GODOT end --
  346. return thread_t(tid);
  347. }
  /*! Yields the calling thread via sched_yield; the result of that call is
   *  not inspected. */
  void yield()
  {
    (void) sched_yield();
  }
  352. /*! waits until the given thread has terminated */
  353. void join(thread_t tid) {
  354. if (pthread_join(*(pthread_t*)tid, nullptr) != 0)
  355. FATAL("pthread_join failed");
  356. delete (pthread_t*)tid;
  357. }
  358. /*! destroy a hardware thread by its handle */
  359. void destroyThread(thread_t tid) {
  360. // -- GODOT start --
  361. #if defined(__ANDROID__)
  362. FATAL("Can't destroy threads on Android.");
  363. #else
  364. pthread_cancel(*(pthread_t*)tid);
  365. delete (pthread_t*)tid;
  366. #endif
  367. // -- GODOT end --
  368. }
  369. /*! creates thread local storage */
  370. tls_t createTls()
  371. {
  372. pthread_key_t* key = new pthread_key_t;
  373. if (pthread_key_create(key,nullptr) != 0) {
  374. delete key;
  375. FATAL("pthread_key_create failed");
  376. }
  377. return tls_t(key);
  378. }
  379. /*! return the thread local storage pointer */
  380. void* getTls(tls_t tls)
  381. {
  382. assert(tls);
  383. return pthread_getspecific(*(pthread_key_t*)tls);
  384. }
  385. /*! set the thread local storage pointer */
  386. void setTls(tls_t tls, void* const ptr)
  387. {
  388. assert(tls);
  389. if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0)
  390. FATAL("pthread_setspecific failed");
  391. }
  392. /*! destroys thread local storage identifier */
  393. void destroyTls(tls_t tls)
  394. {
  395. assert(tls);
  396. if (pthread_key_delete(*(pthread_key_t*)tls) != 0)
  397. FATAL("pthread_key_delete failed");
  398. delete (pthread_key_t*)tls;
  399. }
  400. }
  401. #endif