thread.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #include "thread.h"
  17. #include "sysinfo.h"
  18. #include "string.h"
  19. #include <iostream>
  20. #include <xmmintrin.h>
  21. #if defined(PTHREADS_WIN32)
  22. #pragma comment (lib, "pthreadVC.lib")
  23. #endif
  24. ////////////////////////////////////////////////////////////////////////////////
  25. /// Windows Platform
  26. ////////////////////////////////////////////////////////////////////////////////
  27. #if defined(__WIN32__)
  28. #define WIN32_LEAN_AND_MEAN
  29. #include <windows.h>
  30. namespace embree
  31. {
  32. /*! set the affinity of a given thread */
  33. void setAffinity(HANDLE thread, ssize_t affinity)
  34. {
  35. typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
  36. typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
  37. typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
  38. typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
  39. HMODULE hlib = LoadLibrary("Kernel32");
  40. GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
  41. GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
  42. SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
  43. SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
  44. if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
  45. {
  46. int groups = pGetActiveProcessorGroupCount();
  47. int totalProcessors = 0, group = 0, number = 0;
  48. for (int i = 0; i<groups; i++) {
  49. int processors = pGetActiveProcessorCount(i);
  50. if (totalProcessors + processors > affinity) {
  51. group = i;
  52. number = (int)affinity - totalProcessors;
  53. break;
  54. }
  55. totalProcessors += processors;
  56. }
  57. GROUP_AFFINITY groupAffinity;
  58. groupAffinity.Group = (WORD)group;
  59. groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
  60. groupAffinity.Reserved[0] = 0;
  61. groupAffinity.Reserved[1] = 0;
  62. groupAffinity.Reserved[2] = 0;
  63. if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
  64. WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning
  65. PROCESSOR_NUMBER processorNumber;
  66. processorNumber.Group = group;
  67. processorNumber.Number = number;
  68. processorNumber.Reserved = 0;
  69. if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
  70. WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
  71. }
  72. else
  73. {
  74. if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
  75. WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
  76. if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
  77. WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
  78. }
  79. }
  80. /*! set affinity of the calling thread */
  81. void setAffinity(ssize_t affinity) {
  82. setAffinity(GetCurrentThread(), affinity);
  83. }
  84. struct ThreadStartupData
  85. {
  86. public:
  87. ThreadStartupData (thread_func f, void* arg)
  88. : f(f), arg(arg) {}
  89. public:
  90. thread_func f;
  91. void* arg;
  92. };
  93. DWORD WINAPI threadStartup(LPVOID ptr)
  94. {
  95. ThreadStartupData* parg = (ThreadStartupData*) ptr;
  96. _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
  97. parg->f(parg->arg);
  98. delete parg;
  99. return 0;
  100. }
  101. #if !defined(PTHREADS_WIN32)
  102. /*! creates a hardware thread running on specific core */
  103. thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
  104. {
  105. HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);
  106. if (thread == nullptr) FATAL("CreateThread failed");
  107. if (threadID >= 0) setAffinity(thread, threadID);
  108. return thread_t(thread);
  109. }
  110. /*! the thread calling this function gets yielded */
  111. void yield() {
  112. SwitchToThread();
  113. }
  114. /*! waits until the given thread has terminated */
  115. void join(thread_t tid) {
  116. WaitForSingleObject(HANDLE(tid), INFINITE);
  117. CloseHandle(HANDLE(tid));
  118. }
  119. /*! destroy a hardware thread by its handle */
  120. void destroyThread(thread_t tid) {
  121. TerminateThread(HANDLE(tid),0);
  122. CloseHandle(HANDLE(tid));
  123. }
  124. /*! creates thread local storage */
  125. tls_t createTls() {
  126. return tls_t(size_t(TlsAlloc()));
  127. }
  128. /*! set the thread local storage pointer */
  129. void setTls(tls_t tls, void* const ptr) {
  130. TlsSetValue(DWORD(size_t(tls)), ptr);
  131. }
  132. /*! return the thread local storage pointer */
  133. void* getTls(tls_t tls) {
  134. return TlsGetValue(DWORD(size_t(tls)));
  135. }
  136. /*! destroys thread local storage identifier */
  137. void destroyTls(tls_t tls) {
  138. TlsFree(DWORD(size_t(tls)));
  139. }
  140. #endif
  141. }
  142. #endif
  143. ////////////////////////////////////////////////////////////////////////////////
  144. /// Linux Platform
  145. ////////////////////////////////////////////////////////////////////////////////
  146. #if defined(__LINUX__)
  147. #include <fstream>
  148. #include <sstream>
  149. #include <algorithm>
  150. namespace embree
  151. {
  152. /* changes thread ID mapping such that we first fill up all thread on one core */
  153. size_t mapThreadID(size_t threadID)
  154. {
  155. static MutexSys mutex;
  156. Lock<MutexSys> lock(mutex);
  157. static std::vector<size_t> threadIDs;
  158. if (threadIDs.size() == 0)
  159. {
  160. /* parse thread/CPU topology */
  161. for (size_t cpuID=0;;cpuID++)
  162. {
  163. std::fstream fs;
  164. std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list");
  165. fs.open (cpu.c_str(), std::fstream::in);
  166. if (fs.fail()) break;
  167. int i;
  168. while (fs >> i)
  169. {
  170. if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; }))
  171. threadIDs.push_back(i);
  172. if (fs.peek() == ',')
  173. fs.ignore();
  174. }
  175. fs.close();
  176. }
  177. #if 0
  178. for (size_t i=0;i<threadIDs.size();i++)
  179. std::cout << i << " -> " << threadIDs[i] << std::endl;
  180. #endif
  181. /* verify the mapping and do not use it if the mapping has errors */
  182. for (size_t i=0;i<threadIDs.size();i++) {
  183. for (size_t j=0;j<threadIDs.size();j++) {
  184. if (i != j && threadIDs[i] == threadIDs[j]) {
  185. threadIDs.clear();
  186. }
  187. }
  188. }
  189. }
  190. /* re-map threadIDs if mapping is available */
  191. size_t ID = threadID;
  192. if (threadID < threadIDs.size())
  193. ID = threadIDs[threadID];
  194. return ID;
  195. }
  196. /*! set affinity of the calling thread */
  197. void setAffinity(ssize_t affinity)
  198. {
  199. cpu_set_t cset;
  200. CPU_ZERO(&cset);
  201. CPU_SET(mapThreadID(affinity), &cset);
  202. if (pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset) != 0)
  203. WARNING("pthread_setaffinity_np failed"); // on purpose only a warning
  204. }
  205. }
  206. #endif
  207. ////////////////////////////////////////////////////////////////////////////////
  208. /// FreeBSD Platform
  209. ////////////////////////////////////////////////////////////////////////////////
  210. #if defined(__FreeBSD__)
  211. #include <pthread_np.h>
  212. namespace embree
  213. {
  214. /*! set affinity of the calling thread */
  215. void setAffinity(ssize_t affinity)
  216. {
  217. cpuset_t cset;
  218. CPU_ZERO(&cset);
  219. CPU_SET(affinity, &cset);
  220. if (pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset) != 0)
  221. WARNING("pthread_setaffinity_np failed"); // on purpose only a warning
  222. }
  223. }
  224. #endif
  225. ////////////////////////////////////////////////////////////////////////////////
  226. /// MacOSX Platform
  227. ////////////////////////////////////////////////////////////////////////////////
  228. #if defined(__MACOSX__)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
  232. namespace embree
  233. {
  234. /*! set affinity of the calling thread */
  235. void setAffinity(ssize_t affinity)
  236. {
  237. thread_affinity_policy ap;
  238. ap.affinity_tag = affinity;
  239. if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
  240. WARNING("setting thread affinity failed"); // on purpose only a warning
  241. }
  242. }
  243. #endif
  244. ////////////////////////////////////////////////////////////////////////////////
  245. /// Unix Platform
  246. ////////////////////////////////////////////////////////////////////////////////
  247. #if defined(__UNIX__) || defined(PTHREADS_WIN32)
  248. #include <pthread.h>
  249. #include <sched.h>
  250. #if defined(__USE_NUMA__)
  251. #include <numa.h>
  252. #endif
  253. namespace embree
  254. {
  255. struct ThreadStartupData
  256. {
  257. public:
  258. ThreadStartupData (thread_func f, void* arg, int affinity)
  259. : f(f), arg(arg), affinity(affinity) {}
  260. public:
  261. thread_func f;
  262. void* arg;
  263. ssize_t affinity;
  264. };
  265. static void* threadStartup(ThreadStartupData* parg)
  266. {
  267. _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
  268. /*! Mac OS X does not support setting affinity at thread creation time */
  269. #if defined(__MACOSX__)
  270. if (parg->affinity >= 0)
  271. setAffinity(parg->affinity);
  272. #endif
  273. parg->f(parg->arg);
  274. delete parg;
  275. return nullptr;
  276. }
  277. /*! creates a hardware thread running on specific core */
  278. thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
  279. {
  280. /* set stack size */
  281. pthread_attr_t attr;
  282. pthread_attr_init(&attr);
  283. if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size);
  284. /* create thread */
  285. pthread_t* tid = new pthread_t;
  286. if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) {
  287. pthread_attr_destroy(&attr);
  288. delete tid;
  289. FATAL("pthread_create failed");
  290. }
  291. pthread_attr_destroy(&attr);
  292. /* set affinity */
  293. #if defined(__LINUX__)
  294. if (threadID >= 0) {
  295. cpu_set_t cset;
  296. CPU_ZERO(&cset);
  297. CPU_SET(mapThreadID(threadID), &cset);
  298. if (pthread_setaffinity_np(*tid, sizeof(cset), &cset))
  299. WARNING("pthread_setaffinity_np failed"); // on purpose only a warning
  300. }
  301. #elif defined(__FreeBSD__)
  302. if (threadID >= 0) {
  303. cpuset_t cset;
  304. CPU_ZERO(&cset);
  305. CPU_SET(threadID, &cset);
  306. if (pthread_setaffinity_np(*tid, sizeof(cset), &cset))
  307. WARNING("pthread_setaffinity_np failed"); // on purpose only a warning
  308. }
  309. #endif
  310. return thread_t(tid);
  311. }
  /*! Yields execution of the calling thread via sched_yield(). */
  void yield()
  {
    (void) sched_yield();
  }
  316. /*! waits until the given thread has terminated */
  317. void join(thread_t tid) {
  318. if (pthread_join(*(pthread_t*)tid, nullptr) != 0)
  319. FATAL("pthread_join failed");
  320. delete (pthread_t*)tid;
  321. }
  322. /*! destroy a hardware thread by its handle */
  323. void destroyThread(thread_t tid) {
  324. pthread_cancel(*(pthread_t*)tid);
  325. delete (pthread_t*)tid;
  326. }
  327. /*! creates thread local storage */
  328. tls_t createTls()
  329. {
  330. pthread_key_t* key = new pthread_key_t;
  331. if (pthread_key_create(key,nullptr) != 0) {
  332. delete key;
  333. FATAL("pthread_key_create failed");
  334. }
  335. return tls_t(key);
  336. }
  337. /*! return the thread local storage pointer */
  338. void* getTls(tls_t tls)
  339. {
  340. assert(tls);
  341. return pthread_getspecific(*(pthread_key_t*)tls);
  342. }
  343. /*! set the thread local storage pointer */
  344. void setTls(tls_t tls, void* const ptr)
  345. {
  346. assert(tls);
  347. if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0)
  348. FATAL("pthread_setspecific failed");
  349. }
  350. /*! destroys thread local storage identifier */
  351. void destroyTls(tls_t tls)
  352. {
  353. assert(tls);
  354. if (pthread_key_delete(*(pthread_key_t*)tls) != 0)
  355. FATAL("pthread_key_delete failed");
  356. delete (pthread_key_t*)tls;
  357. }
  358. }
  359. #endif
  360. ////////////////////////////////////////////////////////////////////////////////
  361. /// All Platforms
  362. ////////////////////////////////////////////////////////////////////////////////
  363. namespace embree
  364. {
  365. ThreadLocalStorage ThreadLocalStorage::single_instance;
  366. }