btThreadSupportWin32.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /*
  2. Bullet Continuous Collision Detection and Physics Library
  3. Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
  4. This software is provided 'as-is', without any express or implied warranty.
  5. In no event will the authors be held liable for any damages arising from the use of this software.
  6. Permission is granted to anyone to use this software for any purpose,
  7. including commercial applications, and to alter it and redistribute it freely,
  8. subject to the following restrictions:
  9. 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
  10. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
  11. 3. This notice may not be removed or altered from any source distribution.
  12. */
  13. #if defined(_WIN32) && BT_THREADSAFE
  14. #include "LinearMath/btScalar.h"
  15. #include "LinearMath/btMinMax.h"
  16. #include "LinearMath/btAlignedObjectArray.h"
  17. #include "LinearMath/btThreads.h"
  18. #include "btThreadSupportInterface.h"
  19. #include <windows.h>
  20. #include <stdio.h>
  21. struct btProcessorInfo
  22. {
  23. int numLogicalProcessors;
  24. int numCores;
  25. int numNumaNodes;
  26. int numL1Cache;
  27. int numL2Cache;
  28. int numL3Cache;
  29. int numPhysicalPackages;
  30. static const int maxNumTeamMasks = 32;
  31. int numTeamMasks;
  32. UINT64 processorTeamMasks[maxNumTeamMasks];
  33. };
  34. UINT64 getProcessorTeamMask(const btProcessorInfo& procInfo, int procId)
  35. {
  36. UINT64 procMask = UINT64(1) << procId;
  37. for (int i = 0; i < procInfo.numTeamMasks; ++i)
  38. {
  39. if (procMask & procInfo.processorTeamMasks[i])
  40. {
  41. return procInfo.processorTeamMasks[i];
  42. }
  43. }
  44. return 0;
  45. }
  46. int getProcessorTeamIndex(const btProcessorInfo& procInfo, int procId)
  47. {
  48. UINT64 procMask = UINT64(1) << procId;
  49. for (int i = 0; i < procInfo.numTeamMasks; ++i)
  50. {
  51. if (procMask & procInfo.processorTeamMasks[i])
  52. {
  53. return i;
  54. }
  55. }
  56. return -1;
  57. }
  58. int countSetBits(ULONG64 bits)
  59. {
  60. int count = 0;
  61. while (bits)
  62. {
  63. if (bits & 1)
  64. {
  65. count++;
  66. }
  67. bits >>= 1;
  68. }
  69. return count;
  70. }
  71. typedef BOOL(WINAPI* Pfn_GetLogicalProcessorInformation)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
  72. void getProcessorInformation(btProcessorInfo* procInfo)
  73. {
  74. memset(procInfo, 0, sizeof(*procInfo));
  75. #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \
  76. !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
  77. // Can't dlopen libraries on UWP.
  78. return;
  79. #else
  80. Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
  81. (Pfn_GetLogicalProcessorInformation)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
  82. if (getLogicalProcInfo == NULL)
  83. {
  84. // no info
  85. return;
  86. }
  87. PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
  88. DWORD bufSize = 0;
  89. while (true)
  90. {
  91. if (getLogicalProcInfo(buf, &bufSize))
  92. {
  93. break;
  94. }
  95. else
  96. {
  97. if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
  98. {
  99. if (buf)
  100. {
  101. free(buf);
  102. }
  103. buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(bufSize);
  104. }
  105. }
  106. }
  107. int len = bufSize / sizeof(*buf);
  108. for (int i = 0; i < len; ++i)
  109. {
  110. PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
  111. switch (info->Relationship)
  112. {
  113. case RelationNumaNode:
  114. procInfo->numNumaNodes++;
  115. break;
  116. case RelationProcessorCore:
  117. procInfo->numCores++;
  118. procInfo->numLogicalProcessors += countSetBits(info->ProcessorMask);
  119. break;
  120. case RelationCache:
  121. if (info->Cache.Level == 1)
  122. {
  123. procInfo->numL1Cache++;
  124. }
  125. else if (info->Cache.Level == 2)
  126. {
  127. procInfo->numL2Cache++;
  128. }
  129. else if (info->Cache.Level == 3)
  130. {
  131. procInfo->numL3Cache++;
  132. // processors that share L3 cache are considered to be on the same team
  133. // because they can more easily work together on the same data.
  134. // Large performance penalties will occur if 2 or more threads from different
  135. // teams attempt to frequently read and modify the same cache lines.
  136. //
  137. // On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
  138. // 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
  139. // CCXs are operating on the same data, many cycles will be spent keeping the
  140. // two caches coherent.
  141. if (procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks)
  142. {
  143. procInfo->processorTeamMasks[procInfo->numTeamMasks] = info->ProcessorMask;
  144. procInfo->numTeamMasks++;
  145. }
  146. }
  147. break;
  148. case RelationProcessorPackage:
  149. procInfo->numPhysicalPackages++;
  150. break;
  151. }
  152. }
  153. free(buf);
  154. #endif
  155. }
  156. ///btThreadSupportWin32 helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
  157. class btThreadSupportWin32 : public btThreadSupportInterface
  158. {
  159. public:
  160. struct btThreadStatus
  161. {
  162. int m_taskId;
  163. int m_commandId;
  164. int m_status;
  165. ThreadFunc m_userThreadFunc;
  166. void* m_userPtr; //for taskDesc etc
  167. void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
  168. void* m_eventStartHandle;
  169. char m_eventStartHandleName[32];
  170. void* m_eventCompleteHandle;
  171. char m_eventCompleteHandleName[32];
  172. };
  173. private:
  174. btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
  175. btAlignedObjectArray<void*> m_completeHandles;
  176. int m_numThreads;
  177. DWORD_PTR m_startedThreadMask;
  178. btProcessorInfo m_processorInfo;
  179. void startThreads(const ConstructionInfo& threadInfo);
  180. void stopThreads();
  181. int waitForResponse();
  182. public:
  183. btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo);
  184. virtual ~btThreadSupportWin32();
  185. virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
  186. virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
  187. virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
  188. virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE;
  189. virtual void waitForAllTasks() BT_OVERRIDE;
  190. virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
  191. virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE;
  192. };
  193. btThreadSupportWin32::btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo)
  194. {
  195. startThreads(threadConstructionInfo);
  196. }
  197. btThreadSupportWin32::~btThreadSupportWin32()
  198. {
  199. stopThreads();
  200. }
  201. DWORD WINAPI win32threadStartFunc(LPVOID lpParam)
  202. {
  203. btThreadSupportWin32::btThreadStatus* status = (btThreadSupportWin32::btThreadStatus*)lpParam;
  204. while (1)
  205. {
  206. WaitForSingleObject(status->m_eventStartHandle, INFINITE);
  207. void* userPtr = status->m_userPtr;
  208. if (userPtr)
  209. {
  210. btAssert(status->m_status);
  211. status->m_userThreadFunc(userPtr);
  212. status->m_status = 2;
  213. SetEvent(status->m_eventCompleteHandle);
  214. }
  215. else
  216. {
  217. //exit Thread
  218. status->m_status = 3;
  219. printf("Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle);
  220. SetEvent(status->m_eventCompleteHandle);
  221. break;
  222. }
  223. }
  224. printf("Thread TERMINATED\n");
  225. return 0;
  226. }
  227. void btThreadSupportWin32::runTask(int threadIndex, void* userData)
  228. {
  229. btThreadStatus& threadStatus = m_activeThreadStatus[threadIndex];
  230. btAssert(threadIndex >= 0);
  231. btAssert(int(threadIndex) < m_activeThreadStatus.size());
  232. threadStatus.m_commandId = 1;
  233. threadStatus.m_status = 1;
  234. threadStatus.m_userPtr = userData;
  235. m_startedThreadMask |= DWORD_PTR(1) << threadIndex;
  236. ///fire event to start new task
  237. SetEvent(threadStatus.m_eventStartHandle);
  238. }
  239. int btThreadSupportWin32::waitForResponse()
  240. {
  241. btAssert(m_activeThreadStatus.size());
  242. int last = -1;
  243. DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE);
  244. btAssert(res != WAIT_FAILED);
  245. last = res - WAIT_OBJECT_0;
  246. btThreadStatus& threadStatus = m_activeThreadStatus[last];
  247. btAssert(threadStatus.m_threadHandle);
  248. btAssert(threadStatus.m_eventCompleteHandle);
  249. //WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
  250. btAssert(threadStatus.m_status > 1);
  251. threadStatus.m_status = 0;
  252. ///need to find an active spu
  253. btAssert(last >= 0);
  254. m_startedThreadMask &= ~(DWORD_PTR(1) << last);
  255. return last;
  256. }
  257. void btThreadSupportWin32::waitForAllTasks()
  258. {
  259. while (m_startedThreadMask)
  260. {
  261. waitForResponse();
  262. }
  263. }
  264. void btThreadSupportWin32::startThreads(const ConstructionInfo& threadConstructionInfo)
  265. {
  266. static int uniqueId = 0;
  267. uniqueId++;
  268. btProcessorInfo& procInfo = m_processorInfo;
  269. getProcessorInformation(&procInfo);
  270. DWORD_PTR dwProcessAffinityMask = 0;
  271. DWORD_PTR dwSystemAffinityMask = 0;
  272. if (!GetProcessAffinityMask(GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask))
  273. {
  274. dwProcessAffinityMask = 0;
  275. }
  276. ///The number of threads should be equal to the number of available cores - 1
  277. m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
  278. m_activeThreadStatus.resize(m_numThreads);
  279. m_completeHandles.resize(m_numThreads);
  280. m_startedThreadMask = 0;
  281. // set main thread affinity
  282. if (DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask(procInfo, 0))
  283. {
  284. SetThreadAffinityMask(GetCurrentThread(), mask);
  285. SetThreadIdealProcessor(GetCurrentThread(), 0);
  286. }
  287. for (int i = 0; i < m_numThreads; i++)
  288. {
  289. printf("starting thread %d\n", i);
  290. btThreadStatus& threadStatus = m_activeThreadStatus[i];
  291. LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
  292. SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
  293. LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
  294. LPVOID lpParameter = &threadStatus;
  295. DWORD dwCreationFlags = 0;
  296. LPDWORD lpThreadId = 0;
  297. threadStatus.m_userPtr = 0;
  298. sprintf(threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
  299. threadStatus.m_eventStartHandle = CreateEventA(0, false, false, threadStatus.m_eventStartHandleName);
  300. sprintf(threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
  301. threadStatus.m_eventCompleteHandle = CreateEventA(0, false, false, threadStatus.m_eventCompleteHandleName);
  302. m_completeHandles[i] = threadStatus.m_eventCompleteHandle;
  303. HANDLE handle = CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId);
  304. //SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
  305. // highest priority -- can cause erratic performance when numThreads > numCores
  306. // we don't want worker threads to be higher priority than the main thread or the main thread could get
  307. // totally shut out and unable to tell the workers to stop
  308. //SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
  309. {
  310. int processorId = i + 1; // leave processor 0 for main thread
  311. DWORD_PTR teamMask = getProcessorTeamMask(procInfo, processorId);
  312. if (teamMask)
  313. {
  314. // bind each thread to only execute on processors of it's assigned team
  315. // - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
  316. // - for multi-socket Intel this will keep threads from migrating from one socket to another
  317. // - for AMD Ryzen this will keep threads from migrating from one CCX to another
  318. DWORD_PTR mask = teamMask & dwProcessAffinityMask;
  319. if (mask)
  320. {
  321. SetThreadAffinityMask(handle, mask);
  322. }
  323. }
  324. SetThreadIdealProcessor(handle, processorId);
  325. }
  326. threadStatus.m_taskId = i;
  327. threadStatus.m_commandId = 0;
  328. threadStatus.m_status = 0;
  329. threadStatus.m_threadHandle = handle;
  330. threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
  331. printf("started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle);
  332. }
  333. }
  334. ///tell the task scheduler we are done with the SPU tasks
  335. void btThreadSupportWin32::stopThreads()
  336. {
  337. for (int i = 0; i < m_activeThreadStatus.size(); i++)
  338. {
  339. btThreadStatus& threadStatus = m_activeThreadStatus[i];
  340. if (threadStatus.m_status > 0)
  341. {
  342. WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
  343. }
  344. threadStatus.m_userPtr = NULL;
  345. SetEvent(threadStatus.m_eventStartHandle);
  346. WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
  347. CloseHandle(threadStatus.m_eventCompleteHandle);
  348. CloseHandle(threadStatus.m_eventStartHandle);
  349. CloseHandle(threadStatus.m_threadHandle);
  350. }
  351. m_activeThreadStatus.clear();
  352. m_completeHandles.clear();
  353. }
  354. class btWin32CriticalSection : public btCriticalSection
  355. {
  356. private:
  357. CRITICAL_SECTION mCriticalSection;
  358. public:
  359. btWin32CriticalSection()
  360. {
  361. InitializeCriticalSection(&mCriticalSection);
  362. }
  363. ~btWin32CriticalSection()
  364. {
  365. DeleteCriticalSection(&mCriticalSection);
  366. }
  367. void lock()
  368. {
  369. EnterCriticalSection(&mCriticalSection);
  370. }
  371. void unlock()
  372. {
  373. LeaveCriticalSection(&mCriticalSection);
  374. }
  375. };
  376. btCriticalSection* btThreadSupportWin32::createCriticalSection()
  377. {
  378. unsigned char* mem = (unsigned char*)btAlignedAlloc(sizeof(btWin32CriticalSection), 16);
  379. btWin32CriticalSection* cs = new (mem) btWin32CriticalSection();
  380. return cs;
  381. }
  382. void btThreadSupportWin32::deleteCriticalSection(btCriticalSection* criticalSection)
  383. {
  384. criticalSection->~btCriticalSection();
  385. btAlignedFree(criticalSection);
  386. }
  387. btThreadSupportInterface* btThreadSupportInterface::create(const ConstructionInfo& info)
  388. {
  389. return new btThreadSupportWin32(info);
  390. }
  391. #endif //defined(_WIN32) && BT_THREADSAFE