123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472 |
- /*
- Bullet Continuous Collision Detection and Physics Library
- Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
- This software is provided 'as-is', without any express or implied warranty.
- In no event will the authors be held liable for any damages arising from the use of this software.
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it freely,
- subject to the following restrictions:
- 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
- */
- #if defined( _WIN32 ) && BT_THREADSAFE
- #include "LinearMath/btScalar.h"
- #include "LinearMath/btMinMax.h"
- #include "LinearMath/btAlignedObjectArray.h"
- #include "LinearMath/btThreads.h"
- #include "btThreadSupportInterface.h"
- #include <windows.h>
- #include <stdio.h>
- struct btProcessorInfo
- {
- int numLogicalProcessors;
- int numCores;
- int numNumaNodes;
- int numL1Cache;
- int numL2Cache;
- int numL3Cache;
- int numPhysicalPackages;
- static const int maxNumTeamMasks = 32;
- int numTeamMasks;
- UINT64 processorTeamMasks[ maxNumTeamMasks ];
- };
- UINT64 getProcessorTeamMask( const btProcessorInfo& procInfo, int procId )
- {
- UINT64 procMask = UINT64( 1 ) << procId;
- for ( int i = 0; i < procInfo.numTeamMasks; ++i )
- {
- if ( procMask & procInfo.processorTeamMasks[ i ] )
- {
- return procInfo.processorTeamMasks[ i ];
- }
- }
- return 0;
- }
- int getProcessorTeamIndex( const btProcessorInfo& procInfo, int procId )
- {
- UINT64 procMask = UINT64( 1 ) << procId;
- for ( int i = 0; i < procInfo.numTeamMasks; ++i )
- {
- if ( procMask & procInfo.processorTeamMasks[ i ] )
- {
- return i;
- }
- }
- return -1;
- }
- int countSetBits( ULONG64 bits )
- {
- int count = 0;
- while ( bits )
- {
- if ( bits & 1 )
- {
- count++;
- }
- bits >>= 1;
- }
- return count;
- }
- typedef BOOL( WINAPI *Pfn_GetLogicalProcessorInformation )( PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD );
- void getProcessorInformation( btProcessorInfo* procInfo )
- {
- memset( procInfo, 0, sizeof( *procInfo ) );
- Pfn_GetLogicalProcessorInformation getLogicalProcInfo =
- (Pfn_GetLogicalProcessorInformation) GetProcAddress( GetModuleHandle( TEXT( "kernel32" ) ), "GetLogicalProcessorInformation" );
- if ( getLogicalProcInfo == NULL )
- {
- // no info
- return;
- }
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buf = NULL;
- DWORD bufSize = 0;
- while ( true )
- {
- if ( getLogicalProcInfo( buf, &bufSize ) )
- {
- break;
- }
- else
- {
- if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER )
- {
- if ( buf )
- {
- free( buf );
- }
- buf = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( bufSize );
- }
- }
- }
- int len = bufSize / sizeof( *buf );
- for ( int i = 0; i < len; ++i )
- {
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION info = buf + i;
- switch ( info->Relationship )
- {
- case RelationNumaNode:
- procInfo->numNumaNodes++;
- break;
- case RelationProcessorCore:
- procInfo->numCores++;
- procInfo->numLogicalProcessors += countSetBits( info->ProcessorMask );
- break;
- case RelationCache:
- if ( info->Cache.Level == 1 )
- {
- procInfo->numL1Cache++;
- }
- else if ( info->Cache.Level == 2 )
- {
- procInfo->numL2Cache++;
- }
- else if ( info->Cache.Level == 3 )
- {
- procInfo->numL3Cache++;
- // processors that share L3 cache are considered to be on the same team
- // because they can more easily work together on the same data.
- // Large performance penalties will occur if 2 or more threads from different
- // teams attempt to frequently read and modify the same cache lines.
- //
- // On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
- // 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
- // CCXs are operating on the same data, many cycles will be spent keeping the
- // two caches coherent.
- if ( procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks )
- {
- procInfo->processorTeamMasks[ procInfo->numTeamMasks ] = info->ProcessorMask;
- procInfo->numTeamMasks++;
- }
- }
- break;
- case RelationProcessorPackage:
- procInfo->numPhysicalPackages++;
- break;
- }
- }
- free( buf );
- }
- ///btThreadSupportWin32 helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
- class btThreadSupportWin32 : public btThreadSupportInterface
- {
- public:
- struct btThreadStatus
- {
- int m_taskId;
- int m_commandId;
- int m_status;
- ThreadFunc m_userThreadFunc;
- void* m_userPtr; //for taskDesc etc
- void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
- void* m_eventStartHandle;
- char m_eventStartHandleName[ 32 ];
- void* m_eventCompleteHandle;
- char m_eventCompleteHandleName[ 32 ];
- };
- private:
- btAlignedObjectArray<btThreadStatus> m_activeThreadStatus;
- btAlignedObjectArray<void*> m_completeHandles;
- int m_numThreads;
- DWORD_PTR m_startedThreadMask;
- btProcessorInfo m_processorInfo;
- void startThreads( const ConstructionInfo& threadInfo );
- void stopThreads();
- int waitForResponse();
- public:
- btThreadSupportWin32( const ConstructionInfo& threadConstructionInfo );
- virtual ~btThreadSupportWin32();
- virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
- virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
- virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
- virtual void runTask( int threadIndex, void* userData ) BT_OVERRIDE;
- virtual void waitForAllTasks() BT_OVERRIDE;
- virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
- virtual void deleteCriticalSection( btCriticalSection* criticalSection ) BT_OVERRIDE;
- };
- btThreadSupportWin32::btThreadSupportWin32( const ConstructionInfo & threadConstructionInfo )
- {
- startThreads( threadConstructionInfo );
- }
- btThreadSupportWin32::~btThreadSupportWin32()
- {
- stopThreads();
- }
- DWORD WINAPI win32threadStartFunc( LPVOID lpParam )
- {
- btThreadSupportWin32::btThreadStatus* status = ( btThreadSupportWin32::btThreadStatus* )lpParam;
- while ( 1 )
- {
- WaitForSingleObject( status->m_eventStartHandle, INFINITE );
- void* userPtr = status->m_userPtr;
- if ( userPtr )
- {
- btAssert( status->m_status );
- status->m_userThreadFunc( userPtr );
- status->m_status = 2;
- SetEvent( status->m_eventCompleteHandle );
- }
- else
- {
- //exit Thread
- status->m_status = 3;
- printf( "Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle );
- SetEvent( status->m_eventCompleteHandle );
- break;
- }
- }
- printf( "Thread TERMINATED\n" );
- return 0;
- }
- void btThreadSupportWin32::runTask( int threadIndex, void* userData )
- {
- btThreadStatus& threadStatus = m_activeThreadStatus[ threadIndex ];
- btAssert( threadIndex >= 0 );
- btAssert( int( threadIndex ) < m_activeThreadStatus.size() );
- threadStatus.m_commandId = 1;
- threadStatus.m_status = 1;
- threadStatus.m_userPtr = userData;
- m_startedThreadMask |= DWORD_PTR( 1 ) << threadIndex;
- ///fire event to start new task
- SetEvent( threadStatus.m_eventStartHandle );
- }
- int btThreadSupportWin32::waitForResponse()
- {
- btAssert( m_activeThreadStatus.size() );
- int last = -1;
- DWORD res = WaitForMultipleObjects( m_completeHandles.size(), &m_completeHandles[ 0 ], FALSE, INFINITE );
- btAssert( res != WAIT_FAILED );
- last = res - WAIT_OBJECT_0;
- btThreadStatus& threadStatus = m_activeThreadStatus[ last ];
- btAssert( threadStatus.m_threadHandle );
- btAssert( threadStatus.m_eventCompleteHandle );
- //WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
- btAssert( threadStatus.m_status > 1 );
- threadStatus.m_status = 0;
- ///need to find an active spu
- btAssert( last >= 0 );
- m_startedThreadMask &= ~( DWORD_PTR( 1 ) << last );
- return last;
- }
- void btThreadSupportWin32::waitForAllTasks()
- {
- while ( m_startedThreadMask )
- {
- waitForResponse();
- }
- }
- void btThreadSupportWin32::startThreads( const ConstructionInfo& threadConstructionInfo )
- {
- static int uniqueId = 0;
- uniqueId++;
- btProcessorInfo& procInfo = m_processorInfo;
- getProcessorInformation( &procInfo );
- DWORD_PTR dwProcessAffinityMask = 0;
- DWORD_PTR dwSystemAffinityMask = 0;
- if ( !GetProcessAffinityMask( GetCurrentProcess(), &dwProcessAffinityMask, &dwSystemAffinityMask ) )
- {
- dwProcessAffinityMask = 0;
- }
- ///The number of threads should be equal to the number of available cores - 1
- m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
- m_activeThreadStatus.resize( m_numThreads );
- m_completeHandles.resize( m_numThreads );
- m_startedThreadMask = 0;
- // set main thread affinity
- if ( DWORD_PTR mask = dwProcessAffinityMask & getProcessorTeamMask( procInfo, 0 ))
- {
- SetThreadAffinityMask( GetCurrentThread(), mask );
- SetThreadIdealProcessor( GetCurrentThread(), 0 );
- }
- for ( int i = 0; i < m_numThreads; i++ )
- {
- printf( "starting thread %d\n", i );
- btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
- LPSECURITY_ATTRIBUTES lpThreadAttributes = NULL;
- SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
- LPTHREAD_START_ROUTINE lpStartAddress = &win32threadStartFunc;
- LPVOID lpParameter = &threadStatus;
- DWORD dwCreationFlags = 0;
- LPDWORD lpThreadId = 0;
- threadStatus.m_userPtr = 0;
- sprintf( threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
- threadStatus.m_eventStartHandle = CreateEventA( 0, false, false, threadStatus.m_eventStartHandleName );
- sprintf( threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i );
- threadStatus.m_eventCompleteHandle = CreateEventA( 0, false, false, threadStatus.m_eventCompleteHandleName );
- m_completeHandles[ i ] = threadStatus.m_eventCompleteHandle;
- HANDLE handle = CreateThread( lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, lpThreadId );
- //SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
- // highest priority -- can cause erratic performance when numThreads > numCores
- // we don't want worker threads to be higher priority than the main thread or the main thread could get
- // totally shut out and unable to tell the workers to stop
- //SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
- {
- int processorId = i + 1; // leave processor 0 for main thread
- DWORD_PTR teamMask = getProcessorTeamMask( procInfo, processorId );
- if ( teamMask )
- {
- // bind each thread to only execute on processors of it's assigned team
- // - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
- // - for multi-socket Intel this will keep threads from migrating from one socket to another
- // - for AMD Ryzen this will keep threads from migrating from one CCX to another
- DWORD_PTR mask = teamMask & dwProcessAffinityMask;
- if ( mask )
- {
- SetThreadAffinityMask( handle, mask );
- }
- }
- SetThreadIdealProcessor( handle, processorId );
- }
- threadStatus.m_taskId = i;
- threadStatus.m_commandId = 0;
- threadStatus.m_status = 0;
- threadStatus.m_threadHandle = handle;
- threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
- printf( "started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle );
- }
- }
- ///tell the task scheduler we are done with the SPU tasks
- void btThreadSupportWin32::stopThreads()
- {
- for ( int i = 0; i < m_activeThreadStatus.size(); i++ )
- {
- btThreadStatus& threadStatus = m_activeThreadStatus[ i ];
- if ( threadStatus.m_status > 0 )
- {
- WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
- }
- threadStatus.m_userPtr = NULL;
- SetEvent( threadStatus.m_eventStartHandle );
- WaitForSingleObject( threadStatus.m_eventCompleteHandle, INFINITE );
- CloseHandle( threadStatus.m_eventCompleteHandle );
- CloseHandle( threadStatus.m_eventStartHandle );
- CloseHandle( threadStatus.m_threadHandle );
- }
- m_activeThreadStatus.clear();
- m_completeHandles.clear();
- }
- class btWin32CriticalSection : public btCriticalSection
- {
- private:
- CRITICAL_SECTION mCriticalSection;
- public:
- btWin32CriticalSection()
- {
- InitializeCriticalSection( &mCriticalSection );
- }
- ~btWin32CriticalSection()
- {
- DeleteCriticalSection( &mCriticalSection );
- }
- void lock()
- {
- EnterCriticalSection( &mCriticalSection );
- }
- void unlock()
- {
- LeaveCriticalSection( &mCriticalSection );
- }
- };
- btCriticalSection* btThreadSupportWin32::createCriticalSection()
- {
- unsigned char* mem = (unsigned char*) btAlignedAlloc( sizeof( btWin32CriticalSection ), 16 );
- btWin32CriticalSection* cs = new( mem ) btWin32CriticalSection();
- return cs;
- }
- void btThreadSupportWin32::deleteCriticalSection( btCriticalSection* criticalSection )
- {
- criticalSection->~btCriticalSection();
- btAlignedFree( criticalSection );
- }
- btThreadSupportInterface* btThreadSupportInterface::create( const ConstructionInfo& info )
- {
- return new btThreadSupportWin32( info );
- }
- #endif //defined(_WIN32) && BT_THREADSAFE
|