platformCPUCount.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667
  1. // Original code is:
  2. // Copyright (c) 2005 Intel Corporation
  3. // All Rights Reserved
  4. //
  5. // CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platform
  6. // The three forms of HW multithreading are: Multi-processor, Multi-core, and
  7. // HyperThreading Technology.
// This application enumerates all the logical processors enabled by the OS and BIOS and
// determines the HW topology of these enabled logical processors in the system
// using information provided by the CPUID instruction.
  11. // A multi-processing system can support any combination of the three forms of HW
  12. // multi-threading support. The relevant topology can be identified using a
  13. // three level decomposition of the "initial APIC ID" into
  14. // Package_id, core_id, and SMT_id. Such decomposition provides a three-level map of
  15. // the topology of hardware resources and
  16. // allow multi-threaded software to manage shared hardware resources in
  17. // the platform to reduce resource contention
  18. // Multicore detection algorithm for processor and cache topology requires
  19. // all leaf functions of CPUID instructions be available. System administrator
  20. // must ensure BIOS settings is not configured to restrict CPUID functionalities.
  21. //-------------------------------------------------------------------------------------------------
  22. #include "platform/platform.h"
  23. #include "platform/platformCPUCount.h"
  24. // Consoles don't need this
  25. #if defined(TORQUE_OS_XENON) || defined(TORQUE_OS_PS3)
  26. namespace CPUInfo
  27. {
  28. EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
  29. {
  30. TotAvailLogical = 6;
  31. TotAvailCore = 6;
  32. PhysicalNum = 3;
  33. return CONFIG_MultiCoreAndHTEnabled;
  34. }
  35. }; // namespace
  36. #else
  37. #ifdef TORQUE_OS_LINUX
// The Linux source code listing can be compiled using Linux kernel version 2.6
// or higher (e.g. RH 4AS-2.8 using GCC 3.4.4).
// Due to syntax variances of the Linux affinity APIs in earlier kernel versions
// and the dependence on glibc library versions, compilation in a Linux environment
// with older kernels and compilers may require kernel patches or compiler upgrades.
  43. #include <stdlib.h>
  44. #include <unistd.h>
  45. #include <string.h>
  46. #include <sched.h>
  47. #define DWORD unsigned long
  48. #elif defined( TORQUE_OS_WIN32 )
  49. #include <windows.h>
  50. #elif defined( TORQUE_OS_MAC )
  51. # include <sys/types.h>
  52. # include <sys/sysctl.h>
  53. #else
  54. #error Not implemented on platform.
  55. #endif
#include <stdio.h>
#include <assert.h>
// Compiler-provided CPUID intrinsics (the CPUID helpers are not built on Mac).
#if !defined( TORQUE_OS_MAC )
#  if defined( TORQUE_COMPILER_GCC )
#    include <cpuid.h>    // __get_cpuid_max, __cpuid, __cpuid_count
#  elif defined( TORQUE_COMPILER_VISUALC )
#    include <intrin.h>   // __cpuid, __cpuidex
#  endif
#endif
  58. namespace CPUInfo {
// Bit masks selecting the CPUID register fields read by the helpers below.

// EDX[28] of CPUID with EAX = 1: set if HT or multi-core is supported.
#define HWD_MT_BIT 0x10000000
// EBX[23:16] of CPUID with EAX = 1: number of logical processors per
// physical processor.
#define NUM_LOGICAL_BITS 0x00FF0000
// EAX[31:26] of CPUID with EAX = 4: number of cores per physical
// processor, minus one.
#define NUM_CORE_BITS 0xFC000000
// EBX[31:24] of CPUID with EAX = 1: the 8-bit unique initial APIC ID of the
// processor this code is running on.
#define INITIAL_APIC_ID_BITS 0xFF000000
// Forward declarations of the CPUID helpers; they are not compiled on Mac,
// where CPUCount queries sysctl instead.
#ifndef TORQUE_OS_MAC
static unsigned int CpuIDSupported(void);
static unsigned int find_maskwidth(unsigned int);
static unsigned int HWD_MTSupported(void);
static unsigned int MaxLogicalProcPerPhysicalProc(void);
static unsigned int MaxCorePerPhysicalProc(void);
static unsigned char GetAPIC_ID(void);
static unsigned char GetNzbSubID(unsigned char, unsigned char, unsigned char);
#endif
// Text report built by CPUCount: it is reset to an empty string on entry and
// receives one formatted line per logical processor that was sampled.
static char g_s3Levels[2048];
  78. #ifndef TORQUE_OS_MAC
  79. //
  80. // CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return
  81. // the maximum supported standard function.
  82. //
  83. static unsigned int CpuIDSupported(void)
  84. {
  85. unsigned int MaxInputValue;
  86. // If CPUID instruction is supported
  87. #ifdef TORQUE_COMPILER_GCC
  88. try
  89. {
  90. MaxInputValue = 0;
  91. // call cpuid with eax = 0
  92. asm
  93. (
  94. "pushl %%ebx\n\t"
  95. "xorl %%eax,%%eax\n\t"
  96. "cpuid\n\t"
  97. "popl %%ebx\n\t"
  98. : "=a" (MaxInputValue)
  99. :
  100. : "%ecx", "%edx"
  101. );
  102. }
  103. catch (...)
  104. {
  105. return(0); // cpuid instruction is unavailable
  106. }
  107. #elif defined( TORQUE_COMPILER_VISUALC )
  108. try
  109. {
  110. MaxInputValue = 0;
  111. // call cpuid with eax = 0
  112. __asm
  113. {
  114. xor eax, eax
  115. cpuid
  116. mov MaxInputValue, eax
  117. }
  118. }
  119. catch (...)
  120. {
  121. return(0); // cpuid instruction is unavailable
  122. }
  123. #else
  124. # error Not implemented.
  125. #endif
  126. return MaxInputValue;
  127. }
  128. //
  129. // Function returns the maximum cores per physical package. Note that the number of
  130. // AVAILABLE cores per physical to be used by an application might be less than this
  131. // maximum value.
  132. //
  133. static unsigned int MaxCorePerPhysicalProc(void)
  134. {
  135. unsigned int Regeax = 0;
  136. if (!HWD_MTSupported()) return (unsigned int) 1; // Single core
  137. #ifdef TORQUE_COMPILER_GCC
  138. {
  139. asm
  140. (
  141. "pushl %ebx\n\t"
  142. "xorl %eax, %eax\n\t"
  143. "cpuid\n\t"
  144. "cmpl $4, %eax\n\t" // check if cpuid supports leaf 4
  145. "jl .single_core\n\t" // Single core
  146. "movl $4, %eax\n\t"
  147. "movl $0, %ecx\n\t" // start with index = 0; Leaf 4 reports
  148. "popl %ebx\n\t"
  149. ); // at least one valid cache level
  150. asm
  151. (
  152. "cpuid"
  153. : "=a" (Regeax)
  154. :
  155. : "%ecx", "%edx"
  156. );
  157. asm
  158. (
  159. "jmp .multi_core\n"
  160. ".single_core:\n\t"
  161. "xor %eax, %eax\n"
  162. ".multi_core:"
  163. );
  164. }
  165. #elif defined( TORQUE_COMPILER_VISUALC )
  166. __asm
  167. {
  168. xor eax, eax
  169. cpuid
  170. cmp eax, 4 // check if cpuid supports leaf 4
  171. jl single_core // Single core
  172. mov eax, 4
  173. mov ecx, 0 // start with index = 0; Leaf 4 reports
  174. cpuid // at least one valid cache level
  175. mov Regeax, eax
  176. jmp multi_core
  177. single_core:
  178. xor eax, eax
  179. multi_core:
  180. }
  181. #else
  182. # error Not implemented.
  183. #endif
  184. return (unsigned int)((Regeax & NUM_CORE_BITS) >> 26)+1;
  185. }
  186. //
  187. // The function returns 0 when the hardware multi-threaded bit is not set.
  188. //
  189. static unsigned int HWD_MTSupported(void)
  190. {
  191. unsigned int Regedx = 0;
  192. if ((CpuIDSupported() >= 1))
  193. {
  194. #ifdef TORQUE_COMPILER_GCC
  195. asm
  196. (
  197. "pushl %%ebx\n\t"
  198. "movl $1,%%eax\n\t"
  199. "cpuid\n\t"
  200. "popl %%ebx\n\t"
  201. : "=d" (Regedx)
  202. :
  203. : "%eax","%ecx"
  204. );
  205. #elif defined( TORQUE_COMPILER_VISUALC )
  206. __asm
  207. {
  208. mov eax, 1
  209. cpuid
  210. mov Regedx, edx
  211. }
  212. #else
  213. # error Not implemented.
  214. #endif
  215. }
  216. return (Regedx & HWD_MT_BIT);
  217. }
  218. //
  219. // Function returns the maximum logical processors per physical package. Note that the number of
  220. // AVAILABLE logical processors per physical to be used by an application might be less than this
  221. // maximum value.
  222. //
  223. static unsigned int MaxLogicalProcPerPhysicalProc(void)
  224. {
  225. unsigned int Regebx = 0;
  226. if (!HWD_MTSupported()) return (unsigned int) 1;
  227. #ifdef TORQUE_COMPILER_GCC
  228. asm
  229. (
  230. "movl $1,%%eax\n\t"
  231. "cpuid"
  232. : "=b" (Regebx)
  233. :
  234. : "%eax","%ecx","%edx"
  235. );
  236. #elif defined( TORQUE_COMPILER_VISUALC )
  237. __asm
  238. {
  239. mov eax, 1
  240. cpuid
  241. mov Regebx, ebx
  242. }
  243. #else
  244. # error Not implemented.
  245. #endif
  246. return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16);
  247. }
  248. static unsigned char GetAPIC_ID(void)
  249. {
  250. unsigned int Regebx = 0;
  251. #ifdef TORQUE_COMPILER_GCC
  252. asm
  253. (
  254. "movl $1, %%eax\n\t"
  255. "cpuid"
  256. : "=b" (Regebx)
  257. :
  258. : "%eax","%ecx","%edx"
  259. );
  260. #elif defined( TORQUE_COMPILER_VISUALC )
  261. __asm
  262. {
  263. mov eax, 1
  264. cpuid
  265. mov Regebx, ebx
  266. }
  267. #else
  268. # error Not implemented.
  269. #endif
  270. return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24);
  271. }
  272. //
  273. // Determine the width of the bit field that can represent the value count_item.
  274. //
  275. unsigned int find_maskwidth(unsigned int CountItem)
  276. {
  277. unsigned int MaskWidth,
  278. count = CountItem;
  279. #ifdef TORQUE_COMPILER_GCC
  280. asm
  281. (
  282. #ifdef __x86_64__ // define constant to compile
  283. "push %%rcx\n\t" // under 64-bit Linux
  284. "push %%rax\n\t"
  285. #else
  286. "pushl %%ecx\n\t"
  287. "pushl %%eax\n\t"
  288. #endif
  289. // "movl $count, %%eax\n\t" //done by Assembler below
  290. "xorl %%ecx, %%ecx"
  291. // "movl %%ecx, MaskWidth\n\t" //done by Assembler below
  292. : "=c" (MaskWidth)
  293. : "a" (count)
  294. // : "%ecx", "%eax" We don't list these as clobbered because we don't want the assembler
  295. //to put them back when we are done
  296. );
  297. asm
  298. (
  299. "decl %%eax\n\t"
  300. "bsrw %%ax,%%cx\n\t"
  301. "jz next\n\t"
  302. "incw %%cx\n\t"
  303. // "movl %%ecx, MaskWidth\n" //done by Assembler below
  304. : "=c" (MaskWidth)
  305. :
  306. );
  307. asm
  308. (
  309. "next:\n\t"
  310. #ifdef __x86_64__
  311. "pop %rax\n\t"
  312. "pop %rcx"
  313. #else
  314. "popl %eax\n\t"
  315. "popl %ecx"
  316. #endif
  317. );
  318. #elif defined( TORQUE_COMPILER_VISUALC )
  319. __asm
  320. {
  321. mov eax, count
  322. mov ecx, 0
  323. mov MaskWidth, ecx
  324. dec eax
  325. bsr cx, ax
  326. jz next
  327. inc cx
  328. mov MaskWidth, ecx
  329. next:
  330. }
  331. #else
  332. # error Not implemented.
  333. #endif
  334. return MaskWidth;
  335. }
  336. //
  337. // Extract the subset of bit field from the 8-bit value FullID. It returns the 8-bit sub ID value
  338. //
  339. static unsigned char GetNzbSubID(unsigned char FullID,
  340. unsigned char MaxSubIDValue,
  341. unsigned char ShiftCount)
  342. {
  343. unsigned int MaskWidth;
  344. unsigned char MaskBits;
  345. MaskWidth = find_maskwidth((unsigned int) MaxSubIDValue);
  346. MaskBits = (0xff << ShiftCount) ^
  347. ((unsigned char) (0xff << (ShiftCount + MaskWidth)));
  348. return (FullID & MaskBits);
  349. }
  350. #endif
  351. //
  352. //
  353. //
  354. EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
  355. {
  356. EConfig StatusFlag = CONFIG_UserConfigIssue;
  357. g_s3Levels[0] = 0;
  358. TotAvailCore = 1;
  359. PhysicalNum = 1;
  360. unsigned int numLPEnabled = 0;
  361. int MaxLPPerCore = 1;
  362. #ifdef TORQUE_OS_MAC
  363. //FIXME: This isn't a proper port but more or less just some sneaky cheating
  364. // to get around having to mess with yet another crap UNIX-style API. Seems
  365. // like there isn't a way to do this that's working across all OSX incarnations
  366. // and machine configurations anyway.
  367. int numCPUs;
  368. int numPackages;
  369. // Get the number of CPUs.
  370. size_t len = sizeof( numCPUs );
  371. if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 )
  372. return CONFIG_UserConfigIssue;
  373. // Get the number of packages.
  374. len = sizeof( numPackages );
  375. if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 )
  376. return CONFIG_UserConfigIssue;
  377. TotAvailCore = numCPUs;
  378. TotAvailLogical = numCPUs;
  379. PhysicalNum = numPackages;
  380. #else
  381. U32 dwAffinityMask;
  382. int j = 0;
  383. unsigned char apicID, PackageIDMask;
  384. unsigned char tblPkgID[256], tblCoreID[256], tblSMTID[256];
  385. char tmp[256];
  386. #ifdef TORQUE_OS_LINUX
  387. //we need to make sure that this process is allowed to run on
  388. //all of the logical processors that the OS itself can run on.
  389. //A process could acquire/inherit affinity settings that restricts the
  390. // current process to run on a subset of all logical processor visible to OS.
  391. // Linux doesn't easily allow us to look at the Affinity Bitmask directly,
  392. // but it does provide an API to test affinity maskbits of the current process
  393. // against each logical processor visible under OS.
  394. int sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many
  395. //CPUs are currently enabled.
  396. //this will tell us which processors this process can run on.
  397. cpu_set_t allowedCPUs;
  398. sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs);
  399. for (int i = 0; i < sysNumProcs; i++ )
  400. {
  401. if ( CPU_ISSET(i, &allowedCPUs) == 0 )
  402. return CONFIG_UserConfigIssue;
  403. }
  404. #elif defined( TORQUE_OS_WIN32 )
  405. DWORD dwProcessAffinity, dwSystemAffinity;
  406. GetProcessAffinityMask(GetCurrentProcess(),
  407. &dwProcessAffinity,
  408. &dwSystemAffinity);
  409. if (dwProcessAffinity != dwSystemAffinity) // not all CPUs are enabled
  410. return CONFIG_UserConfigIssue;
  411. #else
  412. # error Not implemented.
  413. #endif
  414. // Assume that cores within a package have the SAME number of
  415. // logical processors. Also, values returned by
  416. // MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have
  417. // to be power of 2.
  418. MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc();
  419. dwAffinityMask = 1;
  420. #ifdef TORQUE_OS_LINUX
  421. cpu_set_t currentCPU;
  422. while ( j < sysNumProcs )
  423. {
  424. CPU_ZERO(&currentCPU);
  425. CPU_SET(j, &currentCPU);
  426. if ( sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == 0 )
  427. {
  428. sleep(0); // Ensure system to switch to the right CPU
  429. #elif defined( TORQUE_OS_WIN32 )
  430. while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity)
  431. {
  432. if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask))
  433. {
  434. Sleep(0); // Ensure system to switch to the right CPU
  435. #else
  436. # error Not implemented.
  437. #endif
  438. apicID = GetAPIC_ID();
  439. // Store SMT ID and core ID of each logical processor
  440. // Shift vlaue for SMT ID is 0
  441. // Shift value for core ID is the mask width for maximum logical
  442. // processors per core
  443. tblSMTID[j] = GetNzbSubID(apicID, MaxLPPerCore, 0);
  444. unsigned char maxCorePPP = MaxCorePerPhysicalProc();
  445. unsigned char maskWidth = find_maskwidth(MaxLPPerCore);
  446. tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth);
  447. // Extract package ID, assume single cluster.
  448. // Shift value is the mask width for max Logical per package
  449. PackageIDMask = (unsigned char) (0xff <<
  450. find_maskwidth(MaxLogicalProcPerPhysicalProc()));
  451. tblPkgID[j] = apicID & PackageIDMask;
  452. sprintf(tmp," AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d, SMT ID = %d\n",
  453. dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]);
  454. strcat(g_s3Levels, tmp);
  455. numLPEnabled ++; // Number of available logical processors in the system.
  456. } // if
  457. j++;
  458. dwAffinityMask = 1 << j;
  459. } // while
  460. // restore the affinity setting to its original state
  461. #ifdef TORQUE_OS_LINUX
  462. sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs);
  463. sleep(0);
  464. #elif defined( TORQUE_OS_WIN32 )
  465. SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity);
  466. Sleep(0);
  467. #else
  468. # error Not implemented.
  469. #endif
  470. TotAvailLogical = numLPEnabled;
  471. //
  472. // Count available cores (TotAvailCore) in the system
  473. //
  474. unsigned char CoreIDBucket[256];
  475. DWORD ProcessorMask, pCoreMask[256];
  476. unsigned int i, ProcessorNum;
  477. CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0];
  478. ProcessorMask = 1;
  479. pCoreMask[0] = ProcessorMask;
  480. for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
  481. {
  482. ProcessorMask <<= 1;
  483. for (i = 0; i < TotAvailCore; i++)
  484. {
  485. // Comparing bit-fields of logical processors residing in different packages
  486. // Assuming the bit-masks are the same on all processors in the system.
  487. if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i])
  488. {
  489. pCoreMask[i] |= ProcessorMask;
  490. break;
  491. }
  492. } // for i
  493. if (i == TotAvailCore) // did not match any bucket. Start a new one.
  494. {
  495. CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum];
  496. pCoreMask[i] = ProcessorMask;
  497. TotAvailCore++; // Number of available cores in the system
  498. }
  499. } // for ProcessorNum
  500. //
  501. // Count physical processor (PhysicalNum) in the system
  502. //
  503. unsigned char PackageIDBucket[256];
  504. DWORD pPackageMask[256];
  505. PackageIDBucket[0] = tblPkgID[0];
  506. ProcessorMask = 1;
  507. pPackageMask[0] = ProcessorMask;
  508. for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
  509. {
  510. ProcessorMask <<= 1;
  511. for (i = 0; i < PhysicalNum; i++)
  512. {
  513. // Comparing bit-fields of logical processors residing in different packages
  514. // Assuming the bit-masks are the same on all processors in the system.
  515. if (tblPkgID[ProcessorNum]== PackageIDBucket[i])
  516. {
  517. pPackageMask[i] |= ProcessorMask;
  518. break;
  519. }
  520. } // for i
  521. if (i == PhysicalNum) // did not match any bucket. Start a new one.
  522. {
  523. PackageIDBucket[i] = tblPkgID[ProcessorNum];
  524. pPackageMask[i] = ProcessorMask;
  525. PhysicalNum++; // Total number of physical processors in the system
  526. }
  527. } // for ProcessorNum
  528. #endif
  529. //
  530. // Check to see if the system is multi-core
  531. // Check if the system is hyper-threading
  532. //
  533. if (TotAvailCore > PhysicalNum)
  534. {
  535. // Multi-core
  536. if (MaxLPPerCore == 1)
  537. StatusFlag = CONFIG_MultiCoreAndHTNotCapable;
  538. else if (numLPEnabled > TotAvailCore)
  539. StatusFlag = CONFIG_MultiCoreAndHTEnabled;
  540. else StatusFlag = CONFIG_MultiCoreAndHTDisabled;
  541. }
  542. else
  543. {
  544. // Single-core
  545. if (MaxLPPerCore == 1)
  546. StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
  547. else if (numLPEnabled > TotAvailCore)
  548. StatusFlag = CONFIG_SingleCoreHTEnabled;
  549. else StatusFlag = CONFIG_SingleCoreHTDisabled;
  550. }
  551. return StatusFlag;
  552. }
  553. } // namespace CPUInfo
  554. #endif