platformCPUCount.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. // Original code is:
  2. // Copyright (c) 2005 Intel Corporation
  3. // All Rights Reserved
  4. //
  5. // CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platform
  6. // The three forms of HW multithreading are: Multi-processor, Multi-core, and
  7. // HyperThreading Technology.
  8. // This application enumerates all the logical processors enabled by the OS and BIOS,
  9. // and determines the HW topology of these enabled logical processors in the system
  10. // using information provided by the CPUID instruction.
  11. // A multi-processing system can support any combination of the three forms of HW
  12. // multi-threading support. The relevant topology can be identified using a
  13. // three-level decomposition of the "initial APIC ID" into
  14. // Package_id, core_id, and SMT_id. Such a decomposition provides a three-level map of
  15. // the topology of hardware resources and
  16. // allows multi-threaded software to manage shared hardware resources in
  17. // the platform to reduce resource contention.
  18. // The multicore detection algorithm for processor and cache topology requires
  19. // that all leaf functions of the CPUID instruction be available. The system administrator
  20. // must ensure BIOS settings are not configured to restrict CPUID functionality.
  21. //-------------------------------------------------------------------------------------------------
  22. #include "platform/platform.h"
  23. #include "platform/platformCPUCount.h"
  24. #if defined(TORQUE_OS_LINUX) || defined(TORQUE_OS_OSX) || defined(TORQUE_OS_XENON) || defined(TORQUE_OS_PS3)
  25. // Consoles don't need this
  26. #if defined(TORQUE_OS_XENON) || defined(TORQUE_OS_PS3)
  27. namespace CPUInfo
  28. {
  29. EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
  30. {
  31. TotAvailLogical = 6;
  32. TotAvailCore = 6;
  33. PhysicalNum = 3;
  34. return CONFIG_MultiCoreAndHTEnabled;
  35. }
  36. }; // namespace
  37. #else
  38. #ifdef TORQUE_OS_LINUX
  39. // The Linux source code listing can be compiled using Linux kernel version 2.6
  40. // or higher (e.g. RH 4AS-2.8 using GCC 3.4.4).
  41. // Due to syntax variances of Linux affinity APIs with earlier kernel versions
  42. // and dependence on glibc library versions, compilation on Linux environment
  43. // with older kernels and compilers may require kernel patches or compiler upgrades.
  44. #include <stdlib.h>
  45. #include <unistd.h>
  46. #include <string.h>
  47. #include <sched.h>
  48. #define DWORD unsigned long
  49. #elif defined( TORQUE_OS_WIN )
  50. #include <windows.h>
  51. #elif defined( TORQUE_OS_MAC )
  52. # include <sys/types.h>
  53. # include <sys/sysctl.h>
  54. #else
  55. #error Not implemented on platform.
  56. #endif
  57. #include <stdio.h>
  58. #include <assert.h>
  59. namespace CPUInfo {
  60. #define HWD_MT_BIT 0x10000000 // EDX[28] Bit 28 is set if HT or multi-core is supported
  61. #define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical
  62. // processors per physical processor when execute cpuid with
  63. // eax set to 1
  64. #define NUM_CORE_BITS 0xFC000000 // EAX[31:26] Bit 26-31 in eax contains the number of cores minus one
  65. // per physical processor when execute cpuid with
  66. // eax set to 4.
  67. #define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique
  68. // initial APIC ID for the processor this code is running on.
  69. #ifndef TORQUE_OS_MAC
  70. static U32 CpuIDSupported(void);
  71. static U32 find_maskwidth(unsigned int);
  72. static U32 HWD_MTSupported(void);
  73. static U32 MaxLogicalProcPerPhysicalProc(void);
  74. static U32 MaxCorePerPhysicalProc(void);
  75. static U8 GetAPIC_ID(void);
  76. static U8 GetNzbSubID(U8, U8, U8);
  77. #endif
  78. static char g_s3Levels[2048];
  79. #ifndef TORQUE_OS_MAC
  80. //
  81. // CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return
  82. // the maximum supported standard function.
  83. //
  84. static U32 CpuIDSupported(void)
  85. {
  86. U32 maxInputValue = 0;
  87. // If CPUID instruction is supported
  88. #ifdef TORQUE_COMPILER_GCC
  89. try
  90. {
  91. // call cpuid with eax = 0
  92. asm
  93. (
  94. "pushl %%ebx\n\t"
  95. "xorl %%eax,%%eax\n\t"
  96. "cpuid\n\t"
  97. "popl %%ebx\n\t"
  98. : "=a" (maxInputValue)
  99. :
  100. : "%ecx", "%edx"
  101. );
  102. }
  103. catch (...)
  104. {
  105. return(0); // cpuid instruction is unavailable
  106. }
  107. #elif defined( TORQUE_COMPILER_VISUALC )
  108. try
  109. {
  110. // call cpuid with eax = 0
  111. __asm
  112. {
  113. xor eax, eax
  114. cpuid
  115. mov maxInputValue, eax
  116. }
  117. }
  118. catch (...)
  119. {
  120. // cpuid instruction is unavailable
  121. }
  122. #else
  123. # error Not implemented.
  124. #endif
  125. return maxInputValue;
  126. }
  127. //
  128. // Function returns the maximum cores per physical package. Note that the number of
  129. // AVAILABLE cores per physical to be used by an application might be less than this
  130. // maximum value.
  131. //
  132. static U32 MaxCorePerPhysicalProc(void)
  133. {
  134. U32 Regeax = 0;
  135. if (!HWD_MTSupported()) return (U32) 1; // Single core
  136. #ifdef TORQUE_COMPILER_GCC
  137. {
  138. asm
  139. (
  140. "pushl %ebx\n\t"
  141. "xorl %eax, %eax\n\t"
  142. "cpuid\n\t"
  143. "cmpl $4, %eax\n\t" // check if cpuid supports leaf 4
  144. "jl .single_core\n\t" // Single core
  145. "movl $4, %eax\n\t"
  146. "movl $0, %ecx\n\t" // start with index = 0; Leaf 4 reports
  147. "popl %ebx\n\t"
  148. ); // at least one valid cache level
  149. asm
  150. (
  151. "cpuid"
  152. : "=a" (Regeax)
  153. :
  154. : "%ecx", "%edx"
  155. );
  156. asm
  157. (
  158. "jmp .multi_core\n"
  159. ".single_core:\n\t"
  160. "xor %eax, %eax\n"
  161. ".multi_core:"
  162. );
  163. }
  164. #elif defined( TORQUE_COMPILER_VISUALC )
  165. __asm
  166. {
  167. xor eax, eax
  168. cpuid
  169. cmp eax, 4 // check if cpuid supports leaf 4
  170. jl single_core // Single core
  171. mov eax, 4
  172. mov ecx, 0 // start with index = 0; Leaf 4 reports
  173. cpuid // at least one valid cache level
  174. mov Regeax, eax
  175. jmp multi_core
  176. single_core:
  177. xor eax, eax
  178. multi_core:
  179. }
  180. #else
  181. # error Not implemented.
  182. #endif
  183. return (U32)((Regeax & NUM_CORE_BITS) >> 26)+1;
  184. }
  185. //
  186. // The function returns 0 when the hardware multi-threaded bit is not set.
  187. //
  188. static U32 HWD_MTSupported(void)
  189. {
  190. U32 Regedx = 0;
  191. if ((CpuIDSupported() >= 1))
  192. {
  193. #ifdef TORQUE_COMPILER_GCC
  194. asm
  195. (
  196. "pushl %%ebx\n\t"
  197. "movl $1,%%eax\n\t"
  198. "cpuid\n\t"
  199. "popl %%ebx\n\t"
  200. : "=d" (Regedx)
  201. :
  202. : "%eax","%ecx"
  203. );
  204. #elif defined( TORQUE_COMPILER_VISUALC )
  205. __asm
  206. {
  207. mov eax, 1
  208. cpuid
  209. mov Regedx, edx
  210. }
  211. #else
  212. # error Not implemented.
  213. #endif
  214. }
  215. return (Regedx & HWD_MT_BIT);
  216. }
  217. //
  218. // Function returns the maximum logical processors per physical package. Note that the number of
  219. // AVAILABLE logical processors per physical to be used by an application might be less than this
  220. // maximum value.
  221. //
  222. static U32 MaxLogicalProcPerPhysicalProc(void)
  223. {
  224. U32 Regebx = 0;
  225. if (!HWD_MTSupported()) return (U32) 1;
  226. #ifdef TORQUE_COMPILER_GCC
  227. asm
  228. (
  229. "movl $1,%%eax\n\t"
  230. "cpuid"
  231. : "=b" (Regebx)
  232. :
  233. : "%eax","%ecx","%edx"
  234. );
  235. #elif defined( TORQUE_COMPILER_VISUALC )
  236. __asm
  237. {
  238. mov eax, 1
  239. cpuid
  240. mov Regebx, ebx
  241. }
  242. #else
  243. # error Not implemented.
  244. #endif
  245. return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16);
  246. }
  247. static U8 GetAPIC_ID(void)
  248. {
  249. U32 Regebx = 0;
  250. #ifdef TORQUE_COMPILER_GCC
  251. asm
  252. (
  253. "movl $1, %%eax\n\t"
  254. "cpuid"
  255. : "=b" (Regebx)
  256. :
  257. : "%eax","%ecx","%edx"
  258. );
  259. #elif defined( TORQUE_COMPILER_VISUALC )
  260. __asm
  261. {
  262. mov eax, 1
  263. cpuid
  264. mov Regebx, ebx
  265. }
  266. #else
  267. # error Not implemented.
  268. #endif
  269. return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24);
  270. }
  271. //
  272. // Determine the width of the bit field that can represent the value count_item.
  273. //
  274. U32 find_maskwidth(U32 CountItem)
  275. {
  276. U32 MaskWidth,
  277. count = CountItem;
  278. #ifdef TORQUE_COMPILER_GCC
  279. asm
  280. (
  281. #ifdef __x86_64__ // define constant to compile
  282. "push %%rcx\n\t" // under 64-bit Linux
  283. "push %%rax\n\t"
  284. #else
  285. "pushl %%ecx\n\t"
  286. "pushl %%eax\n\t"
  287. #endif
  288. // "movl $count, %%eax\n\t" //done by Assembler below
  289. "xorl %%ecx, %%ecx"
  290. // "movl %%ecx, MaskWidth\n\t" //done by Assembler below
  291. : "=c" (MaskWidth)
  292. : "a" (count)
  293. // : "%ecx", "%eax" We don't list these as clobbered because we don't want the assembler
  294. //to put them back when we are done
  295. );
  296. asm
  297. (
  298. "decl %%eax\n\t"
  299. "bsrw %%ax,%%cx\n\t"
  300. "jz next\n\t"
  301. "incw %%cx\n\t"
  302. // "movl %%ecx, MaskWidth\n" //done by Assembler below
  303. : "=c" (MaskWidth)
  304. :
  305. );
  306. asm
  307. (
  308. "next:\n\t"
  309. #ifdef __x86_64__
  310. "pop %rax\n\t"
  311. "pop %rcx"
  312. #else
  313. "popl %eax\n\t"
  314. "popl %ecx"
  315. #endif
  316. );
  317. #elif defined( TORQUE_COMPILER_VISUALC )
  318. __asm
  319. {
  320. mov eax, count
  321. mov ecx, 0
  322. mov MaskWidth, ecx
  323. dec eax
  324. bsr cx, ax
  325. jz next
  326. inc cx
  327. mov MaskWidth, ecx
  328. next:
  329. }
  330. #else
  331. # error Not implemented.
  332. #endif
  333. return MaskWidth;
  334. }
  335. //
  336. // Extract the subset of bit field from the 8-bit value FullID. It returns the 8-bit sub ID value
  337. //
  338. static U8 GetNzbSubID(U8 FullID,
  339. U8 MaxSubIDValue,
  340. U8 ShiftCount)
  341. {
  342. U32 MaskWidth;
  343. U8 MaskBits;
  344. MaskWidth = find_maskwidth((U32) MaxSubIDValue);
  345. MaskBits = (0xff << ShiftCount) ^
  346. ((U8) (0xff << (ShiftCount + MaskWidth)));
  347. return (FullID & MaskBits);
  348. }
  349. #endif
  350. //
  351. //
  352. //
  353. EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
  354. {
  355. EConfig StatusFlag = CONFIG_UserConfigIssue;
  356. g_s3Levels[0] = 0;
  357. TotAvailCore = 1;
  358. PhysicalNum = 1;
  359. U32 numLPEnabled = 0;
  360. S32 MaxLPPerCore = 1;
  361. #ifdef TORQUE_OS_MAC
  362. //FIXME: This isn't a proper port but more or less just some sneaky cheating
  363. // to get around having to mess with yet another crap UNIX-style API. Seems
  364. // like there isn't a way to do this that's working across all OSX incarnations
  365. // and machine configurations anyway.
  366. S32 numCPUs;
  367. S32 numPackages;
  368. // Get the number of CPUs.
  369. size_t len = sizeof( numCPUs );
  370. if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 )
  371. return CONFIG_UserConfigIssue;
  372. // Get the number of packages.
  373. len = sizeof( numPackages );
  374. if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 )
  375. return CONFIG_UserConfigIssue;
  376. TotAvailCore = numCPUs;
  377. TotAvailLogical = numCPUs;
  378. PhysicalNum = numPackages;
  379. #else
  380. U32 dwAffinityMask;
  381. S32 j = 0;
  382. U8 apicID, PackageIDMask;
  383. U8 tblPkgID[256], tblCoreID[256], tblSMTID[256];
  384. char tmp[256];
  385. #ifdef TORQUE_OS_LINUX
  386. //we need to make sure that this process is allowed to run on
  387. //all of the logical processors that the OS itself can run on.
  388. //A process could acquire/inherit affinity settings that restricts the
  389. // current process to run on a subset of all logical processor visible to OS.
  390. // Linux doesn't easily allow us to look at the Affinity Bitmask directly,
  391. // but it does provide an API to test affinity maskbits of the current process
  392. // against each logical processor visible under OS.
  393. S32 sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many
  394. //CPUs are currently enabled.
  395. //this will tell us which processors this process can run on.
  396. cpu_set_t allowedCPUs;
  397. sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs);
  398. for (S32 i = 0; i < sysNumProcs; i++ )
  399. {
  400. if ( CPU_ISSET(i, &allowedCPUs) == 0 )
  401. return CONFIG_UserConfigIssue;
  402. }
  403. #elif defined( TORQUE_OS_WIN )
  404. DWORD dwProcessAffinity, dwSystemAffinity;
  405. GetProcessAffinityMask(GetCurrentProcess(),
  406. &dwProcessAffinity,
  407. &dwSystemAffinity);
  408. if (dwProcessAffinity != dwSystemAffinity) // not all CPUs are enabled
  409. return CONFIG_UserConfigIssue;
  410. #else
  411. # error Not implemented.
  412. #endif
  413. // Assume that cores within a package have the SAME number of
  414. // logical processors. Also, values returned by
  415. // MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have
  416. // to be power of 2.
  417. MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc();
  418. dwAffinityMask = 1;
  419. #ifdef TORQUE_OS_LINUX
  420. cpu_set_t currentCPU;
  421. while ( j < sysNumProcs )
  422. {
  423. CPU_ZERO(&currentCPU);
  424. CPU_SET(j, &currentCPU);
  425. if ( sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == 0 )
  426. {
  427. sleep(0); // Ensure system to switch to the right CPU
  428. #elif defined( TORQUE_OS_WIN )
  429. while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity)
  430. {
  431. if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask))
  432. {
  433. Sleep(0); // Ensure system to switch to the right CPU
  434. #else
  435. # error Not implemented.
  436. #endif
  437. apicID = GetAPIC_ID();
  438. // Store SMT ID and core ID of each logical processor
  439. // Shift vlaue for SMT ID is 0
  440. // Shift value for core ID is the mask width for maximum logical
  441. // processors per core
  442. tblSMTID[j] = GetNzbSubID(apicID, MaxLPPerCore, 0);
  443. U8 maxCorePPP = MaxCorePerPhysicalProc();
  444. U8 maskWidth = find_maskwidth(MaxLPPerCore);
  445. tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth);
  446. // Extract package ID, assume single cluster.
  447. // Shift value is the mask width for max Logical per package
  448. PackageIDMask = (unsigned char) (0xff <<
  449. find_maskwidth(MaxLogicalProcPerPhysicalProc()));
  450. tblPkgID[j] = apicID & PackageIDMask;
  451. sprintf(tmp," AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d, SMT ID = %d\n",
  452. dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]);
  453. strcat(g_s3Levels, tmp);
  454. numLPEnabled ++; // Number of available logical processors in the system.
  455. } // if
  456. j++;
  457. dwAffinityMask = 1 << j;
  458. } // while
  459. // restore the affinity setting to its original state
  460. #ifdef TORQUE_OS_LINUX
  461. sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs);
  462. sleep(0);
  463. #elif defined( TORQUE_OS_WIN )
  464. SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity);
  465. Sleep(0);
  466. #else
  467. # error Not implemented.
  468. #endif
  469. TotAvailLogical = numLPEnabled;
  470. //
  471. // Count available cores (TotAvailCore) in the system
  472. //
  473. U8 CoreIDBucket[256];
  474. DWORD ProcessorMask, pCoreMask[256];
  475. U32 i, ProcessorNum;
  476. CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0];
  477. ProcessorMask = 1;
  478. pCoreMask[0] = ProcessorMask;
  479. for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
  480. {
  481. ProcessorMask <<= 1;
  482. for (i = 0; i < TotAvailCore; i++)
  483. {
  484. // Comparing bit-fields of logical processors residing in different packages
  485. // Assuming the bit-masks are the same on all processors in the system.
  486. if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i])
  487. {
  488. pCoreMask[i] |= ProcessorMask;
  489. break;
  490. }
  491. } // for i
  492. if (i == TotAvailCore) // did not match any bucket. Start a new one.
  493. {
  494. CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum];
  495. pCoreMask[i] = ProcessorMask;
  496. TotAvailCore++; // Number of available cores in the system
  497. }
  498. } // for ProcessorNum
  499. //
  500. // Count physical processor (PhysicalNum) in the system
  501. //
  502. U8 PackageIDBucket[256];
  503. DWORD pPackageMask[256];
  504. PackageIDBucket[0] = tblPkgID[0];
  505. ProcessorMask = 1;
  506. pPackageMask[0] = ProcessorMask;
  507. for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
  508. {
  509. ProcessorMask <<= 1;
  510. for (i = 0; i < PhysicalNum; i++)
  511. {
  512. // Comparing bit-fields of logical processors residing in different packages
  513. // Assuming the bit-masks are the same on all processors in the system.
  514. if (tblPkgID[ProcessorNum]== PackageIDBucket[i])
  515. {
  516. pPackageMask[i] |= ProcessorMask;
  517. break;
  518. }
  519. } // for i
  520. if (i == PhysicalNum) // did not match any bucket. Start a new one.
  521. {
  522. PackageIDBucket[i] = tblPkgID[ProcessorNum];
  523. pPackageMask[i] = ProcessorMask;
  524. PhysicalNum++; // Total number of physical processors in the system
  525. }
  526. } // for ProcessorNum
  527. #endif
  528. //
  529. // Check to see if the system is multi-core
  530. // Check if the system is hyper-threading
  531. //
  532. if (TotAvailCore > PhysicalNum)
  533. {
  534. // Multi-core
  535. if (MaxLPPerCore == 1)
  536. StatusFlag = CONFIG_MultiCoreAndHTNotCapable;
  537. else if (numLPEnabled > TotAvailCore)
  538. StatusFlag = CONFIG_MultiCoreAndHTEnabled;
  539. else StatusFlag = CONFIG_MultiCoreAndHTDisabled;
  540. }
  541. else
  542. {
  543. // Single-core
  544. if (MaxLPPerCore == 1)
  545. StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
  546. else if (numLPEnabled > TotAvailCore)
  547. StatusFlag = CONFIG_SingleCoreHTEnabled;
  548. else StatusFlag = CONFIG_SingleCoreHTDisabled;
  549. }
  550. return StatusFlag;
  551. }
  552. } // namespace CPUInfo
  553. #endif
  554. #endif