// platformCPUCount.cpp
  1. // Original code is:
  2. // Copyright (c) 2005 Intel Corporation
  3. // All Rights Reserved
  4. //
// CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platforms.
// The three forms of HW multithreading are: Multi-processor, Multi-core, and
// HyperThreading Technology.
// This application enumerates all the logical processors enabled by the OS and BIOS and
// determines the HW topology of these enabled logical processors in the system
// using information provided by the CPUID instruction.
// A multi-processing system can support any combination of the three forms of HW
// multi-threading support. The relevant topology can be identified using a
// three-level decomposition of the "initial APIC ID" into
// Package_id, core_id, and SMT_id. Such a decomposition provides a three-level map of
// the topology of hardware resources and
// allows multi-threaded software to manage shared hardware resources in
// the platform to reduce resource contention.
// The multicore detection algorithm for processor and cache topology requires
// all leaf functions of the CPUID instruction to be available. The system administrator
// must ensure that the BIOS settings are not configured to restrict CPUID functionality.
  21. //-------------------------------------------------------------------------------------------------
  22. #if defined(TORQUE_OS_LINUX) || defined(LINUX)
// TODO: The GCC code doesn't compile in Release builds with optimizations; move this code to the platform layer.
  24. #else
  25. #include "platform/platform.h"
  26. #include "platform/platformCPUCount.h"
  27. #if defined(TORQUE_OS_LINUX) || defined(TORQUE_OS_OSX) || defined(TORQUE_OS_XENON) || defined(TORQUE_OS_PS3)
  28. // Consoles don't need this
  29. #if defined(TORQUE_OS_XENON) || defined(TORQUE_OS_PS3)
  30. namespace CPUInfo
  31. {
  32. EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
  33. {
  34. TotAvailLogical = 6;
  35. TotAvailCore = 6;
  36. PhysicalNum = 3;
  37. return CONFIG_MultiCoreAndHTEnabled;
  38. }
  39. }; // namespace
  40. #else
  41. #ifdef TORQUE_OS_LINUX
// The Linux source code listing can be compiled using Linux kernel version 2.6
// or higher (e.g. RH 4AS-2.8 using GCC 3.4.4).
// Due to syntax variances of the Linux affinity APIs in earlier kernel versions
// and dependence on glibc library versions, compilation in a Linux environment
// with older kernels and compilers may require kernel patches or compiler upgrades.
  47. #include <stdlib.h>
  48. #include <unistd.h>
  49. #include <string.h>
  50. #include <sched.h>
  51. #define DWORD unsigned long
  52. #elif defined( TORQUE_OS_WIN )
  53. #include <windows.h>
  54. #elif defined( TORQUE_OS_MAC )
  55. # include <sys/types.h>
  56. # include <sys/sysctl.h>
  57. #else
  58. #error Not implemented on platform.
  59. #endif
  60. #include <stdio.h>
  61. #include <assert.h>
  62. namespace CPUInfo {
  63. #define HWD_MT_BIT 0x10000000 // EDX[28] Bit 28 is set if HT or multi-core is supported
  64. #define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical
  65. // processors per physical processor when execute cpuid with
  66. // eax set to 1
  67. #define NUM_CORE_BITS 0xFC000000 // EAX[31:26] Bit 26-31 in eax contains the number of cores minus one
  68. // per physical processor when execute cpuid with
  69. // eax set to 4.
  70. #define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique
  71. // initial APIC ID for the processor this code is running on.
  72. #ifndef TORQUE_OS_MAC
  73. static U32 CpuIDSupported(void);
  74. static U32 find_maskwidth(unsigned int);
  75. static U32 HWD_MTSupported(void);
  76. static U32 MaxLogicalProcPerPhysicalProc(void);
  77. static U32 MaxCorePerPhysicalProc(void);
  78. static U8 GetAPIC_ID(void);
  79. static U8 GetNzbSubID(U8, U8, U8);
  80. #endif
  81. static char g_s3Levels[2048];
  82. #ifndef TORQUE_OS_MAC
  83. //
  84. // CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return
  85. // the maximum supported standard function.
  86. //
  87. static U32 CpuIDSupported(void)
  88. {
  89. U32 maxInputValue = 0;
  90. // If CPUID instruction is supported
  91. #ifdef TORQUE_COMPILER_GCC
  92. try
  93. {
  94. // call cpuid with eax = 0
  95. asm
  96. (
  97. "pushl %%ebx\n\t"
  98. "xorl %%eax,%%eax\n\t"
  99. "cpuid\n\t"
  100. "popl %%ebx\n\t"
  101. : "=a" (maxInputValue)
  102. :
  103. : "%ecx", "%edx"
  104. );
  105. }
  106. catch (...)
  107. {
  108. return(0); // cpuid instruction is unavailable
  109. }
  110. #elif defined( TORQUE_COMPILER_VISUALC )
  111. try
  112. {
  113. // call cpuid with eax = 0
  114. __asm
  115. {
  116. xor eax, eax
  117. cpuid
  118. mov maxInputValue, eax
  119. }
  120. }
  121. catch (...)
  122. {
  123. // cpuid instruction is unavailable
  124. }
  125. #else
  126. # error Not implemented.
  127. #endif
  128. return maxInputValue;
  129. }
  130. //
  131. // Function returns the maximum cores per physical package. Note that the number of
  132. // AVAILABLE cores per physical to be used by an application might be less than this
  133. // maximum value.
  134. //
  135. static U32 MaxCorePerPhysicalProc(void)
  136. {
  137. U32 Regeax = 0;
  138. if (!HWD_MTSupported()) return (U32) 1; // Single core
  139. #ifdef TORQUE_COMPILER_GCC
  140. {
  141. asm
  142. (
  143. "pushl %ebx\n\t"
  144. "xorl %eax, %eax\n\t"
  145. "cpuid\n\t"
  146. "cmpl $4, %eax\n\t" // check if cpuid supports leaf 4
  147. "jl .single_core\n\t" // Single core
  148. "movl $4, %eax\n\t"
  149. "movl $0, %ecx\n\t" // start with index = 0; Leaf 4 reports
  150. "popl %ebx\n\t"
  151. ); // at least one valid cache level
  152. asm
  153. (
  154. "cpuid"
  155. : "=a" (Regeax)
  156. :
  157. : "%ecx", "%edx"
  158. );
  159. asm
  160. (
  161. "jmp .multi_core\n"
  162. ".single_core:\n\t"
  163. "xor %eax, %eax\n"
  164. ".multi_core:"
  165. );
  166. }
  167. #elif defined( TORQUE_COMPILER_VISUALC )
  168. __asm
  169. {
  170. xor eax, eax
  171. cpuid
  172. cmp eax, 4 // check if cpuid supports leaf 4
  173. jl single_core // Single core
  174. mov eax, 4
  175. mov ecx, 0 // start with index = 0; Leaf 4 reports
  176. cpuid // at least one valid cache level
  177. mov Regeax, eax
  178. jmp multi_core
  179. single_core:
  180. xor eax, eax
  181. multi_core:
  182. }
  183. #else
  184. # error Not implemented.
  185. #endif
  186. return (U32)((Regeax & NUM_CORE_BITS) >> 26)+1;
  187. }
  188. //
  189. // The function returns 0 when the hardware multi-threaded bit is not set.
  190. //
  191. static U32 HWD_MTSupported(void)
  192. {
  193. U32 Regedx = 0;
  194. if ((CpuIDSupported() >= 1))
  195. {
  196. #ifdef TORQUE_COMPILER_GCC
  197. asm
  198. (
  199. "pushl %%ebx\n\t"
  200. "movl $1,%%eax\n\t"
  201. "cpuid\n\t"
  202. "popl %%ebx\n\t"
  203. : "=d" (Regedx)
  204. :
  205. : "%eax","%ecx"
  206. );
  207. #elif defined( TORQUE_COMPILER_VISUALC )
  208. __asm
  209. {
  210. mov eax, 1
  211. cpuid
  212. mov Regedx, edx
  213. }
  214. #else
  215. # error Not implemented.
  216. #endif
  217. }
  218. return (Regedx & HWD_MT_BIT);
  219. }
  220. //
  221. // Function returns the maximum logical processors per physical package. Note that the number of
  222. // AVAILABLE logical processors per physical to be used by an application might be less than this
  223. // maximum value.
  224. //
  225. static U32 MaxLogicalProcPerPhysicalProc(void)
  226. {
  227. U32 Regebx = 0;
  228. if (!HWD_MTSupported()) return (U32) 1;
  229. #ifdef TORQUE_COMPILER_GCC
  230. asm
  231. (
  232. "movl $1,%%eax\n\t"
  233. "cpuid"
  234. : "=b" (Regebx)
  235. :
  236. : "%eax","%ecx","%edx"
  237. );
  238. #elif defined( TORQUE_COMPILER_VISUALC )
  239. __asm
  240. {
  241. mov eax, 1
  242. cpuid
  243. mov Regebx, ebx
  244. }
  245. #else
  246. # error Not implemented.
  247. #endif
  248. return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16);
  249. }
  250. static U8 GetAPIC_ID(void)
  251. {
  252. U32 Regebx = 0;
  253. #ifdef TORQUE_COMPILER_GCC
  254. asm
  255. (
  256. "movl $1, %%eax\n\t"
  257. "cpuid"
  258. : "=b" (Regebx)
  259. :
  260. : "%eax","%ecx","%edx"
  261. );
  262. #elif defined( TORQUE_COMPILER_VISUALC )
  263. __asm
  264. {
  265. mov eax, 1
  266. cpuid
  267. mov Regebx, ebx
  268. }
  269. #else
  270. # error Not implemented.
  271. #endif
  272. return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24);
  273. }
  274. //
  275. // Determine the width of the bit field that can represent the value count_item.
  276. //
  277. U32 find_maskwidth(U32 CountItem)
  278. {
  279. U32 MaskWidth,
  280. count = CountItem;
  281. #ifdef TORQUE_COMPILER_GCC
  282. asm
  283. (
  284. #ifdef __x86_64__ // define constant to compile
  285. "push %%rcx\n\t" // under 64-bit Linux
  286. "push %%rax\n\t"
  287. #else
  288. "pushl %%ecx\n\t"
  289. "pushl %%eax\n\t"
  290. #endif
  291. // "movl $count, %%eax\n\t" //done by Assembler below
  292. "xorl %%ecx, %%ecx"
  293. // "movl %%ecx, MaskWidth\n\t" //done by Assembler below
  294. : "=c" (MaskWidth)
  295. : "a" (count)
  296. // : "%ecx", "%eax" We don't list these as clobbered because we don't want the assembler
  297. //to put them back when we are done
  298. );
  299. asm
  300. (
  301. "decl %%eax\n\t"
  302. "bsrw %%ax,%%cx\n\t"
  303. "jz next\n\t"
  304. "incw %%cx\n\t"
  305. // "movl %%ecx, MaskWidth\n" //done by Assembler below
  306. : "=c" (MaskWidth)
  307. :
  308. );
  309. asm
  310. (
  311. "next:\n\t"
  312. #ifdef __x86_64__
  313. "pop %rax\n\t"
  314. "pop %rcx"
  315. #else
  316. "popl %eax\n\t"
  317. "popl %ecx"
  318. #endif
  319. );
  320. #elif defined( TORQUE_COMPILER_VISUALC )
  321. __asm
  322. {
  323. mov eax, count
  324. mov ecx, 0
  325. mov MaskWidth, ecx
  326. dec eax
  327. bsr cx, ax
  328. jz next
  329. inc cx
  330. mov MaskWidth, ecx
  331. next:
  332. }
  333. #else
  334. # error Not implemented.
  335. #endif
  336. return MaskWidth;
  337. }
  338. //
  339. // Extract the subset of bit field from the 8-bit value FullID. It returns the 8-bit sub ID value
  340. //
  341. static U8 GetNzbSubID(U8 FullID,
  342. U8 MaxSubIDValue,
  343. U8 ShiftCount)
  344. {
  345. U32 MaskWidth;
  346. U8 MaskBits;
  347. MaskWidth = find_maskwidth((U32) MaxSubIDValue);
  348. MaskBits = (0xff << ShiftCount) ^
  349. ((U8) (0xff << (ShiftCount + MaskWidth)));
  350. return (FullID & MaskBits);
  351. }
  352. #endif
  353. //
  354. //
  355. //
  356. EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
  357. {
  358. EConfig StatusFlag = CONFIG_UserConfigIssue;
  359. g_s3Levels[0] = 0;
  360. TotAvailCore = 1;
  361. PhysicalNum = 1;
  362. U32 numLPEnabled = 0;
  363. S32 MaxLPPerCore = 1;
  364. #ifdef TORQUE_OS_MAC
  365. //FIXME: This isn't a proper port but more or less just some sneaky cheating
  366. // to get around having to mess with yet another crap UNIX-style API. Seems
  367. // like there isn't a way to do this that's working across all OSX incarnations
  368. // and machine configurations anyway.
  369. S32 numCPUs;
  370. S32 numPackages;
  371. // Get the number of CPUs.
  372. size_t len = sizeof( numCPUs );
  373. if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 )
  374. return CONFIG_UserConfigIssue;
  375. // Get the number of packages.
  376. len = sizeof( numPackages );
  377. if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 )
  378. return CONFIG_UserConfigIssue;
  379. TotAvailCore = numCPUs;
  380. TotAvailLogical = numCPUs;
  381. PhysicalNum = numPackages;
  382. #else
  383. U32 dwAffinityMask;
  384. S32 j = 0;
  385. U8 apicID, PackageIDMask;
  386. U8 tblPkgID[256], tblCoreID[256], tblSMTID[256];
  387. char tmp[256];
  388. #ifdef TORQUE_OS_LINUX
  389. //we need to make sure that this process is allowed to run on
  390. //all of the logical processors that the OS itself can run on.
  391. //A process could acquire/inherit affinity settings that restricts the
  392. // current process to run on a subset of all logical processor visible to OS.
  393. // Linux doesn't easily allow us to look at the Affinity Bitmask directly,
  394. // but it does provide an API to test affinity maskbits of the current process
  395. // against each logical processor visible under OS.
  396. S32 sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many
  397. //CPUs are currently enabled.
  398. //this will tell us which processors this process can run on.
  399. cpu_set_t allowedCPUs;
  400. sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs);
  401. for (S32 i = 0; i < sysNumProcs; i++ )
  402. {
  403. if ( CPU_ISSET(i, &allowedCPUs) == 0 )
  404. return CONFIG_UserConfigIssue;
  405. }
  406. #elif defined( TORQUE_OS_WIN )
  407. DWORD dwProcessAffinity, dwSystemAffinity;
  408. GetProcessAffinityMask(GetCurrentProcess(),
  409. &dwProcessAffinity,
  410. &dwSystemAffinity);
  411. if (dwProcessAffinity != dwSystemAffinity) // not all CPUs are enabled
  412. return CONFIG_UserConfigIssue;
  413. #else
  414. # error Not implemented.
  415. #endif
  416. // Assume that cores within a package have the SAME number of
  417. // logical processors. Also, values returned by
  418. // MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have
  419. // to be power of 2.
  420. MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc();
  421. dwAffinityMask = 1;
  422. #ifdef TORQUE_OS_LINUX
  423. cpu_set_t currentCPU;
  424. while ( j < sysNumProcs )
  425. {
  426. CPU_ZERO(&currentCPU);
  427. CPU_SET(j, &currentCPU);
  428. if ( sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == 0 )
  429. {
  430. sleep(0); // Ensure system to switch to the right CPU
  431. #elif defined( TORQUE_OS_WIN )
  432. while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity)
  433. {
  434. if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask))
  435. {
  436. Sleep(0); // Ensure system to switch to the right CPU
  437. #else
  438. # error Not implemented.
  439. #endif
  440. apicID = GetAPIC_ID();
  441. // Store SMT ID and core ID of each logical processor
  442. // Shift vlaue for SMT ID is 0
  443. // Shift value for core ID is the mask width for maximum logical
  444. // processors per core
  445. tblSMTID[j] = GetNzbSubID(apicID, MaxLPPerCore, 0);
  446. U8 maxCorePPP = MaxCorePerPhysicalProc();
  447. U8 maskWidth = find_maskwidth(MaxLPPerCore);
  448. tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth);
  449. // Extract package ID, assume single cluster.
  450. // Shift value is the mask width for max Logical per package
  451. PackageIDMask = (unsigned char) (0xff <<
  452. find_maskwidth(MaxLogicalProcPerPhysicalProc()));
  453. tblPkgID[j] = apicID & PackageIDMask;
  454. sprintf(tmp," AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d, SMT ID = %d\n",
  455. dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]);
  456. strcat(g_s3Levels, tmp);
  457. numLPEnabled ++; // Number of available logical processors in the system.
  458. } // if
  459. j++;
  460. dwAffinityMask = 1 << j;
  461. } // while
  462. // restore the affinity setting to its original state
  463. #ifdef TORQUE_OS_LINUX
  464. sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs);
  465. sleep(0);
  466. #elif defined( TORQUE_OS_WIN )
  467. SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity);
  468. Sleep(0);
  469. #else
  470. # error Not implemented.
  471. #endif
  472. TotAvailLogical = numLPEnabled;
  473. //
  474. // Count available cores (TotAvailCore) in the system
  475. //
  476. U8 CoreIDBucket[256];
  477. DWORD ProcessorMask, pCoreMask[256];
  478. U32 i, ProcessorNum;
  479. CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0];
  480. ProcessorMask = 1;
  481. pCoreMask[0] = ProcessorMask;
  482. for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
  483. {
  484. ProcessorMask <<= 1;
  485. for (i = 0; i < TotAvailCore; i++)
  486. {
  487. // Comparing bit-fields of logical processors residing in different packages
  488. // Assuming the bit-masks are the same on all processors in the system.
  489. if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i])
  490. {
  491. pCoreMask[i] |= ProcessorMask;
  492. break;
  493. }
  494. } // for i
  495. if (i == TotAvailCore) // did not match any bucket. Start a new one.
  496. {
  497. CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum];
  498. pCoreMask[i] = ProcessorMask;
  499. TotAvailCore++; // Number of available cores in the system
  500. }
  501. } // for ProcessorNum
  502. //
  503. // Count physical processor (PhysicalNum) in the system
  504. //
  505. U8 PackageIDBucket[256];
  506. DWORD pPackageMask[256];
  507. PackageIDBucket[0] = tblPkgID[0];
  508. ProcessorMask = 1;
  509. pPackageMask[0] = ProcessorMask;
  510. for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
  511. {
  512. ProcessorMask <<= 1;
  513. for (i = 0; i < PhysicalNum; i++)
  514. {
  515. // Comparing bit-fields of logical processors residing in different packages
  516. // Assuming the bit-masks are the same on all processors in the system.
  517. if (tblPkgID[ProcessorNum]== PackageIDBucket[i])
  518. {
  519. pPackageMask[i] |= ProcessorMask;
  520. break;
  521. }
  522. } // for i
  523. if (i == PhysicalNum) // did not match any bucket. Start a new one.
  524. {
  525. PackageIDBucket[i] = tblPkgID[ProcessorNum];
  526. pPackageMask[i] = ProcessorMask;
  527. PhysicalNum++; // Total number of physical processors in the system
  528. }
  529. } // for ProcessorNum
  530. #endif
  531. //
  532. // Check to see if the system is multi-core
  533. // Check if the system is hyper-threading
  534. //
  535. if (TotAvailCore > PhysicalNum)
  536. {
  537. // Multi-core
  538. if (MaxLPPerCore == 1)
  539. StatusFlag = CONFIG_MultiCoreAndHTNotCapable;
  540. else if (numLPEnabled > TotAvailCore)
  541. StatusFlag = CONFIG_MultiCoreAndHTEnabled;
  542. else StatusFlag = CONFIG_MultiCoreAndHTDisabled;
  543. }
  544. else
  545. {
  546. // Single-core
  547. if (MaxLPPerCore == 1)
  548. StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
  549. else if (numLPEnabled > TotAvailCore)
  550. StatusFlag = CONFIG_SingleCoreHTEnabled;
  551. else StatusFlag = CONFIG_SingleCoreHTDisabled;
  552. }
  553. return StatusFlag;
  554. }
  555. } // namespace CPUInfo
  556. #endif
  557. #endif
  558. #endif