Browse Source

macOS platform fixes for Intel Macs

Jeff Hutchinson 3 years ago
parent
commit
7cb306b65a

+ 3 - 3
Engine/source/platform/platform.h

@@ -73,9 +73,10 @@ enum ProcessorProperties
    CPU_PROP_RDTSC     = (1<<5),  ///< Supports Read Time Stamp Counter op.
    CPU_PROP_SSE2      = (1<<6),  ///< Supports SSE2 instruction set extension.
    CPU_PROP_SSE3      = (1<<7),  ///< Supports SSE3 instruction set extension.  
-   CPU_PROP_SSE3xt    = (1<<8),  ///< Supports extended SSE3 instruction set  
+   CPU_PROP_SSE3ex    = (1<<8),  ///< Supports extended SSE3 instruction set  
    CPU_PROP_SSE4_1    = (1<<9),  ///< Supports SSE4_1 instruction set extension.  
-   CPU_PROP_SSE4_2    = (1<<10), ///< Supports SSE4_2 instruction set extension.  
+   CPU_PROP_SSE4_2    = (1<<10), ///< Supports SSE4_2 instruction set extension.
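+   CPU_PROP_AVX       = (1<<11), ///< Supports AVX (256-bit) instruction set extension. NOTE(review): same bit value as CPU_PROP_MP (1<<11) on the next line, so the two flags cannot be told apart.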
    CPU_PROP_MP        = (1<<11), ///< This is a multi-processor system.
    CPU_PROP_LE        = (1<<12), ///< This processor is LITTLE ENDIAN.  
    CPU_PROP_64bit     = (1<<13), ///< This processor is 64-bit capable
@@ -297,7 +298,6 @@ namespace Platform
             bool           isHyperThreaded;
             U32            numLogicalProcessors;
             U32            numPhysicalProcessors;
-            U32            numAvailableCores;
             U32            properties;      // CPU type specific enum
          } processor;
    };

+ 7 - 46
Engine/source/platform/platformCPU.cpp

@@ -28,48 +28,8 @@
 
 Signal<void(void)> Platform::SystemInfoReady;
 
-enum CPUFlags
+void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand)
 {
-   // EDX Register flags
-   BIT_RDTSC   = BIT(4),
-   BIT_MMX     = BIT(23),
-   BIT_SSE     = BIT(25),
-   BIT_SSE2    = BIT(26),
-   BIT_3DNOW   = BIT(31), // only available for amd cpus in x86
-
-   // These use a different value for comparison than the above flags (ECX Register)
-   BIT_SSE3    = BIT(0),
-   BIT_SSE3xt  = BIT(9),
-   BIT_SSE4_1  = BIT(19),
-   BIT_SSE4_2  = BIT(20),
-};
-
-// fill the specified structure with information obtained from asm code
-void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
-   char* vendor, char* brand, U32 processor, U32 properties, U32 properties2)
-{
-   // always assume FPU is available in 2021...
-   pInfo.properties |= CPU_PROP_FPU;
-
-#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64)
-   pInfo.properties |= CPU_PROP_LE;
-#endif
-
-#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64)
-   pInfo.properties |= CPU_PROP_64bit;
-#endif
-
-#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64)
-   pInfo.properties |= (properties & BIT_RDTSC)   ? CPU_PROP_RDTSC : 0;
-   pInfo.properties |= (properties & BIT_MMX)     ? CPU_PROP_MMX : 0;
-   pInfo.properties |= (properties & BIT_SSE)     ? CPU_PROP_SSE : 0;
-   pInfo.properties |= (properties & BIT_SSE2)    ? CPU_PROP_SSE2 : 0;
-   pInfo.properties |= (properties2 & BIT_SSE3)   ? CPU_PROP_SSE3 : 0;
-   pInfo.properties |= (properties2 & BIT_SSE3xt) ? CPU_PROP_SSE3xt : 0;
-   pInfo.properties |= (properties2 & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0;
-   pInfo.properties |= (properties2 & BIT_SSE4_2) ? CPU_PROP_SSE4_2 : 0;
-#endif
-
    if (dStricmp(vendor, "GenuineIntel") == 0)
    {
       pInfo.type = CPU_Intel;
@@ -80,9 +40,6 @@ void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
    {
       pInfo.name = StringTable->insert(brand ? brand : "AMD (unknown)");
       pInfo.type = CPU_AMD;
-
-      // 3dnow! is only available in AMD cpus on x86. Otherwise its not reliably set.
-      pInfo.properties |= (properties & BIT_3DNOW) ? CPU_PROP_3DNOW : 0;
    }
    else if (dStricmp(vendor, "Apple") == 0)
    {
@@ -92,18 +49,22 @@ void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
    else
    {
 #if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64)
+      
       pInfo.name = StringTable->insert(brand ? brand : "x86 Compatible (unknown)");
       pInfo.type = CPU_X86Compatible;
+      
 #elif defined(TORQUE_CPU_ARM64)
       pInfo.name = StringTable->insert(brand ? brand : "Arm Compatible (unknown)");
       pInfo.type = CPU_ArmCompatible;
+      
 #else
 #error "Unknown CPU Architecture"
+      
 #endif
    }
-
+   
    // Get multithreading caps.
-   CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numAvailableCores, pInfo.numPhysicalProcessors );
+   CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numPhysicalProcessors );
    pInfo.isHyperThreaded = CPUInfo::isHyperThreaded( config );
    pInfo.isMultiCore = CPUInfo::isMultiCore( config );
 

+ 0 - 657
Engine/source/platform/platformCPUCount.cpp

@@ -1,657 +0,0 @@
-// Original code is:
-// Copyright (c) 2005 Intel Corporation 
-// All Rights Reserved
-//
-// CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platform
-//					The three forms of HW multithreading are: Multi-processor, Multi-core, and 
-//					HyperThreading Technology.
-//					This application enumerates all the logical processors enabled by OS and BIOS,
-//					determine the HW topology of these enabled logical processors in the system 
-//					using information provided by CPUID instruction.
-//					A multi-processing system can support any combination of the three forms of HW
-//					multi-threading support. The relevant topology can be identified using a 
-//					three level decomposition of the "initial APIC ID" into 
-//					Package_id, core_id, and SMT_id. Such decomposition provides a three-level map of 
-//					the topology of hardware resources and
-//					allow multi-threaded software to manage shared hardware resources in 
-//					the platform to reduce resource contention
-
-//					Multicore detection algorithm for processor and cache topology requires
-//					all leaf functions of CPUID instructions be available. System administrator
-//					must ensure BIOS settings is not configured to restrict CPUID functionalities.
-//-------------------------------------------------------------------------------------------------
-
-#if defined(TORQUE_OS_LINUX) || defined(LINUX)
-
-// TODO GCC code don't compile on Release with optimizations, mover code to platform layer
-
-#else
-
-#include "platform/platform.h"
-#include "platform/platformCPUCount.h"
-
-#if defined(TORQUE_OS_LINUX) || defined(TORQUE_OS_OSX)
-
-#ifdef TORQUE_OS_LINUX
-// 	The Linux source code listing can be compiled using Linux kernel verison 2.6 
-//	or higher (e.g. RH 4AS-2.8 using GCC 3.4.4). 
-//	Due to syntax variances of Linux affinity APIs with earlier kernel versions 
-//	and dependence on glibc library versions, compilation on Linux environment 
-//	with older kernels and compilers may require kernel patches or compiler upgrades.
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <sched.h>
-#define DWORD unsigned long
-#elif defined( TORQUE_OS_WIN )
-#include <windows.h>
-#elif defined( TORQUE_OS_MAC )
-#  include <sys/types.h>
-#  include <sys/sysctl.h>
-#else
-#error Not implemented on platform.
-#endif
-#include <stdio.h>
-#include <assert.h>
-
-namespace CPUInfo {
-
-#define HWD_MT_BIT         0x10000000     // EDX[28]  Bit 28 is set if HT or multi-core is supported
-#define NUM_LOGICAL_BITS   0x00FF0000     // EBX[23:16] Bit 16-23 in ebx contains the number of logical
-      // processors per physical processor when execute cpuid with 
-      // eax set to 1
-#define NUM_CORE_BITS      0xFC000000     // EAX[31:26] Bit 26-31 in eax contains the number of cores minus one
-      // per physical processor when execute cpuid with 
-      // eax set to 4. 
-
-
-#define INITIAL_APIC_ID_BITS  0xFF000000  // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique 
-      // initial APIC ID for the processor this code is running on.
-
-
-      #ifndef TORQUE_OS_MAC
-      static U32  CpuIDSupported(void);      
-      static U32  find_maskwidth(unsigned int);
-      static U32  HWD_MTSupported(void);
-      static U32  MaxLogicalProcPerPhysicalProc(void);
-      static U32  MaxCorePerPhysicalProc(void);
-      static U8 GetAPIC_ID(void);
-      static U8 GetNzbSubID(U8, U8, U8);
-      #endif
-
-      static char g_s3Levels[2048];
-
-#ifndef TORQUE_OS_MAC
-
-      //
-      // CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return 
-      // the maximum supported standard function.
-      //
-      static U32 CpuIDSupported(void)
-      {
-         U32 maxInputValue = 0;
-         // If CPUID instruction is supported
-#ifdef TORQUE_COMPILER_GCC
-         try    
-         {		
-            // call cpuid with eax = 0
-            asm
-               (
-               "pushl %%ebx\n\t"
-               "xorl %%eax,%%eax\n\t"
-               "cpuid\n\t"
-               "popl %%ebx\n\t"
-               : "=a" (maxInputValue)
-               : 
-               : "%ecx", "%edx"
-               );		
-         }
-         catch (...)
-         {
-            return(0);                   // cpuid instruction is unavailable
-         }
-#elif defined( TORQUE_COMPILER_VISUALC )
-         try
-         {
-            // call cpuid with eax = 0
-            __asm
-            {
-               xor eax, eax
-                  cpuid
-                  mov maxInputValue, eax
-            }
-         }
-         catch (...)
-         {
-            // cpuid instruction is unavailable
-         }
-#else
-#  error Not implemented.
-#endif
-
-         return maxInputValue;
-      }
-
-
-
-      //
-      // Function returns the maximum cores per physical package. Note that the number of 
-      // AVAILABLE cores per physical to be used by an application might be less than this
-      // maximum value.
-      //
-
-      static U32 MaxCorePerPhysicalProc(void)
-      {
-
-         U32 Regeax        = 0;
-
-         if (!HWD_MTSupported()) return (U32) 1;  // Single core
-#ifdef TORQUE_COMPILER_GCC
-         {
-            asm
-               (
-               "pushl %ebx\n\t"
-               "xorl %eax, %eax\n\t"
-               "cpuid\n\t"
-               "cmpl $4, %eax\n\t"			// check if cpuid supports leaf 4
-               "jl .single_core\n\t"		// Single core
-               "movl $4, %eax\n\t"		
-               "movl $0, %ecx\n\t"			// start with index = 0; Leaf 4 reports
-               "popl %ebx\n\t"
-               );								// at least one valid cache level
-            asm
-               (
-               "cpuid"
-               : "=a" (Regeax)
-               :
-               : "%ecx", "%edx"
-               );		
-            asm
-               (
-               "jmp .multi_core\n"
-               ".single_core:\n\t"
-               "xor %eax, %eax\n"
-               ".multi_core:"
-               );		
-         }
-#elif defined( TORQUE_COMPILER_VISUALC )
-         __asm
-         {
-            xor eax, eax
-               cpuid
-               cmp eax, 4			// check if cpuid supports leaf 4
-               jl single_core		// Single core
-               mov eax, 4			
-               mov ecx, 0			// start with index = 0; Leaf 4 reports
-               cpuid				// at least one valid cache level
-               mov Regeax, eax
-               jmp multi_core
-
-single_core:
-            xor eax, eax		
-
-multi_core:
-
-         }
-#else
-#  error Not implemented.
-#endif
-         return (U32)((Regeax & NUM_CORE_BITS) >> 26)+1;
-
-      }
-
-
-
-      //
-      // The function returns 0 when the hardware multi-threaded bit is not set.
-      //
-      static U32 HWD_MTSupported(void)
-      {
-
-
-         U32 Regedx      = 0;
-
-
-         if ((CpuIDSupported() >= 1))
-         {
-#ifdef TORQUE_COMPILER_GCC
-            asm 
-               (
-               "pushl %%ebx\n\t"
-               "movl $1,%%eax\n\t"
-               "cpuid\n\t"
-               "popl %%ebx\n\t"
-               : "=d" (Regedx)
-               :
-               : "%eax","%ecx"
-               );
-#elif defined( TORQUE_COMPILER_VISUALC )
-            __asm
-            {
-               mov eax, 1
-                  cpuid
-                  mov Regedx, edx
-            }		
-#else
-#  error Not implemented.
-#endif
-         }
-
-         return (Regedx & HWD_MT_BIT);  
-
-
-      }
-
-
-
-      //
-      // Function returns the maximum logical processors per physical package. Note that the number of 
-      // AVAILABLE logical processors per physical to be used by an application might be less than this
-      // maximum value.
-      //
-      static U32 MaxLogicalProcPerPhysicalProc(void)
-      {
-
-         U32 Regebx = 0;
-
-         if (!HWD_MTSupported()) return (U32) 1;
-#ifdef TORQUE_COMPILER_GCC
-         asm 
-            (
-            "movl $1,%%eax\n\t"
-            "cpuid"
-            : "=b" (Regebx)
-            :
-            : "%eax","%ecx","%edx"
-            );
-#elif defined( TORQUE_COMPILER_VISUALC )
-         __asm
-         {
-            mov eax, 1
-               cpuid
-               mov Regebx, ebx
-         }
-#else
-#  error Not implemented.
-#endif
-         return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16);
-
-      }
-
-
-      static U8 GetAPIC_ID(void)
-      {
-
-         U32 Regebx = 0;
-#ifdef TORQUE_COMPILER_GCC
-         asm
-            (
-            "movl $1, %%eax\n\t"	
-            "cpuid"
-            : "=b" (Regebx) 
-            :
-            : "%eax","%ecx","%edx" 
-            );
-
-#elif defined( TORQUE_COMPILER_VISUALC )
-         __asm
-         {
-            mov eax, 1
-               cpuid
-               mov Regebx, ebx
-         }
-#else
-#  error Not implemented.
-#endif                                
-
-         return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24);
-
-      }
-
-      //
-      // Determine the width of the bit field that can represent the value count_item. 
-      //
-      U32 find_maskwidth(U32 CountItem)
-      {
-         U32 MaskWidth,
-            count = CountItem;
-#ifdef TORQUE_COMPILER_GCC
-         asm
-            (
-#ifdef __x86_64__		// define constant to compile  
-            "push %%rcx\n\t"		// under 64-bit Linux
-            "push %%rax\n\t"
-#else
-            "pushl %%ecx\n\t"
-            "pushl %%eax\n\t"
-#endif
-            //		"movl $count, %%eax\n\t" //done by Assembler below
-            "xorl %%ecx, %%ecx"
-            //		"movl %%ecx, MaskWidth\n\t" //done by Assembler below
-            : "=c" (MaskWidth)
-            : "a" (count)
-            //		: "%ecx", "%eax" We don't list these as clobbered because we don't want the assembler
-            //to put them back when we are done
-            );
-         asm
-            (
-            "decl %%eax\n\t"
-            "bsrw %%ax,%%cx\n\t"
-            "jz next\n\t"
-            "incw %%cx\n\t"
-            //		"movl %%ecx, MaskWidth\n" //done by Assembler below
-            : "=c" (MaskWidth)
-            :
-         );
-         asm
-            (
-            "next:\n\t"
-#ifdef __x86_64__
-            "pop %rax\n\t"
-            "pop %rcx"		
-#else
-            "popl %eax\n\t"
-            "popl %ecx"		
-#endif
-            );
-
-#elif defined( TORQUE_COMPILER_VISUALC )
-         __asm
-         {
-            mov eax, count
-               mov ecx, 0
-               mov MaskWidth, ecx
-               dec eax
-               bsr cx, ax
-               jz next
-               inc cx
-               mov MaskWidth, ecx
-next:
-
-         }
-#else
-#  error Not implemented.
-#endif
-         return MaskWidth;
-      }
-
-
-      //
-      // Extract the subset of bit field from the 8-bit value FullID.  It returns the 8-bit sub ID value
-      //
-      static U8 GetNzbSubID(U8 FullID,
-         U8 MaxSubIDValue,
-         U8 ShiftCount)
-      {
-         U32 MaskWidth;
-         U8 MaskBits;
-
-         MaskWidth = find_maskwidth((U32) MaxSubIDValue);
-         MaskBits  = (0xff << ShiftCount) ^ 
-            ((U8) (0xff << (ShiftCount + MaskWidth)));
-
-         return (FullID & MaskBits);
-      }
-
-#endif
-
-
-      //
-      //
-      //
-      EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
-      {
-         EConfig StatusFlag = CONFIG_UserConfigIssue;
-
-         g_s3Levels[0] = 0;
-         TotAvailCore = 1;
-         PhysicalNum  = 1;
-         
-         U32 numLPEnabled = 0;
-         S32 MaxLPPerCore = 1;
-
-#ifdef TORQUE_OS_MAC
-
-         //FIXME: This isn't a proper port but more or less just some sneaky cheating
-         //  to get around having to mess with yet another crap UNIX-style API.  Seems
-         //  like there isn't a way to do this that's working across all OSX incarnations
-         //  and machine configurations anyway.
-
-         S32 numCPUs;
-         S32 numPackages;
-
-         // Get the number of CPUs.
-
-         size_t len = sizeof( numCPUs );
-         if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 )
-            return CONFIG_UserConfigIssue;
-
-         // Get the number of packages.
-         len = sizeof( numPackages );
-         if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 )
-            return CONFIG_UserConfigIssue;
-
-         TotAvailCore = numCPUs;
-         TotAvailLogical = numCPUs;
-         PhysicalNum = numPackages;
-#else
-
-         U32 dwAffinityMask;
-         S32 j = 0;
-         U8 apicID, PackageIDMask;
-         U8 tblPkgID[256], tblCoreID[256], tblSMTID[256];
-         char	tmp[256];
-
-#ifdef TORQUE_OS_LINUX
-         //we need to make sure that this process is allowed to run on 
-         //all of the logical processors that the OS itself can run on.
-         //A process could acquire/inherit affinity settings that restricts the 
-         // current process to run on a subset of all logical processor visible to OS.
-
-         // Linux doesn't easily allow us to look at the Affinity Bitmask directly,
-         // but it does provide an API to test affinity maskbits of the current process 
-         // against each logical processor visible under OS.
-         S32 sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many 
-         //CPUs are currently enabled.
-
-         //this will tell us which processors this process can run on. 
-         cpu_set_t allowedCPUs;	 
-         sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs);
-
-         for (S32 i = 0; i < sysNumProcs; i++ )
-         {
-            if ( CPU_ISSET(i, &allowedCPUs) == 0 )
-               return CONFIG_UserConfigIssue;
-         }
-#elif defined( TORQUE_OS_WIN )
-         DWORD dwProcessAffinity, dwSystemAffinity;
-         GetProcessAffinityMask(GetCurrentProcess(), 
-            &dwProcessAffinity,
-            &dwSystemAffinity);
-         if (dwProcessAffinity != dwSystemAffinity)  // not all CPUs are enabled
-            return CONFIG_UserConfigIssue;
-#else
-#  error Not implemented.
-#endif
-
-         // Assume that cores within a package have the SAME number of 
-         // logical processors.  Also, values returned by
-         // MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have
-         // to be power of 2.
-
-         MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc();
-         dwAffinityMask = 1;
-
-#ifdef TORQUE_OS_LINUX
-         cpu_set_t currentCPU;
-         while ( j < sysNumProcs )
-         {
-            CPU_ZERO(&currentCPU);
-            CPU_SET(j, &currentCPU);
-            if ( sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == 0 )
-            {
-               sleep(0);  // Ensure system to switch to the right CPU
-#elif defined( TORQUE_OS_WIN )
-         while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity)
-         {
-            if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask))
-            {
-               Sleep(0);  // Ensure system to switch to the right CPU
-#else
-#  error Not implemented.
-#endif
-               apicID = GetAPIC_ID();
-
-
-               // Store SMT ID and core ID of each logical processor
-               // Shift vlaue for SMT ID is 0
-               // Shift value for core ID is the mask width for maximum logical
-               // processors per core
-
-               tblSMTID[j]  = GetNzbSubID(apicID, MaxLPPerCore, 0);
-               U8 maxCorePPP = MaxCorePerPhysicalProc();
-               U8 maskWidth = find_maskwidth(MaxLPPerCore);
-               tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth);
-
-               // Extract package ID, assume single cluster.
-               // Shift value is the mask width for max Logical per package
-
-               PackageIDMask = (unsigned char) (0xff << 
-                  find_maskwidth(MaxLogicalProcPerPhysicalProc()));
-
-               tblPkgID[j] = apicID & PackageIDMask;
-               sprintf(tmp,"  AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d,  SMT ID = %d\n",
-                  dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]);
-               dStrcat(g_s3Levels, tmp, 2048);
-
-               numLPEnabled ++;   // Number of available logical processors in the system.
-
-            } // if
-
-            j++;  
-            dwAffinityMask = 1 << j;
-         } // while
-
-         // restore the affinity setting to its original state
-#ifdef TORQUE_OS_LINUX
-         sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs);
-         sleep(0);
-#elif defined( TORQUE_OS_WIN )
-         SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity);
-         Sleep(0);
-#else
-#  error Not implemented.
-#endif
-         TotAvailLogical = numLPEnabled;
-
-         //
-         // Count available cores (TotAvailCore) in the system
-         //
-         U8 CoreIDBucket[256];
-         DWORD ProcessorMask, pCoreMask[256];
-         U32 i, ProcessorNum;
-
-         CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0];
-         ProcessorMask = 1;
-         pCoreMask[0] = ProcessorMask;
-
-         for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
-         {
-            ProcessorMask <<= 1;
-            for (i = 0; i < TotAvailCore; i++)
-            {
-               // Comparing bit-fields of logical processors residing in different packages
-               // Assuming the bit-masks are the same on all processors in the system.
-               if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i])
-               {
-                  pCoreMask[i] |= ProcessorMask;
-                  break;
-               }
-
-            }  // for i
-
-            if (i == TotAvailCore)   // did not match any bucket.  Start a new one.
-            {
-               CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum];
-               pCoreMask[i] = ProcessorMask;
-
-               TotAvailCore++;	// Number of available cores in the system
-
-            }
-
-         }  // for ProcessorNum
-
-
-         //
-         // Count physical processor (PhysicalNum) in the system
-         //
-         U8 PackageIDBucket[256];
-         DWORD pPackageMask[256];
-
-         PackageIDBucket[0] = tblPkgID[0];
-         ProcessorMask = 1;
-         pPackageMask[0] = ProcessorMask;
-
-         for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
-         {
-            ProcessorMask <<= 1;
-            for (i = 0; i < PhysicalNum; i++)
-            {
-               // Comparing bit-fields of logical processors residing in different packages
-               // Assuming the bit-masks are the same on all processors in the system.
-               if (tblPkgID[ProcessorNum]== PackageIDBucket[i])
-               {
-                  pPackageMask[i] |= ProcessorMask;
-                  break;
-               }
-
-            }  // for i
-
-            if (i == PhysicalNum)   // did not match any bucket.  Start a new one.
-            {
-               PackageIDBucket[i] = tblPkgID[ProcessorNum];
-               pPackageMask[i] = ProcessorMask;
-
-               PhysicalNum++;	// Total number of physical processors in the system
-
-            }
-
-         }  // for ProcessorNum
-#endif
-
-         //
-         // Check to see if the system is multi-core 
-         // Check if the system is hyper-threading
-         //
-         if (TotAvailCore > PhysicalNum) 
-         {
-            // Multi-core
-            if (MaxLPPerCore == 1)
-               StatusFlag = CONFIG_MultiCoreAndHTNotCapable;
-            else if (numLPEnabled > TotAvailCore)
-               StatusFlag = CONFIG_MultiCoreAndHTEnabled;
-            else StatusFlag = CONFIG_MultiCoreAndHTDisabled;
-
-         }
-         else
-         {
-            // Single-core
-            if (MaxLPPerCore == 1)
-               StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
-            else if (numLPEnabled > TotAvailCore)
-               StatusFlag = CONFIG_SingleCoreHTEnabled;
-            else StatusFlag = CONFIG_SingleCoreHTDisabled;
-
-
-         }
-
-
-
-         return StatusFlag;
-      }
-
-} // namespace CPUInfo
-#endif
-
-#endif

+ 3 - 8
Engine/source/platform/platformCPUCount.h

@@ -29,13 +29,10 @@ namespace CPUInfo
 {
    enum EConfig
    {
-      CONFIG_UserConfigIssue,
       CONFIG_SingleCoreHTEnabled,
-      CONFIG_SingleCoreHTDisabled,
       CONFIG_SingleCoreAndHTNotCapable,
       CONFIG_MultiCoreAndHTNotCapable,
       CONFIG_MultiCoreAndHTEnabled,
-      CONFIG_MultiCoreAndHTDisabled,
    };
 
    inline bool isMultiCore( EConfig config )
@@ -44,7 +41,6 @@ namespace CPUInfo
       {
       case CONFIG_MultiCoreAndHTNotCapable:
       case CONFIG_MultiCoreAndHTEnabled:
-      case CONFIG_MultiCoreAndHTDisabled:
          return true;
 
       default:
@@ -65,11 +61,10 @@ namespace CPUInfo
       }
    }
 
-   EConfig CPUCount( U32& totalAvailableLogical,
-      U32& totalAvailableCores,
-      U32& numPhysical );
-
+   EConfig CPUCount( U32& totalAvailableLogical, U32& totalAvailableCores );
 } // namespace CPUInfo
 
+void SetProcessoInfo(Platform::SystemInfo_struct::Processor& pInfo, char* vendor, char* brand);
+
 #endif // _TORQUE_PLATFORM_PLATFORMCOUNT_H_
 

+ 1 - 2
Engine/source/platform/threads/threadPool.cpp

@@ -322,10 +322,9 @@ ThreadPool::ThreadPool( const char* name, U32 numThreads )
       // Platform::SystemInfo will not yet have been initialized.
       
       U32 numLogical = 0;
-      U32 numPhysical = 0;
       U32 numCores = 0;
 
-      CPUInfo::CPUCount( numLogical, numCores, numPhysical );
+      CPUInfo::CPUCount( numLogical, numCores );
       
       const U32 baseCount = getMax( numLogical, numCores );
       mNumThreads = (baseCount > 0) ? baseCount : 2;

+ 118 - 169
Engine/source/platformMac/macCPU.mm

@@ -35,15 +35,6 @@
 // we now have to use NSProcessInfo
 #import <Foundation/Foundation.h>
 
-//recently removed in Xcode 8 - most likely don't need these anymore
-#ifndef CPUFAMILY_INTEL_YONAH
-#define CPUFAMILY_INTEL_YONAH		0x73d67300
-#endif
-
-#ifndef CPUFAMILY_INTEL_MEROM
-#define CPUFAMILY_INTEL_MEROM		0x426f69ef
-#endif
-
 // Original code by Sean O'Brien (http://www.garagegames.com/community/forums/viewthread/81815).
 
 
@@ -89,8 +80,58 @@ int _getSysCTLvalue(const char key[], T * dest) {
 
 Platform::SystemInfo_struct Platform::SystemInfo;
 
-#define BASE_MHZ_SPEED      0
-//TODO update cpu list
+#define BASE_MHZ_SPEED 1000
+
+static void detectCpuFeatures(U32 &procflags)
+{
+   // Fills 'procflags' with CPU_PROP_* bits by querying sysctl keys via _getSysCTLvalue<U32>().
+   // Each query yields a status in 'err' and a value in 'lraw'. If we request a nonexistent
+   // feature from SYSCTL(), the 'err' result will be -1; 0 denotes that the key exists
+   // >>>> BUT <<<<<
+   // the feature may only be defined, not supported. Thus 'lraw' must be checked as well to
+   // see if the processor implements it: 0 = no, 1 = yes, others are undefined. Bits are only OR-ed in.
+   
+   int err;
+   U32 lraw;
+   
+   // Instruction-set extensions, reported under the hw.optional.* keys
+   err = _getSysCTLvalue<U32>("hw.optional.mmx", &lraw);
+   if ((err==0)&&(lraw==1))
+      procflags |= CPU_PROP_MMX;
+   err = _getSysCTLvalue<U32>("hw.optional.sse", &lraw);
+   if ((err==0)&&(lraw==1))
+      procflags |= CPU_PROP_SSE;
+   err = _getSysCTLvalue<U32>("hw.optional.sse2", &lraw);
+   if ((err==0)&&(lraw==1))
+      procflags |= CPU_PROP_SSE2;
+   err = _getSysCTLvalue<U32>("hw.optional.sse3", &lraw);
+   if ((err==0)&&(lraw==1))
+      procflags |= CPU_PROP_SSE3;
+   err = _getSysCTLvalue<U32>("hw.optional.supplementalsse3", &lraw);
+   if ((err==0)&&(lraw==1))
+      procflags |= CPU_PROP_SSE3ex;
+   err = _getSysCTLvalue<U32>("hw.optional.sse4_1", &lraw);
+   if ((err==0)&&(lraw==1))
+      procflags |= CPU_PROP_SSE4_1;
+   err = _getSysCTLvalue<U32>("hw.optional.sse4_2", &lraw);
+   if ((err==0)&&(lraw==1))
+      procflags |= CPU_PROP_SSE4_2;
+   err = _getSysCTLvalue<U32>("hw.optional.avx1_0", &lraw);
+   if ((err==0)&&(lraw==1))
+      procflags |= CPU_PROP_AVX;
+   // System-wide properties: more than one CPU, 64-bit capability, and byte order (1234 sets CPU_PROP_LE).
+   err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
+   if ((err==0)&&(lraw>1))
+      procflags |= CPU_PROP_MP;
+   err = _getSysCTLvalue<U32>("hw.cpu64bit_capable", &lraw);
+   if ((err==0)&&(lraw==1))
+      procflags |= CPU_PROP_64bit;
+   err = _getSysCTLvalue<U32>("hw.byteorder", &lraw);
+   if ((err==0)&&(lraw==1234))
+      procflags |= CPU_PROP_LE;
+   
+}
+
 void Processor::init()
 {
 	U32 procflags;
@@ -98,178 +139,64 @@ void Processor::init()
 	char buf[255];
 	U32 lraw;
 	U64 llraw;
-	
-	Con::printf( "System & Processor Information:" );
 
-   // Gestalt has been deprecated since Mac OSX Mountain Lion and has stopped working on
-   // Mac OSX Yosemite. we have to use NSProcessInfo now.
    // Availability: Mac OS 10.2 or greater.
    NSString *osVersionStr = [[NSProcessInfo processInfo] operatingSystemVersionString];
-   Con::printf( "   OSX Version: %s", [osVersionStr UTF8String]);
-	
-	err = _getSysCTLstring("kern.ostype", buf, sizeof(buf));	
-	if (err)
-		Con::printf( "   Unable to determine OS type\n" );
-	else
-		Con::printf( "   Mac OS Kernel name: %s", buf);
-	
-	err = _getSysCTLstring("kern.osrelease", buf, sizeof(buf));	
-	if (err)
-		Con::printf( "   Unable to determine OS release number\n" );
-	else
-		Con::printf( "   Mac OS Kernel version: %s", buf );
-	
+
+   S32 ramMB;
 	err = _getSysCTLvalue<U64>("hw.memsize", &llraw);
 	if (err)
-		Con::printf( "   Unable to determine amount of physical RAM\n" );
-	else
-		Con::printf( "   Physical memory installed: %d MB", (llraw >> 20));
-	
-	err = _getSysCTLvalue<U32>("hw.usermem", &lraw);
-	if (err)
-		Con::printf( "   Unable to determine available user address space\n");
+      ramMB = 512;
 	else
-		Con::printf( "   Addressable user memory: %d MB", (lraw >> 20));
-	
-	////////////////////////////////
-	// Values for the Family Type, CPU Type and CPU Subtype are defined in the
-	// SDK files for the Mach Kernel ==>  mach/machine.h
-	////////////////////////////////
-	
-	// CPU Family, Type, and Subtype
-	cpufam = 0;
-	cputype = 0;
-	cpusub = 0;
-	err = _getSysCTLvalue<U32>("hw.cpufamily", &lraw);
-	if (err)
-		Con::printf( "   Unable to determine 'family' of CPU\n");
-	else {
-		cpufam = (int) lraw;
-		err = _getSysCTLvalue<U32>("hw.cputype", &lraw);
-		if (err)
-			Con::printf( "   Unable to determine CPU type\n");
-		else {
-			cputype = (int) lraw;
-			err = _getSysCTLvalue<U32>("hw.cpusubtype", &lraw);
-			if (err)
-				Con::printf( "   Unable to determine CPU subtype\n");
-			else
-				cpusub = (int) lraw;
-			// If we've made it this far, 
-			Con::printf( "   Installed processor ID: Family 0x%08x  Type %d  Subtype %d",cpufam, cputype,cpusub);
-		}
-	}
+      ramMB = llraw >> 20;
 	
+   char brandString[256];
+   err = _getSysCTLstring("machdep.cpu.brand_string", brandString, sizeof(brandString));
+   if (err)
+      brandString[0] = '\0';
+   
+   char vendor[256];
+   err = _getSysCTLstring("machdep.cpu.vendor", vendor, sizeof(vendor));
+   if (err)
+      vendor[0] = '\0';
+   
 	// The Gestalt version was known to have issues with some Processor Upgrade cards
 	// but it is uncertain whether this version has similar issues.
 	err = _getSysCTLvalue<U64>("hw.cpufrequency", &llraw);
 	if (err) {
 		llraw = BASE_MHZ_SPEED;
-		Con::printf( "   Unable to determine CPU Frequency. Defaulting to %d MHz\n", llraw);
 	} else {
 		llraw /= 1000000;
-		Con::printf( "   Installed processor clock frequency: %d MHz", llraw);
 	}
 	Platform::SystemInfo.processor.mhz = (unsigned int)llraw;
 	
-	// Here's one that the original version of this routine couldn't do -- number
-	// of processors (cores)
-   U32 ncpu = 1;
-	err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
-	if (err)
-		Con::printf( "   Unable to determine number of processor cores\n");
-	else
-   {
-      ncpu = lraw;
-		Con::printf( "   Installed/available processor cores: %d", lraw);
-   }
-	
-	// Now use CPUFAM to determine and then store the processor type
-	// and 'friendly name' in GG-accessible structure. Note that since
-	// we have access to the Family code, the Type and Subtypes are useless.
-	//
-	// NOTE: Even this level of detail is almost assuredly not needed anymore
-	// and the Optional Capability flags (further down) should be more than enough.
-	switch(cpufam)
-	{
-		case CPUFAMILY_INTEL_YONAH:
-			Platform::SystemInfo.processor.type = CPU_Intel_Core;
-         if( ncpu == 2 )
-            Platform::SystemInfo.processor.name = StringTable->insert("Intel Core Duo");
-         else
-            Platform::SystemInfo.processor.name = StringTable->insert("Intel Core");
-			break;
-      case CPUFAMILY_INTEL_PENRYN:
-		case CPUFAMILY_INTEL_MEROM:
-			Platform::SystemInfo.processor.type = CPU_Intel_Core2;
-         if( ncpu == 4 )
-            Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Quad");
-         else
-            Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Duo");
-			break;
-         
-      case CPUFAMILY_INTEL_NEHALEM:
-         Platform::SystemInfo.processor.type = CPU_Intel_Core2;
-         Platform::SystemInfo.processor.name = StringTable->insert( "Intel 'Nehalem' Core Processor" );
-         break;
-      
-		default:
-			// explain why we can't get the processor type.
-			Con::warnf( "   Unknown Processor (family, type, subtype): 0x%x\t%d  %d", cpufam, cputype, cpusub);
-			// for now, identify it as an x86 processor, because Apple is moving to Intel chips...
-			Platform::SystemInfo.processor.type = CPU_X86Compatible;
-			Platform::SystemInfo.processor.name = StringTable->insert("Unknown Processor, assuming x86 Compatible");
-			break;
-	}
-   // Now we can directly query the system about a litany of "Optional" processor capabilities
-	// and determine the status by using BOTH the 'err' value and the 'lraw' value. If we request
-	// a non-existant feature from SYSCTL(), the 'err' result will be -1; 0 denotes it exists 
-	// >>>> BUT <<<<<
-	// it may not be supported, only defined. Thus we need to check 'lraw' to determine if it's 
-	// actually supported/implemented by the processor: 0 = no, 1 = yes, others are undefined.
-	procflags = 0;
-	// Seriously this one should be an Assert()
-	err = _getSysCTLvalue<U32>("hw.optional.floatingpoint", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_FPU;
-	// List of chip-specific features
-	err = _getSysCTLvalue<U32>("hw.optional.mmx", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_MMX;
-	err = _getSysCTLvalue<U32>("hw.optional.sse", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE;
-	err = _getSysCTLvalue<U32>("hw.optional.sse2", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE2;
-	err = _getSysCTLvalue<U32>("hw.optional.sse3", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3;
-	err = _getSysCTLvalue<U32>("hw.optional.supplementalsse3", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3xt;
-	err = _getSysCTLvalue<U32>("hw.optional.sse4_1", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_1;
-	err = _getSysCTLvalue<U32>("hw.optional.sse4_2", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_2;
-
-	// Finally some architecture-wide settings
-	err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
-	if ((err==0)&&(lraw>1)) procflags |= CPU_PROP_MP;
-	err = _getSysCTLvalue<U32>("hw.cpu64bit_capable", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_64bit;
-	err = _getSysCTLvalue<U32>("hw.byteorder", &lraw);
-	if ((err==0)&&(lraw==1234)) procflags |= CPU_PROP_LE;
-
-	Platform::SystemInfo.processor.properties = procflags;
-	
-	Con::printf( "%s, %2.2f GHz", Platform::SystemInfo.processor.name, F32( Platform::SystemInfo.processor.mhz ) / 1000.0 );
+   procflags = CPU_PROP_FPU;
+   detectCpuFeatures(procflags);
+   
+   Platform::SystemInfo.processor.properties = procflags;
+   SetProcessoInfo(Platform::SystemInfo.processor, vendor, brandString);
+   
+   
+   Con::printf("System & Processor Information:");
+   Con::printf("   MacOS Version: %s", [osVersionStr UTF8String]);
+   Con::printf("   Physical memory installed: %d MB", ramMB);
+   Con::printf("   Processor: %s", Platform::SystemInfo.processor.name);
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
-		Con::printf( "   MMX detected");
+		Con::printf("      MMX detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
-		Con::printf( "   SSE detected");
+		Con::printf("      SSE detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
-		Con::printf( "   SSE2 detected");
+		Con::printf("      SSE2 detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
-		Con::printf( "   SSE3 detected");
+		Con::printf("      SSE3 detected");
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
+      Con::printf("      SSE3ex detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
-		Con::printf( "   SSE4.1 detected");
+		Con::printf("      SSE4.1 detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
-		Con::printf( "   SSE4.2 detected");
+		Con::printf("      SSE4.2 detected");
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
+      Con::printf("      AVX detected");
 	
 	Con::printf( "" );
    
@@ -277,16 +204,38 @@ void Processor::init()
    Platform::SystemInfoReady.trigger();
 }
 
+
 namespace CPUInfo {
-   EConfig CPUCount(U32 &logical, U32 &numCores, U32 &numPhysical) {
-      // todo properly implement this
-      logical = [[NSProcessInfo processInfo] activeProcessorCount];
-      numCores = [[NSProcessInfo processInfo] activeProcessorCount];
-      numPhysical = [[NSProcessInfo processInfo] processorCount];
+   EConfig CPUCount(U32 &logical, U32 &physical) {
+      U32 lraw;
+      int err;
+      
+      err = _getSysCTLvalue<U32>("hw.physicalcpu", &lraw);
+      if (err == 0)
+         physical = lraw;
+      else
+         physical = 1;
+      
+      err = _getSysCTLvalue<U32>("hw.logicalcpu", &lraw);
+      if (err == 0)
+      {
+         logical = lraw;
+      }
+      else
+      {
+         // Fall back to querying the total CPU count (hw.ncpu). If that also fails, assume the logical count equals the physical core count.
+         err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
+         if (err == 0)
+            logical = lraw;
+         else
+            logical = physical;
+      }
+      
+      const bool smtEnabled = logical > physical;
+      
+      if (physical == 1)
+         return smtEnabled ? CONFIG_SingleCoreHTEnabled : CONFIG_SingleCoreAndHTNotCapable;
       
-      // todo check for hyperthreading
-      if (numCores > 1)
-         return CONFIG_MultiCoreAndHTNotCapable;
-      return CONFIG_SingleCoreAndHTNotCapable;
+      return smtEnabled ? CONFIG_MultiCoreAndHTEnabled : CONFIG_MultiCoreAndHTNotCapable;
    }
 }