Jelajahi Sumber

Merge pull request #789 from JeffProgrammer/cpuinfo

Overhaul on CPU detection for Windows, Mac (x64/arm64) & Linux
Brian Roberts 3 tahun lalu
induk
melakukan
53cd3ea36a

+ 0 - 216
Engine/source/math/mMathAMD.cpp

@@ -1,216 +0,0 @@
-//-----------------------------------------------------------------------------
-// Copyright (c) 2012 GarageGames, LLC
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to
-// deal in the Software without restriction, including without limitation the
-// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-// sell copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-// IN THE SOFTWARE.
-//-----------------------------------------------------------------------------
-
-#include "math/mMathFn.h"
-#include "math/mPlane.h"
-#include "math/mMatrix.h"
-
-
-// extern void (*m_matF_x_point3F)(const F32 *m, const F32 *p, F32 *presult);
-// extern void (*m_matF_x_vectorF)(const F32 *m, const F32 *v, F32 *vresult);
-
-/* not currently implemented.
-void Athlon_MatrixF_x_Point3F(const F32 *m, const F32 *p, F32 *presult)
-{
-   m;
-   p;
-   presult;
-}
-*/
-
-//============================================================
-//  Here's the C code for MatF_x_MatF:
-//  note that the code below does it in a different order (optimal asm, after all!)
-//
-// r[0] = a[0]*b[0] + a[1]*b[4] + a[2]*b[8]  + a[3]*b[12];
-// r[1] = a[0]*b[1] + a[1]*b[5] + a[2]*b[9]  + a[3]*b[13];
-// r[2] = a[0]*b[2] + a[1]*b[6] + a[2]*b[10] + a[3]*b[14];
-// r[3] = a[0]*b[3] + a[1]*b[7] + a[2]*b[11] + a[3]*b[15];
-//
-// r[4] = a[4]*b[0] + a[5]*b[4] + a[6]*b[8]  + a[7]*b[12];
-// r[5] = a[4]*b[1] + a[5]*b[5] + a[6]*b[9]  + a[7]*b[13];
-// r[6] = a[4]*b[2] + a[5]*b[6] + a[6]*b[10] + a[7]*b[14];
-// r[7] = a[4]*b[3] + a[5]*b[7] + a[6]*b[11] + a[7]*b[15];
-//
-// r[8] = a[8]*b[0] + a[9]*b[4] + a[10]*b[8] + a[11]*b[12];
-// r[9] = a[8]*b[1] + a[9]*b[5] + a[10]*b[9] + a[11]*b[13];
-// r[10]= a[8]*b[2] + a[9]*b[6] + a[10]*b[10]+ a[11]*b[14];
-// r[11]= a[8]*b[3] + a[9]*b[7] + a[10]*b[11]+ a[11]*b[15];
-//
-// r[12]= a[12]*b[0]+ a[13]*b[4]+ a[14]*b[8] + a[15]*b[12];
-// r[13]= a[12]*b[1]+ a[13]*b[5]+ a[14]*b[9] + a[15]*b[13];
-// r[14]= a[12]*b[2]+ a[13]*b[6]+ a[14]*b[10]+ a[15]*b[14];
-// r[15]= a[12]*b[3]+ a[13]*b[7]+ a[14]*b[11]+ a[15]*b[15];
-//============================================================
-
-#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
-#define ADD_3DNOW_FUNCS
-// Inlined VC __asm 3DNow! version of the 4x4 MatF_x_MatF product shown in the C reference above: result = matA * matB.
-void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
-{
-   __asm
-   {
-      femms                      // fast-clear the MMX state before using mm0-mm7
-      // Notation below: "high dword | low dword"; aXY/bXY/rXY is element [(Y-1)*4 + (X-1)]; XXX = don't-care half.
-      mov         ecx, matA      // ecx -> a[] (left operand)
-      mov         edx, matB      // edx -> b[] (right operand)
-      mov         eax, result    // eax -> r[] (output)
-
-      prefetch    [ecx+32]       ;// These may help -
-      prefetch    [edx+32]       ;//    and probably don't hurt
-      // ---- r[0..3]: uses a[0..3]; the loads of a[4..7] for the next group are interleaved at the end ----
-      movq        mm0,[ecx]      ;// a21   | a11
-      movq        mm1,[ecx+8]      ;// a41   | a31
-      movq        mm4,[edx]      ;// b21   | b11
-      punpckhdq   mm2,mm0         ;// a21   | XXX
-      movq        mm5,[edx+16]   ;// b22   | b12
-      punpckhdq   mm3,mm1         ;// a41   | XXX
-      movq        mm6,[edx+32]   ;// b23   | b13
-      punpckldq   mm0,mm0         ;// a11   | a11
-      punpckldq   mm1,mm1         ;// a31   | a31
-      pfmul       mm4,mm0         ;// a11*b21 | a11*b11
-      punpckhdq   mm2,mm2         ;// a21   | a21
-      pfmul       mm0,[edx+8]      ;// a11*b41 | a11*b31
-      movq        mm7,[edx+48]   ;// b24   | b14
-      pfmul       mm5,mm2         ;// a21*b22 | a21*b12
-      punpckhdq   mm3,mm3         ;// a41   | a41
-      pfmul       mm2,[edx+24]   ;// a21*b42 | a21*b32
-      pfmul       mm6,mm1         ;// a31*b23 | a31*b13
-      pfadd       mm5,mm4         ;// a21*b22 + a11*b21 | a21*b12 + a11*b11
-      pfmul       mm1,[edx+40]   ;// a31*b43 | a31*b33
-      pfadd       mm2,mm0         ;// a21*b42 + a11*b41 | a21*b32 + a11*b31
-      pfmul       mm7,mm3         ;// a41*b24 | a41*b14
-      pfadd       mm6,mm5         ;// a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13
-      pfmul       mm3,[edx+56]   ;// a41*b44 | a41*b34
-      pfadd       mm2,mm1         ;// a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33
-      pfadd       mm7,mm6         ;// a41*b24 + a21*b22 + a11*b21 + a31*b23 |  a41*b14 + a21*b12 + a11*b11 + a31*b13
-      movq        mm0,[ecx+16]   ;// a22   | a12
-      pfadd       mm3,mm2         ;// a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33
-      movq        mm1,[ecx+24]   ;// a42   | a32
-      movq        [eax],mm7      ;// r21   | r11
-      movq        mm4,[edx]      ;// b21   | b11
-      movq        [eax+8],mm3      ;// r41   | r31
-      // ---- r[4..7]: uses a[4..7] ----
-      punpckhdq   mm2,mm0         ;// a22   | XXX
-      movq        mm5,[edx+16]   ;// b22   | b12
-      punpckhdq   mm3,mm1         ;// a42   | XXX
-      movq        mm6,[edx+32]   ;// b23   | b13
-      punpckldq   mm0,mm0         ;// a12   | a12
-      punpckldq   mm1,mm1         ;// a32   | a32
-      pfmul       mm4,mm0         ;// a12*b21 | a12*b11
-      punpckhdq   mm2,mm2         ;// a22   | a22
-      pfmul       mm0,[edx+8]      ;// a12*b41 | a12*b31
-      movq        mm7,[edx+48]   ;// b24   | b14
-      pfmul       mm5,mm2         ;// a22*b22 | a22*b12
-      punpckhdq   mm3,mm3         ;// a42   | a42
-      pfmul       mm2,[edx+24]   ;// a22*b42 | a22*b32
-      pfmul       mm6,mm1         ;// a32*b23 | a32*b13
-      pfadd       mm5,mm4         ;// a12*b21 + a22*b22 | a12*b11 + a22*b12
-      pfmul       mm1,[edx+40]   ;// a32*b43 | a32*b33
-      pfadd       mm2,mm0         ;// a12*b41 + a22*b42 | a12*b31 + a22*b32
-      pfmul       mm7,mm3         ;// a42*b24 | a42*b14
-      pfadd       mm6,mm5         ;// a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12
-      pfmul       mm3,[edx+56]   ;// a42*b44 | a42*b34
-      pfadd       mm2,mm1         ;// a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b31 + a22*b32
-      pfadd       mm7,mm6         ;// a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12
-      movq        mm0,[ecx+32]   ;// a23 | a13
-      pfadd       mm3,mm2         ;// a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b31 + a22*b32
-      movq        mm1,[ecx+40]   ;// a43 | a33
-      movq        [eax+16],mm7   ;// r22 | r12
-      movq        mm4,[edx]      ;// b21   | b11
-      movq        [eax+24],mm3   ;// r42 | r32
-      // ---- r[8..11]: uses a[8..11] ----
-      punpckhdq   mm2,mm0         ;// a23 | XXX
-      movq        mm5,[edx+16]   ;// b22 | b12
-      punpckhdq   mm3,mm1         ;// a43 | XXX
-      movq        mm6,[edx+32]   ;// b23 | b13
-      punpckldq   mm0,mm0         ;// a13 | a13
-      punpckldq   mm1,mm1         ;// a33 | a33
-      pfmul       mm4,mm0         ;// a13*b21 | a13*b11
-      punpckhdq   mm2,mm2         ;// a23 | a23
-      pfmul       mm0,[edx+8]      ;// a13*b41 | a13*b31
-      movq        mm7,[edx+48]   ;// b24 | b14
-      pfmul       mm5,mm2         ;// a23*b22 | a23*b12
-      punpckhdq   mm3,mm3         ;// a43 | a43
-      pfmul       mm2,[edx+24]   ;// a23*b42 | a23*b32
-      pfmul       mm6,mm1         ;// a33*b23 | a33*b13
-      pfadd       mm5,mm4         ;// a23*b22 + a13*b21 | a23*b12 + a13*b11
-      pfmul       mm1,[edx+40]   ;// a33*b43 | a33*b33
-      pfadd       mm2,mm0         ;// a13*b41 + a23*b42 | a13*b31 + a23*b32
-      pfmul       mm7,mm3         ;// a43*b24 | a43*b14
-      pfadd       mm6,mm5         ;// a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11
-      pfmul       mm3,[edx+56]   ;// a43*b44 | a43*b34
-      pfadd       mm2,mm1         ;// a33*b43 + a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32
-      pfadd       mm7,mm6         ;// a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11
-      movq        mm0,[ecx+48]   ;// a24 | a14
-      pfadd       mm3,mm2         ;// a43*b44 + a33*b43 + a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32
-      movq        mm1,[ecx+56]   ;// a44 | a34
-      movq        [eax+32],mm7   ;// r23 | r13
-      movq        mm4,[edx]      ;// b21 | b11
-      movq        [eax+40],mm3   ;// r43 | r33
-      // ---- r[12..15]: uses a[12..15] ----
-      punpckhdq   mm2,mm0         ;// a24 | XXX
-      movq        mm5,[edx+16]   ;// b22 | b12
-      punpckhdq   mm3,mm1         ;// a44 | XXX
-      movq        mm6,[edx+32]   ;// b23 | b13
-      punpckldq   mm0,mm0         ;// a14 | a14
-      punpckldq   mm1,mm1         ;// a34 | a34
-      pfmul       mm4,mm0         ;// a14*b21 | a14*b11
-      punpckhdq   mm2,mm2         ;// a24 | a24
-      pfmul       mm0,[edx+8]      ;// a14*b41 | a14*b31
-      movq        mm7,[edx+48]   ;// b24 | b14
-      pfmul       mm5,mm2         ;// a24*b22 | a24*b12
-      punpckhdq   mm3,mm3         ;// a44 | a44
-      pfmul       mm2,[edx+24]   ;// a24*b42 | a24*b32
-      pfmul       mm6,mm1         ;// a34*b23 | a34*b13
-      pfadd       mm5,mm4         ;// a14*b21 + a24*b22 | a14*b11 + a24*b12
-      pfmul       mm1,[edx+40]   ;// a34*b43 | a34*b33
-      pfadd       mm2,mm0         ;// a14*b41 + a24*b42 | a14*b31 + a24*b32
-      pfmul       mm7,mm3         ;// a44*b24 | a44*b14
-      pfadd       mm6,mm5         ;// a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12
-      pfmul       mm3,[edx+56]   ;// a44*b44 | a44*b34
-      pfadd       mm2,mm1         ;// a34*b43 + a14*b41 + a24*b42 | a34*b33 + a14*b31 + a24*b32
-      pfadd       mm7,mm6         ;// a44*b24 + a34*b23 + a14*b21 + a24*b22 | a44*b14 + a34*b13 + a14*b11 + a24*b12
-      pfadd       mm3,mm2         ;// a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32
-      movq        [eax+48],mm7   ;// r24 | r14
-      movq        [eax+56],mm3   ;// r44 | r34
-      femms                      // fast-clear the MMX state again before returning
-   }
-}
-#elif defined(TORQUE_SUPPORTS_NASM)
-#define ADD_3DNOW_FUNCS
-extern "C"
-{
-   void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result);
-}
-
-#endif
-
-void mInstall_AMD_Math()   // Assigns the 3DNow! matrix multiply to m_matF_x_matF when an implementation was compiled in.
-{
-#if defined(ADD_3DNOW_FUNCS)   // set above only when the inline-asm or the NASM version is available
-   m_matF_x_matF           = Athlon_MatrixF_x_MatrixF;
-#endif
-   // m_matF_x_point3F = Athlon_MatrixF_x_Point3F;   (disabled: the Point3F routine above is commented out as not implemented)
-   // m_matF_x_vectorF = Athlon_MatrixF_x_VectorF;
-}
-

+ 0 - 177
Engine/source/math/mMathAMD_ASM.asm

@@ -1,177 +0,0 @@
-;-----------------------------------------------------------------------------
-; Copyright (c) 2012 GarageGames, LLC
-;
-; Permission is hereby granted, free of charge, to any person obtaining a copy
-; of this software and associated documentation files (the "Software"), to
-; deal in the Software without restriction, including without limitation the
-; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-; sell copies of the Software, and to permit persons to whom the Software is
-; furnished to do so, subject to the following conditions:
-;
-; The above copyright notice and this permission notice shall be included in
-; all copies or substantial portions of the Software.
-;
-; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-; IN THE SOFTWARE.
-;-----------------------------------------------------------------------------
-
-
-segment .data
-
-matA     dd 0
-result   dd 0
-matB     dd 0
-
-segment .text
-
-%macro export_fn 1 
-   %ifidn __OUTPUT_FORMAT__, elf
-   ; ELF: no leading underscore, export the label as given. NOTE(review): matches only the exact name "elf" - confirm elf32/elf64 are not used.
-   global %1
-   %1:
-   %else
-   global _%1   ; every other output format: export and define the label with a leading underscore
-   _%1:
-   %endif
-%endmacro
-
-
-%define arg(x) [esp+(x*4)] 
-
-
-
-; void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result) -- 3DNow! 4x4 product, result = matA * matB
-; Notation below: "high dword | low dword"; aXY/bXY/rXY is element [(Y-1)*4 + (X-1)]; XXX = don't-care half.
-export_fn Athlon_MatrixF_x_MatrixF
-; Arguments are read from the stack through arg(n) = [esp + n*4]; no registers are saved and esp is not changed.
-      mov         ecx, arg(1)    ; ecx -> a[] (matA)
-      mov         edx, arg(2)    ; edx -> b[] (matB)
-      mov         eax, arg(3)    ; eax -> r[] (result)
-      
-      femms                      ; fast-clear the MMX state before using mm0-mm7
-      prefetch    [ecx+32]       ; These may help -
-      prefetch    [edx+32]       ;    and probably don't hurt
-      ; ---- r[0..3]: uses a[0..3]; the loads of a[4..7] for the next group are interleaved at the end ----
-      movq        mm0,[ecx]		; a21	| a11
-      movq        mm1,[ecx+8]		; a41	| a31
-      movq        mm4,[edx]		; b21	| b11
-      punpckhdq   mm2,mm0			; a21	| XXX
-      movq        mm5,[edx+16]	; b22	| b12
-      punpckhdq   mm3,mm1			; a41	| XXX
-      movq        mm6,[edx+32]	; b23	| b13
-      punpckldq   mm0,mm0			; a11	| a11
-      punpckldq   mm1,mm1			; a31	| a31
-      pfmul       mm4,mm0			; a11*b21 | a11*b11
-      punpckhdq   mm2,mm2			; a21	| a21
-      pfmul       mm0,[edx+8]		; a11*b41 | a11*b31
-      movq        mm7,[edx+48]	; b24	| b14
-      pfmul       mm5,mm2			; a21*b22 | a21*b12
-      punpckhdq   mm3,mm3			; a41	| a41
-      pfmul       mm2,[edx+24]	; a21*b42 | a21*b32
-      pfmul       mm6,mm1			; a31*b23 | a31*b13
-      pfadd       mm5,mm4			; a21*b22 + a11*b21 | a21*b12 + a11*b11
-      pfmul       mm1,[edx+40]	; a31*b43 | a31*b33
-      pfadd       mm2,mm0			; a21*b42 + a11*b41 | a21*b32 + a11*b31
-      pfmul       mm7,mm3			; a41*b24 | a41*b14
-      pfadd       mm6,mm5			; a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13
-      pfmul       mm3,[edx+56]	; a41*b44 | a41*b34
-      pfadd       mm2,mm1			; a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33
-      pfadd       mm7,mm6			; a41*b24 + a21*b22 + a11*b21 + a31*b23 |  a41*b14 + a21*b12 + a11*b11 + a31*b13
-      movq        mm0,[ecx+16]	; a22	| a12
-      pfadd       mm3,mm2			; a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33
-      movq        mm1,[ecx+24]	; a42	| a32
-      movq        [eax],mm7		; r21	| r11
-      movq        mm4,[edx]		; b21	| b11
-      movq        [eax+8],mm3		; r41	| r31
-      ; ---- r[4..7]: uses a[4..7] ----
-      punpckhdq   mm2,mm0			; a22	| XXX
-      movq        mm5,[edx+16]	; b22	| b12
-      punpckhdq   mm3,mm1			; a42	| XXX
-      movq        mm6,[edx+32]	; b23	| b13
-      punpckldq   mm0,mm0			; a12	| a12
-      punpckldq   mm1,mm1			; a32	| a32
-      pfmul       mm4,mm0			; a12*b21 | a12*b11
-      punpckhdq   mm2,mm2			; a22	| a22
-      pfmul       mm0,[edx+8]		; a12*b41 | a12*b31
-      movq        mm7,[edx+48]	; b24	| b14
-      pfmul       mm5,mm2			; a22*b22 | a22*b12
-      punpckhdq   mm3,mm3			; a42	| a42
-      pfmul       mm2,[edx+24]	; a22*b42 | a22*b32
-      pfmul       mm6,mm1			; a32*b23 | a32*b13
-      pfadd       mm5,mm4			; a12*b21 + a22*b22 | a12*b11 + a22*b12
-      pfmul       mm1,[edx+40]	; a32*b43 | a32*b33
-      pfadd       mm2,mm0			; a12*b41 + a22*b42 | a12*b31 + a22*b32
-      pfmul       mm7,mm3			; a42*b24 | a42*b14
-      pfadd       mm6,mm5			; a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12
-      pfmul       mm3,[edx+56]	; a42*b44 | a42*b34
-      pfadd       mm2,mm1			; a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b31 + a22*b32
-      pfadd       mm7,mm6			; a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12
-      movq        mm0,[ecx+32]	; a23 | a13
-      pfadd       mm3,mm2			; a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b31 + a22*b32
-      movq        mm1,[ecx+40]	; a43 | a33
-      movq        [eax+16],mm7	; r22 | r12
-      movq        mm4,[edx]		; b21	| b11
-      movq        [eax+24],mm3	; r42 | r32
-      ; ---- r[8..11]: uses a[8..11] ----
-      punpckhdq   mm2,mm0			; a23 | XXX
-      movq        mm5,[edx+16]	; b22 | b12
-      punpckhdq   mm3,mm1			; a43 | XXX
-      movq        mm6,[edx+32]	; b23 | b13
-      punpckldq   mm0,mm0			; a13 | a13
-      punpckldq   mm1,mm1			; a33 | a33
-      pfmul       mm4,mm0			; a13*b21 | a13*b11
-      punpckhdq   mm2,mm2			; a23 | a23
-      pfmul       mm0,[edx+8]		; a13*b41 | a13*b31
-      movq        mm7,[edx+48]	; b24 | b14
-      pfmul       mm5,mm2			; a23*b22 | a23*b12
-      punpckhdq   mm3,mm3			; a43 | a43
-      pfmul       mm2,[edx+24]	; a23*b42 | a23*b32
-      pfmul       mm6,mm1			; a33*b23 | a33*b13
-      pfadd       mm5,mm4			; a23*b22 + a13*b21 | a23*b12 + a13*b11
-      pfmul       mm1,[edx+40]	; a33*b43 | a33*b33
-      pfadd       mm2,mm0			; a13*b41 + a23*b42 | a13*b31 + a23*b32
-      pfmul       mm7,mm3			; a43*b24 | a43*b14
-      pfadd       mm6,mm5			; a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11
-      pfmul       mm3,[edx+56]	; a43*b44 | a43*b34
-      pfadd       mm2,mm1			; a33*b43 + a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32
-      pfadd       mm7,mm6			; a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11
-      movq        mm0,[ecx+48]	; a24 | a14
-      pfadd       mm3,mm2			; a43*b44 + a33*b43 + a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32
-      movq        mm1,[ecx+56]	; a44 | a34
-      movq        [eax+32],mm7	; r23 | r13
-      movq        mm4,[edx]		; b21 | b11
-      movq        [eax+40],mm3	; r43 | r33
-      ; ---- r[12..15]: uses a[12..15] ----
-      punpckhdq   mm2,mm0			; a24 | XXX
-      movq        mm5,[edx+16]	; b22 | b12
-      punpckhdq   mm3,mm1			; a44 | XXX
-      movq        mm6,[edx+32]	; b23 | b13
-      punpckldq   mm0,mm0			; a14 | a14
-      punpckldq   mm1,mm1			; a34 | a34
-      pfmul       mm4,mm0			; a14*b21 | a14*b11
-      punpckhdq   mm2,mm2			; a24 | a24
-      pfmul       mm0,[edx+8]		; a14*b41 | a14*b31
-      movq        mm7,[edx+48]	; b24 | b14
-      pfmul       mm5,mm2			; a24*b22 | a24*b12
-      punpckhdq   mm3,mm3			; a44 | a44
-      pfmul       mm2,[edx+24]	; a24*b42 | a24*b32
-      pfmul       mm6,mm1			; a34*b23 | a34*b13
-      pfadd       mm5,mm4			; a14*b21 + a24*b22 | a14*b11 + a24*b12
-      pfmul       mm1,[edx+40]	; a34*b43 | a34*b33
-      pfadd       mm2,mm0			; a14*b41 + a24*b42 | a14*b31 + a24*b32
-      pfmul       mm7,mm3			; a44*b24 | a44*b14
-      pfadd       mm6,mm5			; a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12
-      pfmul       mm3,[edx+56]	; a44*b44 | a44*b34
-      pfadd       mm2,mm1			; a34*b43 + a14*b41 + a24*b42 | a34*b33 + a14*b31 + a24*b32
-      pfadd       mm7,mm6			; a44*b24 + a34*b23 + a14*b21 + a24*b22 | a44*b14 + a34*b13 + a14*b11 + a24*b12
-      pfadd       mm3,mm2			; a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32
-      movq        [eax+48],mm7	; r24 | r14
-      movq        [eax+56],mm3	; r44 | r34
-      femms                      ; fast-clear the MMX state again before returning
-
-      ret                        ; plain ret: the stack arguments are left for the caller to remove

+ 0 - 13
Engine/source/math/test/mMatrixTest.cpp

@@ -60,19 +60,6 @@ TEST(MatrixF, MultiplyImplmentations)
    U32 cpuProperties = Platform::SystemInfo.processor.properties;
    bool same;
 
-   // Test 3D NOW! if it is available
-   F32 mrAMD[16];
-   if (cpuProperties & CPU_PROP_3DNOW)
-   {
-      Athlon_MatrixF_x_MatrixF(m1, m2, mrAMD);
-
-      same = true;
-      for (S32 i = 0; i < 16; i++)
-         same &= mIsEqual(mrC[i], mrAMD[i]);
-
-      EXPECT_TRUE(same) << "Matrix multiplication verification failed. (C vs. 3D NOW!)";
-   }
-
    // Test SSE if it is available
    F32 mrSSE[16];
    if (cpuProperties & CPU_PROP_SSE)

+ 15 - 55
Engine/source/platform/platform.h

@@ -55,50 +55,11 @@
 /// @note These enums must be globally scoped so that they work with the inline assembly
 enum ProcessorType
 {
-   // x86
    CPU_X86Compatible,
-   CPU_Intel_Unknown,
-   CPU_Intel_486,
-   CPU_Intel_Pentium,
-   CPU_Intel_PentiumMMX,
-   CPU_Intel_PentiumPro,
-   CPU_Intel_PentiumII,
-   CPU_Intel_PentiumCeleron,
-   CPU_Intel_PentiumIII,
-   CPU_Intel_Pentium4,
-   CPU_Intel_PentiumM,
-   CPU_Intel_Core,
-   CPU_Intel_Core2,
-   CPU_Intel_Corei7Xeon, // Core i7 or Xeon
-   CPU_AMD_K6,
-   CPU_AMD_K6_2,
-   CPU_AMD_K6_3,
-   CPU_AMD_Athlon,
-   CPU_AMD_Phenom,
-   CPU_AMD_PhenomII,
-   CPU_AMD_Bulldozer,
-   CPU_AMD_Unknown,
-   CPU_Cyrix_6x86,
-   CPU_Cyrix_MediaGX,
-   CPU_Cyrix_6x86MX,
-   CPU_Cyrix_GXm,          ///< Media GX w/ MMX
-   CPU_Cyrix_Unknown,
-
-   // PowerPC
-   CPU_PowerPC_Unknown,
-   CPU_PowerPC_601,
-   CPU_PowerPC_603,
-   CPU_PowerPC_603e,
-   CPU_PowerPC_603ev,
-   CPU_PowerPC_604,
-   CPU_PowerPC_604e,
-   CPU_PowerPC_604ev,
-   CPU_PowerPC_G3,
-   CPU_PowerPC_G4,
-   CPU_PowerPC_G4_7450,
-   CPU_PowerPC_G4_7455,
-   CPU_PowerPC_G4_7447, 
-   CPU_PowerPC_G5,
+   CPU_ArmCompatible,
+   CPU_Intel,
+   CPU_AMD,
+   CPU_Apple
 };
 
 /// Properties for CPU.
@@ -107,17 +68,17 @@ enum ProcessorProperties
    CPU_PROP_C         = (1<<0),  ///< We should use C fallback math functions.
    CPU_PROP_FPU       = (1<<1),  ///< Has an FPU. (It better!)
    CPU_PROP_MMX       = (1<<2),  ///< Supports MMX instruction set extension.
-   CPU_PROP_3DNOW     = (1<<3),  ///< Supports AMD 3dNow! instruction set extension.
-   CPU_PROP_SSE       = (1<<4),  ///< Supports SSE instruction set extension.
-   CPU_PROP_RDTSC     = (1<<5),  ///< Supports Read Time Stamp Counter op.
-   CPU_PROP_SSE2      = (1<<6),  ///< Supports SSE2 instruction set extension.
-   CPU_PROP_SSE3      = (1<<7),  ///< Supports SSE3 instruction set extension.  
-   CPU_PROP_SSE3xt    = (1<<8),  ///< Supports extended SSE3 instruction set  
-   CPU_PROP_SSE4_1    = (1<<9),  ///< Supports SSE4_1 instruction set extension.  
-   CPU_PROP_SSE4_2    = (1<<10), ///< Supports SSE4_2 instruction set extension.  
-   CPU_PROP_MP        = (1<<11), ///< This is a multi-processor system.
-   CPU_PROP_LE        = (1<<12), ///< This processor is LITTLE ENDIAN.  
-   CPU_PROP_64bit     = (1<<13), ///< This processor is 64-bit capable
+   CPU_PROP_SSE       = (1<<3),  ///< Supports SSE instruction set extension.
+   CPU_PROP_SSE2      = (1<<4),  ///< Supports SSE2 instruction set extension.
+   CPU_PROP_SSE3      = (1<<5),  ///< Supports SSE3 instruction set extension.  
+   CPU_PROP_SSE3ex    = (1<<6),  ///< Supports Supplemental SSE3 instruction set  
+   CPU_PROP_SSE4_1    = (1<<7),  ///< Supports SSE4_1 instruction set extension.  
+   CPU_PROP_SSE4_2    = (1<<8),  ///< Supports SSE4_2 instruction set extension.
+   CPU_PROP_AVX       = (1<<9), ///< Supports AVX256 instruction set extension.
+   CPU_PROP_MP        = (1<<10), ///< This is a multi-processor system.
+   CPU_PROP_LE        = (1<<11), ///< This processor is LITTLE ENDIAN.
+   CPU_PROP_64bit     = (1<<12), ///< This processor is 64-bit capable
+   CPU_PROP_NEON      = (1<<13), ///< Supports the Arm Neon instruction set extension.
 };
 
 /// Processor info manager. 
@@ -336,7 +297,6 @@ namespace Platform
             bool           isHyperThreaded;
             U32            numLogicalProcessors;
             U32            numPhysicalProcessors;
-            U32            numAvailableCores;
             U32            properties;      // CPU type specific enum
          } processor;
    };

+ 31 - 248
Engine/source/platform/platformCPU.cpp

@@ -28,264 +28,47 @@
 
 Signal<void(void)> Platform::SystemInfoReady;
 
-enum CPUFlags
-{
-   // EDX Register flags
-   BIT_FPU     = BIT(0),
-   BIT_RDTSC   = BIT(4),
-   BIT_MMX     = BIT(23),
-   BIT_SSE     = BIT(25),
-   BIT_SSE2    = BIT(26),
-   BIT_3DNOW   = BIT(31),
-
-   // These use a different value for comparison than the above flags (ECX Register)
-   BIT_SSE3    = BIT(0),
-   BIT_SSE3xt  = BIT(9),
-   BIT_SSE4_1  = BIT(19),
-   BIT_SSE4_2  = BIT(20),
-};
-
 // fill the specified structure with information obtained from asm code
-void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
-   char* vendor, U32 processor, U32 properties, U32 properties2)
+void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand)
 {
-   Platform::SystemInfo.processor.properties |= (properties & BIT_FPU)   ? CPU_PROP_FPU : 0;
-   Platform::SystemInfo.processor.properties |= (properties & BIT_RDTSC) ? CPU_PROP_RDTSC : 0;
-   Platform::SystemInfo.processor.properties |= (properties & BIT_MMX)   ? CPU_PROP_MMX : 0;
-
    if (dStricmp(vendor, "GenuineIntel") == 0)
    {
-      pInfo.properties |= (properties & BIT_SSE) ? CPU_PROP_SSE : 0;
-      pInfo.properties |= (properties & BIT_SSE2) ? CPU_PROP_SSE2 : 0;
-      pInfo.properties |= (properties2 & BIT_SSE3) ? CPU_PROP_SSE3 : 0;
-      pInfo.properties |= (properties2 & BIT_SSE3xt) ? CPU_PROP_SSE3xt : 0;
-      pInfo.properties |= (properties2 & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0;
-      pInfo.properties |= (properties2 & BIT_SSE4_2) ? CPU_PROP_SSE4_2 : 0;
-
-      pInfo.type = CPU_Intel_Unknown;
-      // switch on processor family code
-      switch ((processor >> 8) & 0x0f)
-      {
-         case 4:
-            pInfo.type = CPU_Intel_486;
-            pInfo.name = StringTable->insert("Intel 486 class");
-            break;
-
-            // Pentium Family
-         case 5:
-            // switch on processor model code
-            switch ((processor >> 4) & 0xf)
-            {
-               case 1:
-               case 2:
-               case 3:
-                  pInfo.type = CPU_Intel_Pentium;
-                  pInfo.name = StringTable->insert("Intel Pentium");
-                  break;
-               case 4:
-                  pInfo.type = CPU_Intel_PentiumMMX;
-                  pInfo.name = StringTable->insert("Intel Pentium MMX");
-                  break;
-               default:
-                  pInfo.type = CPU_Intel_Pentium;
-                  pInfo.name = StringTable->insert( "Intel (unknown)" );
-                  break;
-            }
-            break;
-
-            // Pentium Pro/II/II family
-         case 6:
-         {
-            U32 extendedModel = ( processor & 0xf0000 ) >> 16;
-            // switch on processor model code
-            switch ((processor >> 4) & 0xf)
-            {
-               case 1:
-                  pInfo.type = CPU_Intel_PentiumPro;
-                  pInfo.name = StringTable->insert("Intel Pentium Pro");
-                  break;
-               case 3:
-               case 5:
-                  pInfo.type = CPU_Intel_PentiumII;
-                  pInfo.name = StringTable->insert("Intel Pentium II");
-                  break;
-               case 6:
-                  pInfo.type = CPU_Intel_PentiumCeleron;
-                  pInfo.name = StringTable->insert("Intel Pentium Celeron");
-                  break;
-               case 7:
-               case 8:
-               case 11:
-                  pInfo.type = CPU_Intel_PentiumIII;
-                  pInfo.name = StringTable->insert("Intel Pentium III");
-                  break;
-               case 0xA:
-                  if( extendedModel == 1)
-                  {
-                     pInfo.type = CPU_Intel_Corei7Xeon;
-                     pInfo.name = StringTable->insert( "Intel Core i7 / Xeon" );
-                  }
-                  else
-                  {
-                     pInfo.type = CPU_Intel_PentiumIII;
-                     pInfo.name = StringTable->insert( "Intel Pentium III Xeon" );
-                  }
-                  break;
-               case 0xD:
-                  if( extendedModel == 1 )
-                  {
-                     pInfo.type = CPU_Intel_Corei7Xeon;
-                     pInfo.name = StringTable->insert( "Intel Core i7 / Xeon" );
-                  }
-                  else
-                  {
-                     pInfo.type = CPU_Intel_PentiumM;
-                     pInfo.name = StringTable->insert( "Intel Pentium/Celeron M" );
-                  }
-                  break;
-               case 0xE:
-                  pInfo.type = CPU_Intel_Core;
-                  pInfo.name = StringTable->insert( "Intel Core" );
-                  break;
-               case 0xF:
-                  pInfo.type = CPU_Intel_Core2;
-                  pInfo.name = StringTable->insert( "Intel Core 2" );
-                  break;
-               default:
-                  pInfo.type = CPU_Intel_PentiumPro;
-                  pInfo.name = StringTable->insert( "Intel (unknown)" );
-                  break;
-            }
-            break;
-         }
-
-            // Pentium4 Family
-         case 0xf:
-            pInfo.type = CPU_Intel_Pentium4;
-            pInfo.name = StringTable->insert( "Intel Pentium 4" );
-            break;
-
-         default:
-            pInfo.type = CPU_Intel_Unknown;
-            pInfo.name = StringTable->insert( "Intel (unknown)" );
-            break;
-      }
+      pInfo.type = CPU_Intel;
+      pInfo.name = StringTable->insert(brand ? brand : "Intel (Unknown)");
    }
    //--------------------------------------
+   else if (dStricmp(vendor, "AuthenticAMD") == 0)
+   {
+      pInfo.name = StringTable->insert(brand ? brand : "AMD (unknown)");
+      pInfo.type = CPU_AMD;
+   }
+   else if (dStricmp(vendor, "Apple") == 0)
+   {
+      pInfo.name = StringTable->insert(brand ? brand : "Apple (unknown)");
+      pInfo.type = CPU_Apple;
+   }
    else
-      if (dStricmp(vendor, "AuthenticAMD") == 0)
-      {
-         // AthlonXP processors support SSE
-         pInfo.properties |= (properties & BIT_SSE) ? CPU_PROP_SSE : 0;
-         pInfo.properties |= ( properties & BIT_SSE2 ) ? CPU_PROP_SSE2 : 0;
-         pInfo.properties |= (properties & BIT_3DNOW) ? CPU_PROP_3DNOW : 0;
-       // Phenom and PhenomII support SSE3, SSE4a
-       pInfo.properties |= ( properties2 & BIT_SSE3 ) ? CPU_PROP_SSE3 : 0;
-         pInfo.properties |= ( properties2 & BIT_SSE4_1 ) ? CPU_PROP_SSE4_1 : 0;
-         // switch on processor family code
-         switch ((processor >> 8) & 0xf)
-         {
-            // K6 Family
-            case 5:
-               // switch on processor model code
-               switch ((processor >> 4) & 0xf)
-               {
-                  case 0:
-                  case 1:
-                  case 2:
-                  case 3:
-                     pInfo.type = CPU_AMD_K6_3;
-                     pInfo.name = StringTable->insert("AMD K5");
-                     break;
-                  case 4:
-                  case 5:
-                  case 6:
-                  case 7:
-                     pInfo.type = CPU_AMD_K6;
-                     pInfo.name = StringTable->insert("AMD K6");
-                     break;
-                  case 8:
-                     pInfo.type = CPU_AMD_K6_2;
-                     pInfo.name = StringTable->insert("AMD K6-2");
-                     break;
-                  case 9:
-                  case 10:
-                  case 11:
-                  case 12:
-                  case 13:
-                  case 14:
-                  case 15:
-                     pInfo.type = CPU_AMD_K6_3;
-                     pInfo.name = StringTable->insert("AMD K6-3");
-                     break;
-               }
-               break;
-
-               // Athlon Family
-            case 6:
-               pInfo.type = CPU_AMD_Athlon;
-               pInfo.name = StringTable->insert("AMD Athlon");
-               break;
-
-               // Phenom Family
-         case 15:
-               pInfo.type = CPU_AMD_Phenom;
-               pInfo.name = StringTable->insert("AMD Phenom");
-               break;
-
-            // Phenom II Family
-         case 16:
-               pInfo.type = CPU_AMD_PhenomII;
-               pInfo.name = StringTable->insert("AMD Phenom II");
-               break;
-
-            // Bulldozer Family
-         case 17:
-               pInfo.type = CPU_AMD_Bulldozer;
-               pInfo.name = StringTable->insert("AMD Bulldozer");
-               break;
-
-            default:
-               pInfo.type = CPU_AMD_Unknown;
-               pInfo.name = StringTable->insert("AMD (unknown)");
-               break;
-         }
-      }
-   //--------------------------------------
-      else
-         if (dStricmp(vendor, "CyrixInstead") == 0)
-         {
-            switch (processor)
-            {
-               case 0x520:
-                  pInfo.type = CPU_Cyrix_6x86;
-                  pInfo.name = StringTable->insert("Cyrix 6x86");
-                  break;
-               case 0x440:
-                  pInfo.type = CPU_Cyrix_MediaGX;
-                  pInfo.name = StringTable->insert("Cyrix Media GX");
-                  break;
-               case 0x600:
-                  pInfo.type = CPU_Cyrix_6x86MX;
-                  pInfo.name = StringTable->insert("Cyrix 6x86mx/MII");
-                  break;
-               case 0x540:
-                  pInfo.type = CPU_Cyrix_GXm;
-                  pInfo.name = StringTable->insert("Cyrix GXm");
-                  break;
-               default:
-                  pInfo.type = CPU_Cyrix_Unknown;
-                  pInfo.name = StringTable->insert("Cyrix (unknown)");
-                  break;
-            }
-         }
-
+   {
+#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64)
+      
+      pInfo.name = StringTable->insert(brand ? brand : "x86 Compatible (unknown)");
+      pInfo.type = CPU_X86Compatible;
+      
+#elif defined(TORQUE_CPU_ARM64)
+      pInfo.name = StringTable->insert(brand ? brand : "Arm Compatible (unknown)");
+      pInfo.type = CPU_ArmCompatible;
+      
+#else
+#error "Unknown CPU Architecture"
+      
+#endif
+   }
+   
    // Get multithreading caps.
-
-   CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numAvailableCores, pInfo.numPhysicalProcessors );
+   CPUInfo::EConfig config = CPUInfo::CPUCount( pInfo.numLogicalProcessors, pInfo.numPhysicalProcessors );
    pInfo.isHyperThreaded = CPUInfo::isHyperThreaded( config );
    pInfo.isMultiCore = CPUInfo::isMultiCore( config );
 
    // Trigger the signal
    Platform::SystemInfoReady.trigger();
-}
+}

+ 0 - 657
Engine/source/platform/platformCPUCount.cpp

@@ -1,657 +0,0 @@
-// Original code is:
-// Copyright (c) 2005 Intel Corporation 
-// All Rights Reserved
-//
-// CPUCount.cpp : Detects three forms of hardware multi-threading support across IA-32 platform
-//					The three forms of HW multithreading are: Multi-processor, Multi-core, and 
-//					HyperThreading Technology.
-//					This application enumerates all the logical processors enabled by OS and BIOS,
-//					and determines the HW topology of these enabled logical processors in the system
-//					using information provided by CPUID instruction.
-//					A multi-processing system can support any combination of the three forms of HW
-//					multi-threading support. The relevant topology can be identified using a 
-//					three level decomposition of the "initial APIC ID" into 
-//					Package_id, core_id, and SMT_id. Such decomposition provides a three-level map of 
-//					the topology of hardware resources and
-//					allows multi-threaded software to manage shared hardware resources in
-//					the platform to reduce resource contention
-
-//					Multicore detection algorithm for processor and cache topology requires
-//					all leaf functions of CPUID instructions be available. System administrator
-//					must ensure BIOS settings are not configured to restrict CPUID functionality.
-//-------------------------------------------------------------------------------------------------
-
-#if defined(TORQUE_OS_LINUX) || defined(LINUX)
-
-// TODO: GCC code doesn't compile in Release with optimizations; move code to platform layer
-
-#else
-
-#include "platform/platform.h"
-#include "platform/platformCPUCount.h"
-
-#if defined(TORQUE_OS_LINUX) || defined(TORQUE_OS_OSX)
-
-#ifdef TORQUE_OS_LINUX
-// 	The Linux source code listing can be compiled using Linux kernel version 2.6
-//	or higher (e.g. RH 4AS-2.8 using GCC 3.4.4). 
-//	Due to syntax variances of Linux affinity APIs with earlier kernel versions 
-//	and dependence on glibc library versions, compilation on Linux environment 
-//	with older kernels and compilers may require kernel patches or compiler upgrades.
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <sched.h>
-#define DWORD unsigned long
-#elif defined( TORQUE_OS_WIN )
-#include <windows.h>
-#elif defined( TORQUE_OS_MAC )
-#  include <sys/types.h>
-#  include <sys/sysctl.h>
-#else
-#error Not implemented on platform.
-#endif
-#include <stdio.h>
-#include <assert.h>
-
-namespace CPUInfo {
-
-#define HWD_MT_BIT         0x10000000     // EDX[28]  Bit 28 is set if HT or multi-core is supported
-#define NUM_LOGICAL_BITS   0x00FF0000     // EBX[23:16] Bit 16-23 in ebx contains the number of logical
-      // processors per physical processor when execute cpuid with 
-      // eax set to 1
-#define NUM_CORE_BITS      0xFC000000     // EAX[31:26] Bit 26-31 in eax contains the number of cores minus one
-      // per physical processor when execute cpuid with 
-      // eax set to 4. 
-
-
-#define INITIAL_APIC_ID_BITS  0xFF000000  // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique 
-      // initial APIC ID for the processor this code is running on.
-
-
-      #ifndef TORQUE_OS_MAC
-      static U32  CpuIDSupported(void);      
-      static U32  find_maskwidth(unsigned int);
-      static U32  HWD_MTSupported(void);
-      static U32  MaxLogicalProcPerPhysicalProc(void);
-      static U32  MaxCorePerPhysicalProc(void);
-      static U8 GetAPIC_ID(void);
-      static U8 GetNzbSubID(U8, U8, U8);
-      #endif
-
-      static char g_s3Levels[2048];
-
-#ifndef TORQUE_OS_MAC
-
-      //
-      // CpuIDSupported will return 0 if CPUID instruction is unavailable. Otherwise, it will return 
-      // the maximum supported standard function.
-      //
-      static U32 CpuIDSupported(void)
-      {
-         U32 maxInputValue = 0;
-         // If CPUID instruction is supported
-#ifdef TORQUE_COMPILER_GCC
-         try    
-         {		
-            // call cpuid with eax = 0
-            asm
-               (
-               "pushl %%ebx\n\t"
-               "xorl %%eax,%%eax\n\t"
-               "cpuid\n\t"
-               "popl %%ebx\n\t"
-               : "=a" (maxInputValue)
-               : 
-               : "%ecx", "%edx"
-               );		
-         }
-         catch (...)
-         {
-            return(0);                   // cpuid instruction is unavailable
-         }
-#elif defined( TORQUE_COMPILER_VISUALC )
-         try
-         {
-            // call cpuid with eax = 0
-            __asm
-            {
-               xor eax, eax
-                  cpuid
-                  mov maxInputValue, eax
-            }
-         }
-         catch (...)
-         {
-            // cpuid instruction is unavailable
-         }
-#else
-#  error Not implemented.
-#endif
-
-         return maxInputValue;
-      }
-
-
-
-      //
-      // Function returns the maximum cores per physical package. Note that the number of 
-      // AVAILABLE cores per physical package to be used by an application might be less than this
-      // maximum value.
-      //
-
-      static U32 MaxCorePerPhysicalProc(void)
-      {
-
-         U32 Regeax        = 0;
-
-         if (!HWD_MTSupported()) return (U32) 1;  // Single core
-#ifdef TORQUE_COMPILER_GCC
-         {
-            asm
-               (
-               "pushl %ebx\n\t"
-               "xorl %eax, %eax\n\t"
-               "cpuid\n\t"
-               "cmpl $4, %eax\n\t"			// check if cpuid supports leaf 4
-               "jl .single_core\n\t"		// Single core
-               "movl $4, %eax\n\t"		
-               "movl $0, %ecx\n\t"			// start with index = 0; Leaf 4 reports
-               "popl %ebx\n\t"
-               );								// at least one valid cache level
-            asm
-               (
-               "cpuid"
-               : "=a" (Regeax)
-               :
-               : "%ecx", "%edx"
-               );		
-            asm
-               (
-               "jmp .multi_core\n"
-               ".single_core:\n\t"
-               "xor %eax, %eax\n"
-               ".multi_core:"
-               );		
-         }
-#elif defined( TORQUE_COMPILER_VISUALC )
-         __asm
-         {
-            xor eax, eax
-               cpuid
-               cmp eax, 4			// check if cpuid supports leaf 4
-               jl single_core		// Single core
-               mov eax, 4			
-               mov ecx, 0			// start with index = 0; Leaf 4 reports
-               cpuid				// at least one valid cache level
-               mov Regeax, eax
-               jmp multi_core
-
-single_core:
-            xor eax, eax		
-
-multi_core:
-
-         }
-#else
-#  error Not implemented.
-#endif
-         return (U32)((Regeax & NUM_CORE_BITS) >> 26)+1;
-
-      }
-
-
-
-      //
-      // The function returns 0 when the hardware multi-threaded bit is not set.
-      //
-      static U32 HWD_MTSupported(void)
-      {
-
-
-         U32 Regedx      = 0;
-
-
-         if ((CpuIDSupported() >= 1))
-         {
-#ifdef TORQUE_COMPILER_GCC
-            asm 
-               (
-               "pushl %%ebx\n\t"
-               "movl $1,%%eax\n\t"
-               "cpuid\n\t"
-               "popl %%ebx\n\t"
-               : "=d" (Regedx)
-               :
-               : "%eax","%ecx"
-               );
-#elif defined( TORQUE_COMPILER_VISUALC )
-            __asm
-            {
-               mov eax, 1
-                  cpuid
-                  mov Regedx, edx
-            }		
-#else
-#  error Not implemented.
-#endif
-         }
-
-         return (Regedx & HWD_MT_BIT);  
-
-
-      }
-
-
-
-      //
-      // Function returns the maximum logical processors per physical package. Note that the number of 
-      // AVAILABLE logical processors per physical package to be used by an application might be less than this
-      // maximum value.
-      //
-      static U32 MaxLogicalProcPerPhysicalProc(void)
-      {
-
-         U32 Regebx = 0;
-
-         if (!HWD_MTSupported()) return (U32) 1;
-#ifdef TORQUE_COMPILER_GCC
-         asm 
-            (
-            "movl $1,%%eax\n\t"
-            "cpuid"
-            : "=b" (Regebx)
-            :
-            : "%eax","%ecx","%edx"
-            );
-#elif defined( TORQUE_COMPILER_VISUALC )
-         __asm
-         {
-            mov eax, 1
-               cpuid
-               mov Regebx, ebx
-         }
-#else
-#  error Not implemented.
-#endif
-         return (unsigned int) ((Regebx & NUM_LOGICAL_BITS) >> 16);
-
-      }
-
-
-      static U8 GetAPIC_ID(void)
-      {
-
-         U32 Regebx = 0;
-#ifdef TORQUE_COMPILER_GCC
-         asm
-            (
-            "movl $1, %%eax\n\t"	
-            "cpuid"
-            : "=b" (Regebx) 
-            :
-            : "%eax","%ecx","%edx" 
-            );
-
-#elif defined( TORQUE_COMPILER_VISUALC )
-         __asm
-         {
-            mov eax, 1
-               cpuid
-               mov Regebx, ebx
-         }
-#else
-#  error Not implemented.
-#endif                                
-
-         return (unsigned char) ((Regebx & INITIAL_APIC_ID_BITS) >> 24);
-
-      }
-
-      //
-      // Determine the width of the bit field that can represent the value CountItem.
-      //
-      U32 find_maskwidth(U32 CountItem)
-      {
-         U32 MaskWidth,
-            count = CountItem;
-#ifdef TORQUE_COMPILER_GCC
-         asm
-            (
-#ifdef __x86_64__		// define constant to compile  
-            "push %%rcx\n\t"		// under 64-bit Linux
-            "push %%rax\n\t"
-#else
-            "pushl %%ecx\n\t"
-            "pushl %%eax\n\t"
-#endif
-            //		"movl $count, %%eax\n\t" //done by Assembler below
-            "xorl %%ecx, %%ecx"
-            //		"movl %%ecx, MaskWidth\n\t" //done by Assembler below
-            : "=c" (MaskWidth)
-            : "a" (count)
-            //		: "%ecx", "%eax" We don't list these as clobbered because we don't want the assembler
-            //to put them back when we are done
-            );
-         asm
-            (
-            "decl %%eax\n\t"
-            "bsrw %%ax,%%cx\n\t"
-            "jz next\n\t"
-            "incw %%cx\n\t"
-            //		"movl %%ecx, MaskWidth\n" //done by Assembler below
-            : "=c" (MaskWidth)
-            :
-         );
-         asm
-            (
-            "next:\n\t"
-#ifdef __x86_64__
-            "pop %rax\n\t"
-            "pop %rcx"		
-#else
-            "popl %eax\n\t"
-            "popl %ecx"		
-#endif
-            );
-
-#elif defined( TORQUE_COMPILER_VISUALC )
-         __asm
-         {
-            mov eax, count
-               mov ecx, 0
-               mov MaskWidth, ecx
-               dec eax
-               bsr cx, ax
-               jz next
-               inc cx
-               mov MaskWidth, ecx
-next:
-
-         }
-#else
-#  error Not implemented.
-#endif
-         return MaskWidth;
-      }
-
-
-      //
-      // Extract the subset of bit field from the 8-bit value FullID.  It returns the 8-bit sub ID value
-      //
-      static U8 GetNzbSubID(U8 FullID,
-         U8 MaxSubIDValue,
-         U8 ShiftCount)
-      {
-         U32 MaskWidth;
-         U8 MaskBits;
-
-         MaskWidth = find_maskwidth((U32) MaxSubIDValue);
-         MaskBits  = (0xff << ShiftCount) ^ 
-            ((U8) (0xff << (ShiftCount + MaskWidth)));
-
-         return (FullID & MaskBits);
-      }
-
-#endif
-
-
-      //
-      //
-      //
-      EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
-      {
-         EConfig StatusFlag = CONFIG_UserConfigIssue;
-
-         g_s3Levels[0] = 0;
-         TotAvailCore = 1;
-         PhysicalNum  = 1;
-         
-         U32 numLPEnabled = 0;
-         S32 MaxLPPerCore = 1;
-
-#ifdef TORQUE_OS_MAC
-
-         //FIXME: This isn't a proper port but more or less just some sneaky cheating
-         //  to get around having to mess with yet another crap UNIX-style API.  Seems
-         //  like there isn't a way to do this that's working across all OSX incarnations
-         //  and machine configurations anyway.
-
-         S32 numCPUs;
-         S32 numPackages;
-
-         // Get the number of CPUs.
-
-         size_t len = sizeof( numCPUs );
-         if( sysctlbyname( "hw.ncpu", &numCPUs, &len, 0, 0 ) == -1 )
-            return CONFIG_UserConfigIssue;
-
-         // Get the number of packages.
-         len = sizeof( numPackages );
-         if( sysctlbyname( "hw.packages", &numPackages, &len, 0, 0 ) == -1 )
-            return CONFIG_UserConfigIssue;
-
-         TotAvailCore = numCPUs;
-         TotAvailLogical = numCPUs;
-         PhysicalNum = numPackages;
-#else
-
-         U32 dwAffinityMask;
-         S32 j = 0;
-         U8 apicID, PackageIDMask;
-         U8 tblPkgID[256], tblCoreID[256], tblSMTID[256];
-         char	tmp[256];
-
-#ifdef TORQUE_OS_LINUX
-         //we need to make sure that this process is allowed to run on 
-         //all of the logical processors that the OS itself can run on.
-         //A process could acquire/inherit affinity settings that restricts the 
-         // current process to run on a subset of all logical processor visible to OS.
-
-         // Linux doesn't easily allow us to look at the Affinity Bitmask directly,
-         // but it does provide an API to test affinity maskbits of the current process 
-         // against each logical processor visible under OS.
-         S32 sysNumProcs = sysconf(_SC_NPROCESSORS_CONF); //This will tell us how many 
-         //CPUs are currently enabled.
-
-         //this will tell us which processors this process can run on. 
-         cpu_set_t allowedCPUs;	 
-         sched_getaffinity(0, sizeof(allowedCPUs), &allowedCPUs);
-
-         for (S32 i = 0; i < sysNumProcs; i++ )
-         {
-            if ( CPU_ISSET(i, &allowedCPUs) == 0 )
-               return CONFIG_UserConfigIssue;
-         }
-#elif defined( TORQUE_OS_WIN )
-         DWORD dwProcessAffinity, dwSystemAffinity;
-         GetProcessAffinityMask(GetCurrentProcess(), 
-            &dwProcessAffinity,
-            &dwSystemAffinity);
-         if (dwProcessAffinity != dwSystemAffinity)  // not all CPUs are enabled
-            return CONFIG_UserConfigIssue;
-#else
-#  error Not implemented.
-#endif
-
-         // Assume that cores within a package have the SAME number of 
-         // logical processors.  Also, values returned by
-         // MaxLogicalProcPerPhysicalProc and MaxCorePerPhysicalProc do not have
-         // to be power of 2.
-
-         MaxLPPerCore = MaxLogicalProcPerPhysicalProc() / MaxCorePerPhysicalProc();
-         dwAffinityMask = 1;
-
-#ifdef TORQUE_OS_LINUX
-         cpu_set_t currentCPU;
-         while ( j < sysNumProcs )
-         {
-            CPU_ZERO(&currentCPU);
-            CPU_SET(j, &currentCPU);
-            if ( sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == 0 )
-            {
-               sleep(0);  // Ensure system to switch to the right CPU
-#elif defined( TORQUE_OS_WIN )
-         while (dwAffinityMask && dwAffinityMask <= dwSystemAffinity)
-         {
-            if (SetThreadAffinityMask(GetCurrentThread(), dwAffinityMask))
-            {
-               Sleep(0);  // Ensure system to switch to the right CPU
-#else
-#  error Not implemented.
-#endif
-               apicID = GetAPIC_ID();
-
-
-               // Store SMT ID and core ID of each logical processor
-               // Shift value for SMT ID is 0
-               // Shift value for core ID is the mask width for maximum logical
-               // processors per core
-
-               tblSMTID[j]  = GetNzbSubID(apicID, MaxLPPerCore, 0);
-               U8 maxCorePPP = MaxCorePerPhysicalProc();
-               U8 maskWidth = find_maskwidth(MaxLPPerCore);
-               tblCoreID[j] = GetNzbSubID(apicID, maxCorePPP, maskWidth);
-
-               // Extract package ID, assume single cluster.
-               // Shift value is the mask width for max Logical per package
-
-               PackageIDMask = (unsigned char) (0xff << 
-                  find_maskwidth(MaxLogicalProcPerPhysicalProc()));
-
-               tblPkgID[j] = apicID & PackageIDMask;
-               sprintf(tmp,"  AffinityMask = %d; Initial APIC = %d; Physical ID = %d, Core ID = %d,  SMT ID = %d\n",
-                  dwAffinityMask, apicID, tblPkgID[j], tblCoreID[j], tblSMTID[j]);
-               dStrcat(g_s3Levels, tmp, 2048);
-
-               numLPEnabled ++;   // Number of available logical processors in the system.
-
-            } // if
-
-            j++;  
-            dwAffinityMask = 1 << j;
-         } // while
-
-         // restore the affinity setting to its original state
-#ifdef TORQUE_OS_LINUX
-         sched_setaffinity (0, sizeof(allowedCPUs), &allowedCPUs);
-         sleep(0);
-#elif defined( TORQUE_OS_WIN )
-         SetThreadAffinityMask(GetCurrentThread(), dwProcessAffinity);
-         Sleep(0);
-#else
-#  error Not implemented.
-#endif
-         TotAvailLogical = numLPEnabled;
-
-         //
-         // Count available cores (TotAvailCore) in the system
-         //
-         U8 CoreIDBucket[256];
-         DWORD ProcessorMask, pCoreMask[256];
-         U32 i, ProcessorNum;
-
-         CoreIDBucket[0] = tblPkgID[0] | tblCoreID[0];
-         ProcessorMask = 1;
-         pCoreMask[0] = ProcessorMask;
-
-         for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
-         {
-            ProcessorMask <<= 1;
-            for (i = 0; i < TotAvailCore; i++)
-            {
-               // Comparing bit-fields of logical processors residing in different packages
-               // Assuming the bit-masks are the same on all processors in the system.
-               if ((tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum]) == CoreIDBucket[i])
-               {
-                  pCoreMask[i] |= ProcessorMask;
-                  break;
-               }
-
-            }  // for i
-
-            if (i == TotAvailCore)   // did not match any bucket.  Start a new one.
-            {
-               CoreIDBucket[i] = tblPkgID[ProcessorNum] | tblCoreID[ProcessorNum];
-               pCoreMask[i] = ProcessorMask;
-
-               TotAvailCore++;	// Number of available cores in the system
-
-            }
-
-         }  // for ProcessorNum
-
-
-         //
-         // Count physical processor (PhysicalNum) in the system
-         //
-         U8 PackageIDBucket[256];
-         DWORD pPackageMask[256];
-
-         PackageIDBucket[0] = tblPkgID[0];
-         ProcessorMask = 1;
-         pPackageMask[0] = ProcessorMask;
-
-         for (ProcessorNum = 1; ProcessorNum < numLPEnabled; ProcessorNum++)
-         {
-            ProcessorMask <<= 1;
-            for (i = 0; i < PhysicalNum; i++)
-            {
-               // Comparing bit-fields of logical processors residing in different packages
-               // Assuming the bit-masks are the same on all processors in the system.
-               if (tblPkgID[ProcessorNum]== PackageIDBucket[i])
-               {
-                  pPackageMask[i] |= ProcessorMask;
-                  break;
-               }
-
-            }  // for i
-
-            if (i == PhysicalNum)   // did not match any bucket.  Start a new one.
-            {
-               PackageIDBucket[i] = tblPkgID[ProcessorNum];
-               pPackageMask[i] = ProcessorMask;
-
-               PhysicalNum++;	// Total number of physical processors in the system
-
-            }
-
-         }  // for ProcessorNum
-#endif
-
-         //
-         // Check to see if the system is multi-core 
-         // Check if the system is hyper-threading
-         //
-         if (TotAvailCore > PhysicalNum) 
-         {
-            // Multi-core
-            if (MaxLPPerCore == 1)
-               StatusFlag = CONFIG_MultiCoreAndHTNotCapable;
-            else if (numLPEnabled > TotAvailCore)
-               StatusFlag = CONFIG_MultiCoreAndHTEnabled;
-            else StatusFlag = CONFIG_MultiCoreAndHTDisabled;
-
-         }
-         else
-         {
-            // Single-core
-            if (MaxLPPerCore == 1)
-               StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
-            else if (numLPEnabled > TotAvailCore)
-               StatusFlag = CONFIG_SingleCoreHTEnabled;
-            else StatusFlag = CONFIG_SingleCoreHTDisabled;
-
-
-         }
-
-
-
-         return StatusFlag;
-      }
-
-} // namespace CPUInfo
-#endif
-
-#endif

+ 3 - 8
Engine/source/platform/platformCPUCount.h

@@ -29,13 +29,10 @@ namespace CPUInfo
 {
    enum EConfig
    {
-      CONFIG_UserConfigIssue,
       CONFIG_SingleCoreHTEnabled,
-      CONFIG_SingleCoreHTDisabled,
       CONFIG_SingleCoreAndHTNotCapable,
       CONFIG_MultiCoreAndHTNotCapable,
       CONFIG_MultiCoreAndHTEnabled,
-      CONFIG_MultiCoreAndHTDisabled,
    };
 
    inline bool isMultiCore( EConfig config )
@@ -44,7 +41,6 @@ namespace CPUInfo
       {
       case CONFIG_MultiCoreAndHTNotCapable:
       case CONFIG_MultiCoreAndHTEnabled:
-      case CONFIG_MultiCoreAndHTDisabled:
          return true;
 
       default:
@@ -65,11 +61,10 @@ namespace CPUInfo
       }
    }
 
-   EConfig CPUCount( U32& totalAvailableLogical,
-      U32& totalAvailableCores,
-      U32& numPhysical );
-
+   EConfig CPUCount( U32& totalAvailableLogical, U32& totalAvailableCores );
 } // namespace CPUInfo
 
+void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo, const char* vendor, const char* brand);
+
 #endif // _TORQUE_PLATFORM_PLATFORMCOUNT_H_
 

+ 0 - 128
Engine/source/platform/platformCPUInfo.asm

@@ -1,128 +0,0 @@
-;-----------------------------------------------------------------------------
-; Copyright (c) 2012 GarageGames, LLC
-;
-; Permission is hereby granted, free of charge, to any person obtaining a copy
-; of this software and associated documentation files (the "Software"), to
-; deal in the Software without restriction, including without limitation the
-; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-; sell copies of the Software, and to permit persons to whom the Software is
-; furnished to do so, subject to the following conditions:
-;
-; The above copyright notice and this permission notice shall be included in
-; all copies or substantial portions of the Software.
-;
-; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-; IN THE SOFTWARE.
-;-----------------------------------------------------------------------------
-
-        
-segment .text
-
-; syntax: export_fn <function name>
-%macro export_fn 1
-   %ifidn __OUTPUT_FORMAT__, elf
-   ; No underscore needed for ELF object files
-   global %1
-   %1:
-   %else
-   global _%1
-   _%1:
-   %endif
-%endmacro
-
-; push registers 
-%macro pushreg 0
-;    pushad
-    push ebx
-    push ebp
-    push esi
-    push edi
-%endmacro
-
-; pop registers
-%macro popreg 0
-    pop edi
-    pop esi
-    pop ebp
-    pop ebx
-;    popad
-%endmacro
-      
-; void detectX86CPUInfo(char *vendor, U32 *processor, U32 *properties);
-export_fn detectX86CPUInfo
-   push         ebp
-   mov          ebp, esp
-
-   pushreg
-
-   push         edx
-   push         ecx
-   pushfd
-   pushfd                        ; save EFLAGS to stack
-   pop          eax              ; move EFLAGS into EAX
-   mov          ebx, eax
-   xor          eax, 0x200000    ; flip bit 21
-   push         eax
-   popfd                         ; restore EFLAGS
-   pushfd
-   pop          eax
-   cmp          eax, ebx
-   jz           EXIT             ; doesn't support CPUID instruction
-
-   ;
-   ; get vendor information using CPUID eax == 0
-   xor          eax, eax
-   cpuid
-
-   ; store the vendor tag (12 bytes in ebx, edx, ecx) in the first parameter,
-   ; which should be a char[13]
-   push         eax             ; save eax
-   mov          eax, [ebp+8]    ; store the char* address in eax
-   mov          [eax], ebx      ; move ebx into the first 4 bytes
-   add          eax, 4          ; advance the char* 4 bytes
-   mov          [eax], edx      ; move edx into the next 4 bytes
-   add          eax, 4          ; advance the char* 4 bytes
-   mov          [eax], ecx      ; move ecx into the last 4 bytes
-   pop          eax             ; restore eax
-        
-   ; get processor signature and feature flags using CPUID eax == 1
-   mov          eax, 1
-   cpuid                         ; eax=1, so cpuid queries feature information
-
-   and          eax, 0x0fff3fff
-   push         ecx
-   mov          ecx, [ebp+12]
-   mov          [ecx], eax      ; just store the model bits in processor param
-   mov          ecx, [ebp+16]
-   mov          [ecx], edx      ; set properties param
-   pop          ecx
-
-   ; want to check for 3DNow(tm).  
-   ; need to see if extended cpuid functions present. 
-   mov          eax, 0x80000000
-   cpuid
-   cmp          eax, 0x80000000
-   jbe          MAYBE_3DLATER
-   mov          eax, 0x80000001
-   cpuid
-   ; 3DNow if bit 31 set -> put bit in our properties        
-   and          edx, 0x80000000  
-   push         eax
-   mov          eax, [ebp+16]
-   or           [eax], edx
-   pop          eax
-MAYBE_3DLATER:
-EXIT:
-   popfd
-   pop          ecx
-   pop          edx
-
-   popreg
-
-   pop          ebp
-   ret

+ 1 - 2
Engine/source/platform/threads/threadPool.cpp

@@ -322,10 +322,9 @@ ThreadPool::ThreadPool( const char* name, U32 numThreads )
       // Platform::SystemInfo will not yet have been initialized.
       
       U32 numLogical = 0;
-      U32 numPhysical = 0;
       U32 numCores = 0;
 
-      CPUInfo::CPUCount( numLogical, numCores, numPhysical );
+      CPUInfo::CPUCount( numLogical, numCores );
       
       const U32 baseCount = getMax( numLogical, numCores );
       mNumThreads = (baseCount > 0) ? baseCount : 2;

+ 143 - 173
Engine/source/platformMac/macCPU.mm

@@ -35,15 +35,6 @@
 // we now have to use NSProcessInfo
 #import <Foundation/Foundation.h>
 
-//recently removed in Xcode 8 - most likely don't need these anymore
-#ifndef CPUFAMILY_INTEL_YONAH
-#define CPUFAMILY_INTEL_YONAH		0x73d67300
-#endif
-
-#ifndef CPUFAMILY_INTEL_MEROM
-#define CPUFAMILY_INTEL_MEROM		0x426f69ef
-#endif
-
 // Original code by Sean O'Brien (http://www.garagegames.com/community/forums/viewthread/81815).
 
 
@@ -89,204 +80,183 @@ int _getSysCTLvalue(const char key[], T * dest) {
 
 Platform::SystemInfo_struct Platform::SystemInfo;
 
-#define BASE_MHZ_SPEED      0
-//TODO update cpu list
+#define BASE_MHZ_SPEED 1000
+#define BASE_APPLE_SILICON_MHZ_SPEED 3200
+
+// Returns true only when the sysctl key can be read AND its value is exactly 1.
+// A key may exist (query succeeds) while the feature itself is reported as 0.
+static bool isSysCtlFeatureOn(const char *key)
+{
+   U32 value = 0;
+   const int status = _getSysCTLvalue<U32>(key, &value);
+   return (status == 0) && (value == 1);
+}
+
+// Fills 'procflags' with the CPU_PROP_* bits that sysctl reports for this machine.
+// Any previous content of 'procflags' is overwritten.
+static void detectCpuFeatures(U32 &procflags)
+{
+   // Baseline: every supported CPU gets the C and FPU paths.
+   U32 detected = CPU_PROP_C | CPU_PROP_FPU;
+   
+#if defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X64)
+   
+   // x86 family instruction-set extensions
+   if (isSysCtlFeatureOn("hw.optional.mmx"))
+      detected |= CPU_PROP_MMX;
+   if (isSysCtlFeatureOn("hw.optional.sse"))
+      detected |= CPU_PROP_SSE;
+   if (isSysCtlFeatureOn("hw.optional.sse2"))
+      detected |= CPU_PROP_SSE2;
+   if (isSysCtlFeatureOn("hw.optional.sse3"))
+      detected |= CPU_PROP_SSE3;
+   if (isSysCtlFeatureOn("hw.optional.supplementalsse3"))
+      detected |= CPU_PROP_SSE3ex;
+   if (isSysCtlFeatureOn("hw.optional.sse4_1"))
+      detected |= CPU_PROP_SSE4_1;
+   if (isSysCtlFeatureOn("hw.optional.sse4_2"))
+      detected |= CPU_PROP_SSE4_2;
+   if (isSysCtlFeatureOn("hw.optional.avx1_0"))
+      detected |= CPU_PROP_AVX;
+   
+#elif defined(TORQUE_CPU_ARM64)
+   
+   if (isSysCtlFeatureOn("hw.optional.neon"))
+      detected |= CPU_PROP_NEON;
+   
+#endif
+
+   // Architecture-independent properties
+   U32 raw = 0;
+
+   // More than one CPU reported -> multi-processor
+   if (_getSysCTLvalue<U32>("hw.ncpu", &raw) == 0 && raw > 1)
+      detected |= CPU_PROP_MP;
+
+   if (isSysCtlFeatureOn("hw.cpu64bit_capable"))
+      detected |= CPU_PROP_64bit;
+
+   // hw.byteorder reports 1234 on little-endian hosts
+   if (_getSysCTLvalue<U32>("hw.byteorder", &raw) == 0 && raw == 1234)
+      detected |= CPU_PROP_LE;
+
+   procflags = detected;
+}
+
 void Processor::init()
 {
-	U32 procflags;
+	U32 procflags = 0;
 	int err, cpufam, cputype, cpusub;
 	char buf[255];
 	U32 lraw;
 	U64 llraw;
-	
-	Con::printf( "System & Processor Information:" );
 
-   // Gestalt has been deprecated since Mac OSX Mountain Lion and has stopped working on
-   // Mac OSX Yosemite. we have to use NSProcessInfo now.
    // Availability: Mac OS 10.2 or greater.
    NSString *osVersionStr = [[NSProcessInfo processInfo] operatingSystemVersionString];
-   Con::printf( "   OSX Version: %s", [osVersionStr UTF8String]);
-	
-	err = _getSysCTLstring("kern.ostype", buf, sizeof(buf));	
-	if (err)
-		Con::printf( "   Unable to determine OS type\n" );
-	else
-		Con::printf( "   Mac OS Kernel name: %s", buf);
-	
-	err = _getSysCTLstring("kern.osrelease", buf, sizeof(buf));	
-	if (err)
-		Con::printf( "   Unable to determine OS release number\n" );
-	else
-		Con::printf( "   Mac OS Kernel version: %s", buf );
-	
+
+   S32 ramMB;
 	err = _getSysCTLvalue<U64>("hw.memsize", &llraw);
 	if (err)
-		Con::printf( "   Unable to determine amount of physical RAM\n" );
+      ramMB = 512;
 	else
-		Con::printf( "   Physical memory installed: %d MB", (llraw >> 20));
-	
-	err = _getSysCTLvalue<U32>("hw.usermem", &lraw);
-	if (err)
-		Con::printf( "   Unable to determine available user address space\n");
-	else
-		Con::printf( "   Addressable user memory: %d MB", (lraw >> 20));
-	
-	////////////////////////////////
-	// Values for the Family Type, CPU Type and CPU Subtype are defined in the
-	// SDK files for the Mach Kernel ==>  mach/machine.h
-	////////////////////////////////
+      ramMB = llraw >> 20;
 	
-	// CPU Family, Type, and Subtype
-	cpufam = 0;
-	cputype = 0;
-	cpusub = 0;
-	err = _getSysCTLvalue<U32>("hw.cpufamily", &lraw);
-	if (err)
-		Con::printf( "   Unable to determine 'family' of CPU\n");
-	else {
-		cpufam = (int) lraw;
-		err = _getSysCTLvalue<U32>("hw.cputype", &lraw);
-		if (err)
-			Con::printf( "   Unable to determine CPU type\n");
-		else {
-			cputype = (int) lraw;
-			err = _getSysCTLvalue<U32>("hw.cpusubtype", &lraw);
-			if (err)
-				Con::printf( "   Unable to determine CPU subtype\n");
-			else
-				cpusub = (int) lraw;
-			// If we've made it this far, 
-			Con::printf( "   Installed processor ID: Family 0x%08x  Type %d  Subtype %d",cpufam, cputype,cpusub);
-		}
-	}
-	
-	// The Gestalt version was known to have issues with some Processor Upgrade cards
-	// but it is uncertain whether this version has similar issues.
+   char brandString[256];
+   err = _getSysCTLstring("machdep.cpu.brand_string", brandString, sizeof(brandString));
+   if (err)
+      brandString[0] = '\0';
+   
+   char vendor[256];
+   err = _getSysCTLstring("machdep.cpu.vendor", vendor, sizeof(vendor));
+   if (err)
+      vendor[0] = '\0';
+   
+   // Note: hw.cpufrequency seems to be missing on the M1. For Apple Silicon,
+   // we will assume the base frequency of the M1 which is 3.2ghz
 	err = _getSysCTLvalue<U64>("hw.cpufrequency", &llraw);
 	if (err) {
+#if defined(TORQUE_CPU_ARM64)
+      llraw = BASE_APPLE_SILICON_MHZ_SPEED;
+#else
 		llraw = BASE_MHZ_SPEED;
-		Con::printf( "   Unable to determine CPU Frequency. Defaulting to %d MHz\n", llraw);
+#endif
 	} else {
 		llraw /= 1000000;
-		Con::printf( "   Installed processor clock frequency: %d MHz", llraw);
 	}
 	Platform::SystemInfo.processor.mhz = (unsigned int)llraw;
 	
-	// Here's one that the original version of this routine couldn't do -- number
-	// of processors (cores)
-   U32 ncpu = 1;
-	err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
-	if (err)
-		Con::printf( "   Unable to determine number of processor cores\n");
-	else
-   {
-      ncpu = lraw;
-		Con::printf( "   Installed/available processor cores: %d", lraw);
-   }
-	
-	// Now use CPUFAM to determine and then store the processor type
-	// and 'friendly name' in GG-accessible structure. Note that since
-	// we have access to the Family code, the Type and Subtypes are useless.
-	//
-	// NOTE: Even this level of detail is almost assuredly not needed anymore
-	// and the Optional Capability flags (further down) should be more than enough.
-	switch(cpufam)
-	{
-		case CPUFAMILY_INTEL_YONAH:
-			Platform::SystemInfo.processor.type = CPU_Intel_Core;
-         if( ncpu == 2 )
-            Platform::SystemInfo.processor.name = StringTable->insert("Intel Core Duo");
-         else
-            Platform::SystemInfo.processor.name = StringTable->insert("Intel Core");
-			break;
-      case CPUFAMILY_INTEL_PENRYN:
-		case CPUFAMILY_INTEL_MEROM:
-			Platform::SystemInfo.processor.type = CPU_Intel_Core2;
-         if( ncpu == 4 )
-            Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Quad");
-         else
-            Platform::SystemInfo.processor.name = StringTable->insert("Intel Core 2 Duo");
-			break;
-         
-      case CPUFAMILY_INTEL_NEHALEM:
-         Platform::SystemInfo.processor.type = CPU_Intel_Core2;
-         Platform::SystemInfo.processor.name = StringTable->insert( "Intel 'Nehalem' Core Processor" );
-         break;
-      
-		default:
-			// explain why we can't get the processor type.
-			Con::warnf( "   Unknown Processor (family, type, subtype): 0x%x\t%d  %d", cpufam, cputype, cpusub);
-			// for now, identify it as an x86 processor, because Apple is moving to Intel chips...
-			Platform::SystemInfo.processor.type = CPU_X86Compatible;
-			Platform::SystemInfo.processor.name = StringTable->insert("Unknown Processor, assuming x86 Compatible");
-			break;
-	}
-   // Now we can directly query the system about a litany of "Optional" processor capabilities
-	// and determine the status by using BOTH the 'err' value and the 'lraw' value. If we request
-	// a non-existant feature from SYSCTL(), the 'err' result will be -1; 0 denotes it exists 
-	// >>>> BUT <<<<<
-	// it may not be supported, only defined. Thus we need to check 'lraw' to determine if it's 
-	// actually supported/implemented by the processor: 0 = no, 1 = yes, others are undefined.
-	procflags = 0;
-	// Seriously this one should be an Assert()
-	err = _getSysCTLvalue<U32>("hw.optional.floatingpoint", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_FPU;
-	// List of chip-specific features
-	err = _getSysCTLvalue<U32>("hw.optional.mmx", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_MMX;
-	err = _getSysCTLvalue<U32>("hw.optional.sse", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE;
-	err = _getSysCTLvalue<U32>("hw.optional.sse2", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE2;
-	err = _getSysCTLvalue<U32>("hw.optional.sse3", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3;
-	err = _getSysCTLvalue<U32>("hw.optional.supplementalsse3", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE3xt;
-	err = _getSysCTLvalue<U32>("hw.optional.sse4_1", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_1;
-	err = _getSysCTLvalue<U32>("hw.optional.sse4_2", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_SSE4_2;
-
-	// Finally some architecture-wide settings
-	err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
-	if ((err==0)&&(lraw>1)) procflags |= CPU_PROP_MP;
-	err = _getSysCTLvalue<U32>("hw.cpu64bit_capable", &lraw);
-	if ((err==0)&&(lraw==1)) procflags |= CPU_PROP_64bit;
-	err = _getSysCTLvalue<U32>("hw.byteorder", &lraw);
-	if ((err==0)&&(lraw==1234)) procflags |= CPU_PROP_LE;
-
-	Platform::SystemInfo.processor.properties = procflags;
-	
-	Con::printf( "%s, %2.2f GHz", Platform::SystemInfo.processor.name, F32( Platform::SystemInfo.processor.mhz ) / 1000.0 );
+   detectCpuFeatures(procflags);
+   
+   Platform::SystemInfo.processor.properties = procflags;
+   SetProcessorInfo(Platform::SystemInfo.processor, vendor, brandString);
+   
+   
+   Con::printf("System & Processor Information:");
+   Con::printf("   MacOS Version: %s", [osVersionStr UTF8String]);
+   Con::printf("   Physical memory installed: %d MB", ramMB);
+   Con::printf("   Processor: %s", Platform::SystemInfo.processor.name);
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
-		Con::printf( "   MMX detected");
+		Con::printf("      MMX detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
-		Con::printf( "   SSE detected");
+		Con::printf("      SSE detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
-		Con::printf( "   SSE2 detected");
+		Con::printf("      SSE2 detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
-		Con::printf( "   SSE3 detected");
+		Con::printf("      SSE3 detected");
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
+      Con::printf("      SSE3ex detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
-		Con::printf( "   SSE4.1 detected");
+		Con::printf("      SSE4.1 detected");
 	if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
-		Con::printf( "   SSE4.2 detected");
-	
+		Con::printf("      SSE4.2 detected");
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
+      Con::printf("      AVX detected");
+	if (Platform::SystemInfo.processor.properties & CPU_PROP_NEON)
+      Con::printf("      Neon detected");
+
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_MP)
+      Con::printf("   MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors);
+   
 	Con::printf( "" );
    
    // Trigger the signal
    Platform::SystemInfoReady.trigger();
 }
 
+
 namespace CPUInfo {
-   EConfig CPUCount(U32 &logical, U32 &numCores, U32 &numPhysical) {
-      // todo properly implement this
-      logical = [[NSProcessInfo processInfo] activeProcessorCount];
-      numCores = [[NSProcessInfo processInfo] activeProcessorCount];
-      numPhysical = [[NSProcessInfo processInfo] processorCount];
+   EConfig CPUCount(U32 &logical, U32 &physical) {
+      U32 lraw;
+      int err;
+      
+      err = _getSysCTLvalue<U32>("hw.physicalcpu", &lraw);
+      if (err == 0)
+         physical = lraw;
+      else
+         physical = 1;
+      
+      err = _getSysCTLvalue<U32>("hw.logicalcpu", &lraw);
+      if (err == 0)
+      {
+         logical = lraw;
+      }
+      else
+      {
+         // fallback to querying the number of cpus. If that fails, then assume same as number of cores
+         err = _getSysCTLvalue<U32>("hw.ncpu", &lraw);
+         if (err == 0)
+            logical = lraw;
+         else
+            logical = physical;
+      }
+      
+      const bool smtEnabled = logical > physical;
+      
+      if (physical == 1)
+         return smtEnabled ? CONFIG_SingleCoreHTEnabled : CONFIG_SingleCoreAndHTNotCapable;
       
-      // todo check for hyperthreading
-      if (numCores > 1)
-         return CONFIG_MultiCoreAndHTNotCapable;
-      return CONFIG_SingleCoreAndHTNotCapable;
+      return smtEnabled ? CONFIG_MultiCoreAndHTEnabled : CONFIG_MultiCoreAndHTNotCapable;
    }
 }

+ 0 - 3
Engine/source/platformMac/macMath.mm

@@ -27,8 +27,6 @@
 #include "console/engineAPI.h"
 
 extern void mInstallLibrary_C();
-extern void mInstallLibrary_Vec();
-extern void mInstall_Library_SSE();
 
 static MRandomLCG sgPlatRandom;
 
@@ -115,7 +113,6 @@ void Math::init(U32 properties)
    if( properties & CPU_PROP_SSE )
    {
       Con::printf( "   Installing SSE extensions" );
-      mInstall_Library_SSE();
    }
    #endif
    

+ 214 - 11
Engine/source/platformPOSIX/POSIXCPUInfo.cpp

@@ -22,30 +22,233 @@
 
 #ifndef __APPLE__
 
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+
 #include "platform/platform.h"
 #include "platformPOSIX/platformPOSIX.h"
+#include "platform/platformCPUCount.h"
+
 #include "console/console.h"
-#include "core/stringTable.h"
-#include "core/strings/stringFunctions.h"
-#include <math.h>
 
-#include "platform/platformCPUCount.h"
 #include <unistd.h>
 
 Platform::SystemInfo_struct Platform::SystemInfo;
 
-void Processor::init() {}
+// Removes trailing whitespace (space, tab, newline, CR, form feed, vertical tab) from 's' in place.
+// Written with std::string searches only, so it does not rely on std::ptr_fun/std::not1
+// (removed in C++17) nor on passing a possibly-negative char to isspace().
+static inline void rtrim(std::string &s)
+{
+    // find_last_not_of() yields npos for an all-whitespace string; npos + 1 wraps to 0,
+    // which erases the whole string as intended.
+    s.erase(s.find_last_not_of(" \t\n\r\f\v") + 1);
+}
+
+// Removes leading whitespace (space, tab, newline, CR, form feed, vertical tab) from 's' in place.
+// Written with std::string searches only, so it does not rely on std::ptr_fun/std::not1
+// (removed in C++17) nor on passing a possibly-negative char to isspace().
+static inline void ltrim(std::string &s)
+{
+    // find_first_not_of() yields npos for an all-whitespace string, and
+    // erase(0, npos) then clears the whole string as intended.
+    s.erase(0, s.find_first_not_of(" \t\n\r\f\v"));
+}
+
+// Populates Platform::SystemInfo.processor from /proc/cpuinfo and sysfs:
+// vendor/brand strings, feature flags, logical/physical core counts and clock speed.
+// Property bits are OR'ed into the existing 'properties' value, never cleared.
+static void getCPUInformation()
+{
+    // Maps one token of the /proc/cpuinfo "flags" field to the property bit it enables.
+    struct FlagMapping
+    {
+        const char *token;
+        U32 property;
+    };
+    static const FlagMapping sFlagMappings[] =
+    {
+        { "fpu",    CPU_PROP_FPU    },
+        { "mmx",    CPU_PROP_MMX    },
+        { "sse",    CPU_PROP_SSE    },
+        { "sse2",   CPU_PROP_SSE2   },
+        { "pni",    CPU_PROP_SSE3   }, // Linux reports SSE3 under its original name "pni"
+        { "sse3",   CPU_PROP_SSE3   }, // kept in case a kernel ever reports it literally
+        { "ssse3",  CPU_PROP_SSE3ex },
+        { "sse4_1", CPU_PROP_SSE4_1 },
+        { "sse4_2", CPU_PROP_SSE4_2 },
+        { "avx",    CPU_PROP_AVX    },
+    };
+    const U32 numFlagMappings = sizeof(sFlagMappings) / sizeof(sFlagMappings[0]);
+
+    // Used when no clock speed can be read at all; matches the other platforms' default.
+    const U32 fallbackMHz = 1000;
+
+    std::string vendorString;
+    std::string brandString;
+
+    U32 logicalCoreCount = 0;
+    U32 physicalCoreCount = 1;
+    U32 reportedMHz = 0;   // first "cpu MHz" value seen in /proc/cpuinfo, if any
+
+    std::ifstream cpuInfo("/proc/cpuinfo");
+    if (cpuInfo.is_open())
+    {
+        std::string line;
+        while (std::getline(cpuInfo, line))
+        {
+            const std::string::size_type separator = line.find(':');
+
+            std::string fieldName = line.substr(0, separator);
+            rtrim(fieldName);
+
+            // Each logical processor entry is terminated by a blank line
+            if (fieldName.empty())
+            {
+                ++logicalCoreCount;
+                continue;
+            }
+
+            // Not a "name : value" line, nothing to read from it
+            if (separator == std::string::npos)
+                continue;
+
+            std::string fieldValue = line.substr(separator + 1);
+            ltrim(fieldValue);
+            rtrim(fieldValue);
+
+            if (fieldName == "vendor_id")
+            {
+                vendorString = fieldValue;
+            }
+            else if (fieldName == "model name")
+            {
+                brandString = fieldValue;
+            }
+            else if (fieldName == "cpu cores")
+            {
+                physicalCoreCount = dAtoui(fieldValue.c_str());
+            }
+            else if (fieldName == "cpu MHz")
+            {
+                // Only used as a fallback when sysfs exposes no base frequency
+                if (reportedMHz == 0)
+                {
+                    std::istringstream mhzStream(fieldValue);
+                    double parsedMHz = 0.0;
+                    if ((mhzStream >> parsedMHz) && parsedMHz > 0.0)
+                        reportedMHz = (U32)parsedMHz;
+                }
+            }
+            else if (fieldName == "flags")
+            {
+                // Tokens are whitespace separated; test each one against the table
+                std::istringstream flagStream(fieldValue);
+                std::string currentFlag;
+                while (flagStream >> currentFlag)
+                {
+                    for (U32 i = 0; i < numFlagMappings; ++i)
+                    {
+                        if (currentFlag == sFlagMappings[i].token)
+                            Platform::SystemInfo.processor.properties |= sFlagMappings[i].property;
+                    }
+                }
+            }
+        }
+
+        cpuInfo.close();
+    }
+
+    // Never report zero processors: covers an unreadable or oddly formatted
+    // /proc/cpuinfo as well as an unparsable "cpu cores" value.
+    if (logicalCoreCount == 0)
+        logicalCoreCount = 1;
+    if (physicalCoreCount == 0)
+        physicalCoreCount = 1;
+
+    Platform::SystemInfo.processor.numLogicalProcessors = logicalCoreCount;
+    Platform::SystemInfo.processor.numPhysicalProcessors = physicalCoreCount;
+    Platform::SystemInfo.processor.isHyperThreaded = logicalCoreCount > physicalCoreCount;
+    Platform::SystemInfo.processor.isMultiCore = physicalCoreCount > 1;
+    if (Platform::SystemInfo.processor.isMultiCore)
+    {
+        Platform::SystemInfo.processor.properties |= CPU_PROP_MP;
+    }
+
+    // Load processor base frequency. This sysfs file is not present on every system,
+    // so fall back to /proc/cpuinfo's "cpu MHz" and finally to a fixed default.
+    U32 mhz = 0;
+    std::ifstream baseFrequencyStream("/sys/devices/system/cpu/cpu0/cpufreq/base_frequency");
+    if (baseFrequencyStream.is_open())
+    {
+        U32 baseFrequencyKHz = 0;
+        if (baseFrequencyStream >> baseFrequencyKHz)
+            mhz = baseFrequencyKHz / 1000;
+        baseFrequencyStream.close();
+    }
+    if (mhz == 0)
+        mhz = reportedMHz;
+    if (mhz == 0)
+        mhz = fallbackMHz;
+    Platform::SystemInfo.processor.mhz = mhz;
+
+    SetProcessorInfo(Platform::SystemInfo.processor, vendorString.c_str(), brandString.c_str());
+}
+
+// Detects the CPU via getCPUInformation(), adds architecture-level default
+// properties, and prints a summary of what was found to the console.
+void Processor::init() 
+{
+    getCPUInformation();
+
+    // NOTE(review): the macOS code tests TORQUE_CPU_X86 for 32-bit x86 while this file
+    // tested TORQUE_CPU_X32; both spellings are accepted here -- confirm which one the
+    // platform headers actually define.
+#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_X86) || defined(TORQUE_CPU_X32)
+    // Set sane default information
+    Platform::SystemInfo.processor.properties |= CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE ;
+
+#elif defined(TORQUE_CPU_ARM32) || defined(TORQUE_CPU_ARM64)
+    Platform::SystemInfo.processor.type = CPU_ArmCompatible;
+    Platform::SystemInfo.processor.name = StringTable->insert("Unknown ARM Processor");
+    // OR the flag in: a plain assignment here would throw away CPU_PROP_MP and any
+    // other bits getCPUInformation() has just detected.
+    Platform::SystemInfo.processor.properties |= CPU_PROP_C;
+#else
+#warning Unsupported CPU
+#endif
+
+    // Set 64bit flag
+#if defined(TORQUE_CPU_X64) || defined(TORQUE_CPU_ARM64)
+    Platform::SystemInfo.processor.properties |= CPU_PROP_64bit;
+#endif
+
+    // Once CPU information is resolved, produce an output like Windows does
+    Con::printf("Processor Init:");
+    Con::printf("   Processor: %s", Platform::SystemInfo.processor.name);
+    if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
+        Con::printf("      MMX detected" );
+    if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
+        Con::printf("      SSE detected" );
+    if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
+        Con::printf("      SSE2 detected" );
+    if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
+        Con::printf("      SSE3 detected" );
+    if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
+        Con::printf("      SSE4.1 detected" );
+    if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
+        Con::printf("      SSE4.2 detected" );
+    if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
+        Con::printf("      AVX detected" );
+    if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
+        Con::printf("      SSE3ex detected" );
+    if (Platform::SystemInfo.processor.properties & CPU_PROP_MP)
+        Con::printf("   MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors);
+
+    Con::printf(" ");
+}
 
-// TODO LINUX CPUInfo::CPUCount better support
 namespace CPUInfo
 {
-    EConfig CPUCount(U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum)
+    // Reports the number of logical processors and physical cores and classifies the
+    // core / SMT configuration.
+    //   logical  [out] number of logical processors (always >= 1)
+    //   physical [out] number of physical cores (always >= 1)
+    // Callers such as ThreadPool invoke this before Processor::init() has filled in
+    // Platform::SystemInfo, so the out-parameters must always be written and the OS is
+    // queried directly when the cached values are still zero.
+    EConfig CPUCount(U32 &logical, U32 &physical)
     {
-        PhysicalNum = TotAvailCore = 0;
-        TotAvailLogical = (int)sysconf(_SC_NPROCESSORS_ONLN);
+        logical = Platform::SystemInfo.processor.numLogicalProcessors;
+        physical = Platform::SystemInfo.processor.numPhysicalProcessors;
+
+        if (logical == 0)
+        {
+            // SystemInfo not populated yet: ask the OS how many processors are online
+            const long online = sysconf(_SC_NPROCESSORS_ONLN);
+            logical = (online > 0) ? (U32)online : 1;
+        }
+
+        if (physical == 0)
+        {
+            // Core count unknown at this point; estimate one core per logical processor
+            physical = logical;
+        }
+
+        const bool smtEnabled = logical > physical;
+
+        if (physical == 1)
+            return smtEnabled ? CONFIG_SingleCoreHTEnabled : CONFIG_SingleCoreAndHTNotCapable;
 
-       return CONFIG_SingleCoreHTDisabled;
+        return smtEnabled ? CONFIG_MultiCoreAndHTEnabled : CONFIG_MultiCoreAndHTNotCapable;
     }
-}; // namespace CPUInfo 
+}; // namespace CPUInfo
 
 #endif

+ 0 - 15
Engine/source/platformPOSIX/POSIXMath.cpp

@@ -31,11 +31,6 @@
 extern void mInstallLibrary_C();
 extern void mInstallLibrary_ASM();
 
-
-extern void mInstall_AMD_Math();
-extern void mInstall_Library_SSE();
-
-
 //--------------------------------------
 DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
                                      "@brief Install the math library with specified extensions.\n\n"
@@ -70,10 +65,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
          properties |= CPU_PROP_MMX;
          continue;
       }
-      if (dStricmp(*argv, "3DNOW") == 0) {
-         properties |= CPU_PROP_3DNOW;
-         continue;
-      }
       if (dStricmp(*argv, "SSE") == 0) {
          properties |= CPU_PROP_SSE;
          continue;
@@ -112,18 +103,12 @@ void Math::init(U32 properties)
    if (properties & CPU_PROP_MMX)
    {
       Con::printf("   Installing MMX extensions");
-      if (properties & CPU_PROP_3DNOW)
-      {
-         Con::printf("   Installing 3DNow extensions");
-         mInstall_AMD_Math();
-      }
    }
 
 #if !defined(__MWERKS__) || (__MWERKS__ >= 0x2400)
    if (properties & CPU_PROP_SSE)
    {
       Con::printf("   Installing SSE extensions");
-      mInstall_Library_SSE();
    }
 #endif //mwerks>2.4
 

+ 106 - 59
Engine/source/platformWin32/winCPUInfo.cpp

@@ -24,13 +24,89 @@
 #include "platformWin32/platformWin32.h"
 #include "console/console.h"
 #include "core/stringTable.h"
+#include "platform/platformCPUCount.h"
 #include <math.h>
 #include <intrin.h>
 
 Platform::SystemInfo_struct Platform::SystemInfo;
 extern void PlatformBlitInit();
-extern void SetProcessorInfo(Platform::SystemInfo_struct::Processor& pInfo,
-   char* vendor, U32 processor, U32 properties, U32 properties2); // platform/platformCPU.cc
+
+// Writes the processor brand string into 'brand' using CPUID leaves
+// 0x80000002..0x80000004 (16 bytes per leaf, 48 bytes in total). 'brand' is left
+// untouched when those leaves are not reported as available.
+static void getBrand(char* brand)
+{
+   // EAX of leaf 0x80000000 is compared against the last brand-string leaf
+   S32 extendedInfo[4];
+   __cpuid(extendedInfo, 0x80000000);
+   S32 numberExtendedIds = extendedInfo[0];
+
+   // Brand string leaves not available: nothing to write
+   if (!(numberExtendedIds >= 0x80000004))
+      return;
+
+   // Store EAX, EBX, ECX, EDX of each leaf back to back
+   int* cursor = reinterpret_cast<int*>(brand);
+   for (int leaf = 0; leaf < 3; ++leaf)
+   {
+      S32 regs[4];
+      __cpuidex(regs, 0x80000002 + leaf, 0);
+
+      for (int reg = 0; reg < 4; ++reg)
+         *cursor++ = regs[reg];
+   }
+}
+
+// Bit masks tested against the registers returned by __cpuid(cpuInfo, 1) in
+// detectCpuFeatures(). The EDX and ECX groups overlap numerically, so each mask
+// must only be applied to the register named in its group comment.
+enum CpuFlags
+{
+   // EDX Register flags
+   BIT_MMX = BIT(23),
+   BIT_SSE = BIT(25),
+   BIT_SSE2 = BIT(26),
+   BIT_3DNOW = BIT(31), // AMD only; not tested by detectCpuFeatures(). NOTE(review): 3DNow! is reported by leaf 0x80000001, not leaf 1 - confirm before using
+
+   // These use a different value for comparison than the above flags (ECX Register)
+   BIT_SSE3 = BIT(0),
+   BIT_SSE3ex = BIT(9),
+   BIT_SSE4_1 = BIT(19),
+   BIT_SSE4_2 = BIT(20),
+
+   // Checked together before _xgetbv() is called to confirm AVX support.
+   // NOTE(review): Intel documents ECX bit 27 of leaf 1 as OSXSAVE - confirm the name.
+   BIT_XSAVE_RESTORE = BIT(27),
+   BIT_AVX = BIT(28),
+};
+
+// ORs the CPU_PROP_* bits for the instruction-set extensions reported by CPUID
+// leaf 1 into processor.properties, plus CPU_PROP_MP (when processor.isMultiCore is
+// already set) and CPU_PROP_64bit (on x64 builds).
+static void detectCpuFeatures(Platform::SystemInfo_struct::Processor &processor)
+{
+   S32 cpuInfo[4];
+   __cpuid(cpuInfo, 1);
+   const U32 ecx = cpuInfo[2]; // ecx
+   const U32 edx = cpuInfo[3]; // edx
+
+   processor.properties |= (edx & BIT_MMX) ? CPU_PROP_MMX : 0;
+   processor.properties |= (edx & BIT_SSE) ? CPU_PROP_SSE : 0;
+   processor.properties |= (edx & BIT_SSE2) ? CPU_PROP_SSE2 : 0;
+   processor.properties |= (ecx & BIT_SSE3) ? CPU_PROP_SSE3 : 0;
+   processor.properties |= (ecx & BIT_SSE3ex) ? CPU_PROP_SSE3ex : 0;
+   processor.properties |= (ecx & BIT_SSE4_1) ? CPU_PROP_SSE4_1 : 0;
+   processor.properties |= (ecx & BIT_SSE4_2) ? CPU_PROP_SSE4_2 : 0;
+
+   // AVX detection requires that xsaverestore is supported, and that the OS has
+   // enabled saving of BOTH the XMM (bit 1) and YMM (bit 2) register state.
+   if ((ecx & BIT_XSAVE_RESTORE) && (ecx & BIT_AVX))
+   {
+      const U64 avxStateMask = 0x6;
+      const U64 enabledState = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+
+      // Testing for non-zero would wrongly accept a system with only one bit set
+      if ((enabledState & avxStateMask) == avxStateMask)
+      {
+         processor.properties |= CPU_PROP_AVX;
+      }
+   }
+
+   if (processor.isMultiCore)
+      processor.properties |= CPU_PROP_MP;
+
+#ifdef TORQUE_CPU_X64
+   processor.properties |= CPU_PROP_64bit;
+#endif
+}
 
 void Processor::init()
 {
@@ -40,18 +116,13 @@ void Processor::init()
    //    www.intel.com
    //       http://developer.intel.com/design/PentiumII/manuals/24512701.pdf
 
-   Con::printf("Processor Init:");
-
    Platform::SystemInfo.processor.type = CPU_X86Compatible;
    Platform::SystemInfo.processor.name = StringTable->insert("Unknown x86 Compatible");
    Platform::SystemInfo.processor.mhz  = 0;
-   Platform::SystemInfo.processor.properties = CPU_PROP_C | CPU_PROP_LE;
+   Platform::SystemInfo.processor.properties = CPU_PROP_C | CPU_PROP_FPU | CPU_PROP_LE;
 
    char  vendor[0x20];
    dMemset(vendor, 0, sizeof(vendor));
-   U32   properties = 0;
-   U32   processor  = 0;
-   U32   properties2 = 0;
 
    S32 vendorInfo[4];
    __cpuid(vendorInfo, 0);
@@ -59,17 +130,14 @@ void Processor::init()
    *reinterpret_cast<int*>(vendor + 4) = vendorInfo[3]; // edx
    *reinterpret_cast<int*>(vendor + 8) = vendorInfo[2]; // ecx
 
-   S32 cpuInfo[4];
-   __cpuid(cpuInfo, 1);
-   processor = cpuInfo[0];   // eax
-   properties = cpuInfo[3];  // edx
-   properties2 = cpuInfo[2]; // ecx
+   char brand[0x40];
+   dMemset(brand, 0, sizeof(brand));
+   getBrand(brand);
 
-   SetProcessorInfo(Platform::SystemInfo.processor, vendor, processor, properties, properties2);
+   SetProcessorInfo(Platform::SystemInfo.processor, vendor, brand);
+   detectCpuFeatures(Platform::SystemInfo.processor);
 
-// now calculate speed of processor...
-   U32 nearmhz = 0; // nearest rounded mhz
-   U32 mhz = 0; // calculated value.
+   U32 mhz = 1000; // default if it can't be found
 
    LONG result;
    DWORD data = 0;
@@ -83,56 +151,35 @@ void Processor::init()
       result = ::RegQueryValueExA (hKey, "~MHz",NULL, NULL,(LPBYTE)&data, &dataSize);
 
       if (result == ERROR_SUCCESS)
-         nearmhz = mhz = data;
+         mhz = data;
 
       ::RegCloseKey(hKey);
    }
 
    Platform::SystemInfo.processor.mhz = mhz;
 
-   if (mhz==0)
-   {
-      Con::printf("   %s, (Unknown) Mhz", Platform::SystemInfo.processor.name);
-      // stick SOMETHING in so it isn't ZERO.
-      Platform::SystemInfo.processor.mhz = 200; // seems a decent value.
-   }
-   else
-   {
-      if (nearmhz >= 1000)
-         Con::printf("   %s, ~%.2f Ghz", Platform::SystemInfo.processor.name, ((float)nearmhz)/1000.0f);
-      else
-         Con::printf("   %s, ~%d Mhz", Platform::SystemInfo.processor.name, nearmhz);
-      if (nearmhz != mhz)
-      {
-         if (mhz >= 1000)
-            Con::printf("     (timed at roughly %.2f Ghz)", ((float)mhz)/1000.0f);
-         else
-            Con::printf("     (timed at roughly %d Mhz)", mhz);
-      }
-   }
-
-   if( Platform::SystemInfo.processor.numAvailableCores > 0
-       || Platform::SystemInfo.processor.numPhysicalProcessors > 0
-       || Platform::SystemInfo.processor.isHyperThreaded )
-      Platform::SystemInfo.processor.properties |= CPU_PROP_MP;
-
-   if (Platform::SystemInfo.processor.properties & CPU_PROP_FPU)
-      Con::printf( "   FPU detected" );
+   Con::printf("Processor Init:");
+   Con::printf("   Processor: %s", Platform::SystemInfo.processor.name);
    if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
-      Con::printf( "   MMX detected" );
-   if (Platform::SystemInfo.processor.properties & CPU_PROP_3DNOW)
-      Con::printf( "   3DNow detected" );
+      Con::printf("      MMX detected" );
    if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
-      Con::printf( "   SSE detected" );
-   if( Platform::SystemInfo.processor.properties & CPU_PROP_SSE2 )
-      Con::printf( "   SSE2 detected" );
-   if( Platform::SystemInfo.processor.isHyperThreaded )
-      Con::printf( "   HT detected" );
-   if( Platform::SystemInfo.processor.properties & CPU_PROP_MP )
-      Con::printf( "   MP detected [%i cores, %i logical, %i physical]",
-         Platform::SystemInfo.processor.numAvailableCores,
-         Platform::SystemInfo.processor.numLogicalProcessors,
-         Platform::SystemInfo.processor.numPhysicalProcessors );
+      Con::printf("      SSE detected" );
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE2)
+      Con::printf("      SSE2 detected" );
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3)
+      Con::printf("      SSE3 detected" );
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE3ex)
+      Con::printf("      SSE3ex detected ");
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_1)
+      Con::printf("      SSE4.1 detected" );
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_SSE4_2)
+      Con::printf("      SSE4.2 detected" );
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_AVX)
+      Con::printf("      AVX detected");
+
+   if (Platform::SystemInfo.processor.properties & CPU_PROP_MP)
+      Con::printf("   MultiCore CPU detected [%i cores, %i logical]", Platform::SystemInfo.processor.numPhysicalProcessors, Platform::SystemInfo.processor.numLogicalProcessors);
+
    Con::printf(" ");
    
    PlatformBlitInit();

+ 0 - 13
Engine/source/platformWin32/winMath.cpp

@@ -29,8 +29,6 @@
 
 extern void mInstallLibrary_C();
 extern void mInstallLibrary_ASM();
-extern void mInstall_AMD_Math();
-extern void mInstall_Library_SSE();
 
 //--------------------------------------
 DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
@@ -40,7 +38,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
                 "    - 'C' Enable the C math routines. C routines are always enabled.\n\n"
                 "    - 'FPU' Enable floating point unit routines.\n\n"
                 "    - 'MMX' Enable MMX math routines.\n\n"
-                "    - '3DNOW' Enable 3dNow! math routines.\n\n"
                 "    - 'SSE' Enable SSE math routines.\n\n"
 				"@ingroup Math")
 
@@ -72,10 +69,6 @@ DefineEngineStringlyVariadicFunction( mathInit, void, 1, 10, "( ... )"
          properties |= CPU_PROP_MMX;
          continue;
       }
-      if (dStricmp(str, "3DNOW") == 0) {
-         properties |= CPU_PROP_3DNOW;
-         continue;
-      }
       if (dStricmp(str, "SSE") == 0) {
          properties |= CPU_PROP_SSE;
          continue;
@@ -116,17 +109,11 @@ void Math::init(U32 properties)
    if (properties & CPU_PROP_MMX)
    {
       Con::printf("   Installing MMX extensions");
-      if (properties & CPU_PROP_3DNOW)
-      {
-         Con::printf("   Installing 3DNow extensions");
-         mInstall_AMD_Math();
-      }
    }
 
    if (properties & CPU_PROP_SSE)
    {
       Con::printf("   Installing SSE extensions");
-      mInstall_Library_SSE();
    }
 
    Con::printf(" ");

+ 16 - 22
Engine/source/platformWin32/winPlatformCPUCount.cpp

@@ -26,6 +26,7 @@
 #if defined( TORQUE_OS_WIN )
 
 #include "platform/platformCPUCount.h"
+#include "console/console.h"
 #include <windows.h>
 #include <intrin.h>
 #include <stdio.h>
@@ -52,12 +53,10 @@ namespace CPUInfo {
       return bitSetCount;
    }
 
-   EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore, U32& PhysicalNum )
+   EConfig CPUCount( U32& TotAvailLogical, U32& TotAvailCore )
    {
-      EConfig StatusFlag = CONFIG_UserConfigIssue;
       TotAvailLogical = 0;
       TotAvailCore = 0;
-      PhysicalNum = 0;
 
       PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
       DWORD returnLength = 0;
@@ -68,42 +67,37 @@ namespace CPUInfo {
 
       rc = GetLogicalProcessorInformation( buffer, &returnLength );      
 
+      // If the query fails, assume one core with a single logical processor.
       if( FALSE == rc )
       {           
          free( buffer );
-         return StatusFlag;
+         Con::errorf("Unable to determine CPU Count, assuming 1 core");
+         TotAvailCore = 1;
+         TotAvailLogical = 1;
+         return CONFIG_SingleCoreAndHTNotCapable;
       }      
 
+#pragma warning (push) // save warning state so the suppression below stays local to this loop
+#pragma warning (disable: 6011) // C6011 (NULL dereference): analyzer flags ptr because buffer is heap-allocated
       PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
 
       DWORD byteOffset = 0;
       while( byteOffset + sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) <= returnLength )
       {
-         switch( ptr->Relationship )
-         {         
-
-         case RelationProcessorCore:
+         if (ptr->Relationship == RelationProcessorCore)
+         {
             TotAvailCore++;
-
-            // A hyperthreaded core supplies more than one logical processor.
-            TotAvailLogical += CountSetBits( ptr->ProcessorMask );
-            break;         
-
-         case RelationProcessorPackage:
-            // Logical processors share a physical package.
-            PhysicalNum++;
-            break;
-
-         default:            
-            break;
+            TotAvailLogical += CountSetBits(ptr->ProcessorMask);
          }
+
          byteOffset += sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION );
          ptr++;
-      }      
+      }
 
       free( buffer );
+#pragma warning (pop) // restore the warning state saved by the matching push
 
-      StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
+      EConfig StatusFlag = CONFIG_SingleCoreAndHTNotCapable;
 
       if( TotAvailCore == 1 && TotAvailLogical > TotAvailCore )
          StatusFlag = CONFIG_SingleCoreHTEnabled;