
WIN32 no ASM

- Remove references to ASM math for the WIN32 build.
marauder2k7 · 4 years ago · commit 8f8bda67cd

engine/source/math/mMathAMD.cc (+125 -169)

@@ -62,14 +62,7 @@ void Athlon_MatrixF_x_Point3F(const F32 *m, const F32 *p, F32 *presult)
 // r[15]= a[12]*b[3]+ a[13]*b[7]+ a[14]*b[11]+ a[15]*b[15];
 //============================================================
 
-#if defined(TORQUE_SUPPORTS_NASM)
-#define ADD_3DNOW_FUNCS
-extern "C"
-{
-   void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result);
-}
-
-#elif defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
+#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
 #define ADD_3DNOW_FUNCS
 // inlined version here.
 void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
@@ -82,178 +75,141 @@ void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
       mov         edx, matB
       mov         eax, result
 
-      prefetch    [ecx+32]       ;// These may help -
-      prefetch    [edx+32]       ;//    and probably don't hurt
-
-      movq        mm0,[ecx]      ;// a21   | a11
-      movq        mm1,[ecx+8]      ;// a41   | a31
-      movq        mm4,[edx]      ;// b21   | b11
-      punpckhdq   mm2,mm0         ;// a21   |
-      movq        mm5,[edx+16]   ;// b22   | b12
-      punpckhdq   mm3,mm1         ;// a41   |
-      movq        mm6,[edx+32]   ;// b23   | b13
-      punpckldq   mm0,mm0         ;// a11   | a11
-      punpckldq   mm1,mm1         ;// a31   | a31
-      pfmul       mm4,mm0         ;// a11*b21 | a11*b11
-      punpckhdq   mm2,mm2         ;// a21   | a21
-      pfmul       mm0,[edx+8]      ;// a11*b41 | a11*b31
-      movq        mm7,[edx+48]   ;// b24   | b14
-      pfmul       mm5,mm2         ;// a21*b22 | a21*b12
-      punpckhdq   mm3,mm3         ;// a41   | a41
-      pfmul       mm2,[edx+24]   ;// a21*b42 | a21*b32
-      pfmul       mm6,mm1         ;// a31*b23 | a31*b13
-      pfadd       mm5,mm4         ;// a21*b22 + a11*b21 | a21*b12 + a11*b11
-      pfmul       mm1,[edx+40]   ;// a31*b43 | a31*b33
-      pfadd       mm2,mm0         ;// a21*b42 + a11*b41 | a21*b32 + a11*b31
-      pfmul       mm7,mm3         ;// a41*b24 | a41*b14
-      pfadd       mm6,mm5         ;// a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13
-      pfmul       mm3,[edx+56]   ;// a41*b44 | a41*b34
-      pfadd       mm2,mm1         ;// a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33
-      pfadd       mm7,mm6         ;// a41*b24 + a21*b22 + a11*b21 + a31*b23 |  a41*b14 + a21*b12 + a11*b11 + a31*b13
-      movq        mm0,[ecx+16]   ;// a22   | a12
-      pfadd       mm3,mm2         ;// a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33
-      movq        mm1,[ecx+24]   ;// a42   | a32
-      movq        [eax],mm7      ;// r21   | r11
-      movq        mm4,[edx]      ;// b21   | b11
-      movq        [eax+8],mm3      ;// r41   | r31
-
-      punpckhdq   mm2,mm0         ;// a22   | XXX
-      movq        mm5,[edx+16]   ;// b22   | b12
-      punpckhdq   mm3,mm1         ;// a42   | XXX
-      movq        mm6,[edx+32]   ;// b23   | b13
-      punpckldq   mm0,mm0         ;// a12   | a12
-      punpckldq   mm1,mm1         ;// a32   | a32
-      pfmul       mm4,mm0         ;// a12*b21 | a12*b11
-      punpckhdq   mm2,mm2         ;// a22   | a22
-      pfmul       mm0,[edx+8]      ;// a12*b41 | a12*b31
-      movq        mm7,[edx+48]   ;// b24   | b14
-      pfmul       mm5,mm2         ;// a22*b22 | a22*b12
-      punpckhdq   mm3,mm3         ;// a42   | a42
-      pfmul       mm2,[edx+24]   ;// a22*b42 | a22*b32
-      pfmul       mm6,mm1         ;// a32*b23 | a32*b13
-      pfadd       mm5,mm4         ;// a12*b21 + a22*b22 | a12*b11 + a22*b12
-      pfmul       mm1,[edx+40]   ;// a32*b43 | a32*b33
-      pfadd       mm2,mm0         ;// a12*b41 + a22*b42 | a12*b11 + a22*b32
-      pfmul       mm7,mm3         ;// a42*b24 | a42*b14
-      pfadd       mm6,mm5         ;// a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12
-      pfmul       mm3,[edx+56]   ;// a42*b44 | a42*b34
-      pfadd       mm2,mm1         ;// a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b11 + a22*b32
-      pfadd       mm7,mm6         ;// a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12
-      movq        mm0,[ecx+32]   ;// a23 | a13
-      pfadd       mm3,mm2         ;// a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b11 + a22*b32
-      movq        mm1,[ecx+40]   ;// a43 | a33
-      movq        [eax+16],mm7   ;// r22 | r12
-      movq        mm4,[edx]      ;// b21   | b11
-      movq        [eax+24],mm3   ;// r42 | r32
-
-      punpckhdq   mm2,mm0         ;// a23 | XXX
-      movq        mm5,[edx+16]   ;// b22 | b12
-      punpckhdq   mm3,mm1         ;// a43 | XXX
-      movq        mm6,[edx+32]   ;// b23 | b13
-      punpckldq   mm0,mm0         ;// a13 | a13
-      punpckldq   mm1,mm1         ;// a33 | a33
-      pfmul       mm4,mm0         ;// a13*b21 | a13*b11
-      punpckhdq   mm2,mm2         ;// a23 | a23
-      pfmul       mm0,[edx+8]      ;// a13*b41 | a13*b31
-      movq        mm7,[edx+48]   ;// b24 | b14
-      pfmul       mm5,mm2         ;// a23*b22 | a23*b12
-      punpckhdq   mm3,mm3         ;// a43 | a43
-      pfmul       mm2,[edx+24]   ;// a23*b42 | a23*b32
-      pfmul       mm6,mm1         ;// a33*b23 | a33*b13
-      pfadd       mm5,mm4         ;// a23*b22 + a13*b21 | a23*b12 + a13*b11
-      pfmul       mm1,[edx+40]   ;// a33*b43 | a33*b33
-      pfadd       mm2,mm0         ;// a13*b41 + a23*b42 | a13*b31 + a23*b32
-      pfmul       mm7,mm3         ;// a43*b24 | a43*b14
-      pfadd       mm6,mm5         ;// a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11
-      pfmul       mm3,[edx+56]   ;// a43*b44 | a43*b34
-      pfadd       mm2,mm1         ;// a33*b43*a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32
-      pfadd       mm7,mm6         ;// a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11
-      movq        mm0,[ecx+48]   ;// a24 | a14
-      pfadd       mm3,mm2         ;// a43*b44 + a33*b43*a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32
-      movq        mm1,[ecx+56]   ;// a44 | a34
-      movq        [eax+32],mm7   ;// r23 | r13
-      movq        mm4,[edx]      ;// b21 | b11
-      movq        [eax+40],mm3   ;// r43 | r33
-
-      punpckhdq   mm2,mm0         ;// a24 | XXX
-      movq        mm5,[edx+16]   ;// b22 | b12
-      punpckhdq   mm3,mm1         ;// a44 | XXX
-      movq        mm6,[edx+32]   ;// b23 | b13
-      punpckldq   mm0,mm0         ;// a14 | a14
-      punpckldq   mm1,mm1         ;// a34 | a34
-      pfmul       mm4,mm0         ;// a14*b21 | a14*b11
-      punpckhdq   mm2,mm2         ;// a24 | a24
-      pfmul       mm0,[edx+8]      ;// a14*b41 | a14*b31
-      movq        mm7,[edx+48]   ;// b24 | b14
-      pfmul       mm5,mm2         ;// a24*b22 | a24*b12
-      punpckhdq   mm3,mm3         ;// a44 | a44
-      pfmul       mm2,[edx+24]   ;// a24*b 42 | a24*b32
-      pfmul       mm6,mm1         ;// a34*b23 | a34*b13
-      pfadd       mm5,mm4         ;// a14*b21 + a24*b22 | a14*b11 + a24*b12
-      pfmul       mm1,[edx+40]   ;// a34*b43 | a34*b33
-      pfadd       mm2,mm0         ;// a14*b41 + a24*b 42 | a14*b31 + a24*b32
-      pfmul       mm7,mm3         ;// a44*b24 | a44*b14
-      pfadd       mm6,mm5         ;// a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12
-      pfmul       mm3,[edx+56]   ;// a44*b44 | a44*b34
-      pfadd       mm2,mm1         ;// a34*b43 + a14*b41 + a24*b 42 | a34*b33 + a14*b31 + a24*b32
-      pfadd       mm7,mm6         ;// a44*b24 + a14*b23 + a24*b 42 | a44*b14 + a14*b31 + a24*b32
-      pfadd       mm3,mm2         ;// a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32
-      movq        [eax+48],mm7   ;// r24 | r14
-      movq        [eax+56],mm3   ;// r44 | r34
+      prefetch[ecx + 32];// These may help -
+      prefetch[edx + 32];//    and probably don't hurt
+
+      movq        mm0, [ecx];// a21   | a11
+      movq        mm1, [ecx + 8];// a41   | a31
+      movq        mm4, [edx];// b21   | b11
+      punpckhdq   mm2, mm0;// a21   |
+      movq        mm5, [edx + 16];// b22   | b12
+      punpckhdq   mm3, mm1;// a41   |
+      movq        mm6, [edx + 32];// b23   | b13
+      punpckldq   mm0, mm0;// a11   | a11
+      punpckldq   mm1, mm1;// a31   | a31
+      pfmul       mm4, mm0;// a11*b21 | a11*b11
+      punpckhdq   mm2, mm2;// a21   | a21
+      pfmul       mm0, [edx + 8];// a11*b41 | a11*b31
+      movq        mm7, [edx + 48];// b24   | b14
+      pfmul       mm5, mm2;// a21*b22 | a21*b12
+      punpckhdq   mm3, mm3;// a41   | a41
+      pfmul       mm2, [edx + 24];// a21*b42 | a21*b32
+      pfmul       mm6, mm1;// a31*b23 | a31*b13
+      pfadd       mm5, mm4;// a21*b22 + a11*b21 | a21*b12 + a11*b11
+      pfmul       mm1, [edx + 40];// a31*b43 | a31*b33
+      pfadd       mm2, mm0;// a21*b42 + a11*b41 | a21*b32 + a11*b31
+      pfmul       mm7, mm3;// a41*b24 | a41*b14
+      pfadd       mm6, mm5;// a21*b22 + a11*b21 + a31*b23 | a21*b12 + a11*b11 + a31*b13
+      pfmul       mm3, [edx + 56];// a41*b44 | a41*b34
+      pfadd       mm2, mm1;// a21*b42 + a11*b41 + a31*b43 | a21*b32 + a11*b31 + a31*b33
+      pfadd       mm7, mm6;// a41*b24 + a21*b22 + a11*b21 + a31*b23 |  a41*b14 + a21*b12 + a11*b11 + a31*b13
+      movq        mm0, [ecx + 16];// a22   | a12
+      pfadd       mm3, mm2;// a41*b44 + a21*b42 + a11*b41 + a31*b43 | a41*b34 + a21*b32 + a11*b31 + a31*b33
+      movq        mm1, [ecx + 24];// a42   | a32
+      movq[eax], mm7;// r21   | r11
+      movq        mm4, [edx];// b21   | b11
+      movq[eax + 8], mm3;// r41   | r31
+
+      punpckhdq   mm2, mm0;// a22   | XXX
+      movq        mm5, [edx + 16];// b22   | b12
+      punpckhdq   mm3, mm1;// a42   | XXX
+      movq        mm6, [edx + 32];// b23   | b13
+      punpckldq   mm0, mm0;// a12   | a12
+      punpckldq   mm1, mm1;// a32   | a32
+      pfmul       mm4, mm0;// a12*b21 | a12*b11
+      punpckhdq   mm2, mm2;// a22   | a22
+      pfmul       mm0, [edx + 8];// a12*b41 | a12*b31
+      movq        mm7, [edx + 48];// b24   | b14
+      pfmul       mm5, mm2;// a22*b22 | a22*b12
+      punpckhdq   mm3, mm3;// a42   | a42
+      pfmul       mm2, [edx + 24];// a22*b42 | a22*b32
+      pfmul       mm6, mm1;// a32*b23 | a32*b13
+      pfadd       mm5, mm4;// a12*b21 + a22*b22 | a12*b11 + a22*b12
+      pfmul       mm1, [edx + 40];// a32*b43 | a32*b33
+      pfadd       mm2, mm0;// a12*b41 + a22*b42 | a12*b11 + a22*b32
+      pfmul       mm7, mm3;// a42*b24 | a42*b14
+      pfadd       mm6, mm5;// a32*b23 + a12*b21 + a22*b22 | a32*b13 + a12*b11 + a22*b12
+      pfmul       mm3, [edx + 56];// a42*b44 | a42*b34
+      pfadd       mm2, mm1;// a32*b43 + a12*b41 + a22*b42 | a32*b33 + a12*b11 + a22*b32
+      pfadd       mm7, mm6;// a42*b24 + a32*b23 + a12*b21 + a22*b22 | a42*b14 + a32*b13 + a12*b11 + a22*b12
+      movq        mm0, [ecx + 32];// a23 | a13
+      pfadd       mm3, mm2;// a42*b44 + a32*b43 + a12*b41 + a22*b42 | a42*b34 + a32*b33 + a12*b11 + a22*b32
+      movq        mm1, [ecx + 40];// a43 | a33
+      movq[eax + 16], mm7;// r22 | r12
+      movq        mm4, [edx];// b21   | b11
+      movq[eax + 24], mm3;// r42 | r32
+
+      punpckhdq   mm2, mm0;// a23 | XXX
+      movq        mm5, [edx + 16];// b22 | b12
+      punpckhdq   mm3, mm1;// a43 | XXX
+      movq        mm6, [edx + 32];// b23 | b13
+      punpckldq   mm0, mm0;// a13 | a13
+      punpckldq   mm1, mm1;// a33 | a33
+      pfmul       mm4, mm0;// a13*b21 | a13*b11
+      punpckhdq   mm2, mm2;// a23 | a23
+      pfmul       mm0, [edx + 8];// a13*b41 | a13*b31
+      movq        mm7, [edx + 48];// b24 | b14
+      pfmul       mm5, mm2;// a23*b22 | a23*b12
+      punpckhdq   mm3, mm3;// a43 | a43
+      pfmul       mm2, [edx + 24];// a23*b42 | a23*b32
+      pfmul       mm6, mm1;// a33*b23 | a33*b13
+      pfadd       mm5, mm4;// a23*b22 + a13*b21 | a23*b12 + a13*b11
+      pfmul       mm1, [edx + 40];// a33*b43 | a33*b33
+      pfadd       mm2, mm0;// a13*b41 + a23*b42 | a13*b31 + a23*b32
+      pfmul       mm7, mm3;// a43*b24 | a43*b14
+      pfadd       mm6, mm5;// a33*b23 + a23*b22 + a13*b21 | a33*b13 + a23*b12 + a13*b11
+      pfmul       mm3, [edx + 56];// a43*b44 | a43*b34
+      pfadd       mm2, mm1;// a33*b43*a13*b41 + a23*b42 | a33*b33 + a13*b31 + a23*b32
+      pfadd       mm7, mm6;// a43*b24 + a33*b23 + a23*b22 + a13*b21 | a43*b14 + a33*b13 + a23*b12 + a13*b11
+      movq        mm0, [ecx + 48];// a24 | a14
+      pfadd       mm3, mm2;// a43*b44 + a33*b43*a13*b41 + a23*b42 | a43*b34 + a33*b33 + a13*b31 + a23*b32
+      movq        mm1, [ecx + 56];// a44 | a34
+      movq[eax + 32], mm7;// r23 | r13
+      movq        mm4, [edx];// b21 | b11
+      movq[eax + 40], mm3;// r43 | r33
+
+      punpckhdq   mm2, mm0;// a24 | XXX
+      movq        mm5, [edx + 16];// b22 | b12
+      punpckhdq   mm3, mm1;// a44 | XXX
+      movq        mm6, [edx + 32];// b23 | b13
+      punpckldq   mm0, mm0;// a14 | a14
+      punpckldq   mm1, mm1;// a34 | a34
+      pfmul       mm4, mm0;// a14*b21 | a14*b11
+      punpckhdq   mm2, mm2;// a24 | a24
+      pfmul       mm0, [edx + 8];// a14*b41 | a14*b31
+      movq        mm7, [edx + 48];// b24 | b14
+      pfmul       mm5, mm2;// a24*b22 | a24*b12
+      punpckhdq   mm3, mm3;// a44 | a44
+      pfmul       mm2, [edx + 24];// a24*b 42 | a24*b32
+      pfmul       mm6, mm1;// a34*b23 | a34*b13
+      pfadd       mm5, mm4;// a14*b21 + a24*b22 | a14*b11 + a24*b12
+      pfmul       mm1, [edx + 40];// a34*b43 | a34*b33
+      pfadd       mm2, mm0;// a14*b41 + a24*b 42 | a14*b31 + a24*b32
+      pfmul       mm7, mm3;// a44*b24 | a44*b14
+      pfadd       mm6, mm5;// a34*b23 + a14*b21 + a24*b22 | a34*b13 + a14*b11 + a24*b12
+      pfmul       mm3, [edx + 56];// a44*b44 | a44*b34
+      pfadd       mm2, mm1;// a34*b43 + a14*b41 + a24*b 42 | a34*b33 + a14*b31 + a24*b32
+      pfadd       mm7, mm6;// a44*b24 + a14*b23 + a24*b 42 | a44*b14 + a14*b31 + a24*b32
+      pfadd       mm3, mm2;// a44*b44 + a34*b43 + a14*b41 + a24*b42 | a44*b34 + a34*b33 + a14*b31 + a24*b32
+      movq[eax + 48], mm7;// r24 | r14
+      movq[eax + 56], mm3;// r44 | r34
       femms
    }
 }
-#endif
-
-
-#if 0
-/* this isn't currently used/implemented.
-void Athlon_MatrixF_x_VectorF(const F32 *matrix, const F32 *vector, F32 *result)
+#elif defined(TORQUE_SUPPORTS_NASM)
+#define ADD_3DNOW_FUNCS
+extern "C"
 {
-   __asm {
-      femms
-      mov         eax,result
-      mov         ecx,vector
-      mov         edx,matrix
-
-      // Here's what we're doing:
-      // result[0] = M[0] * v[0]    +  M[1] * v[1]    +  M[2] * v[2];
-      // result[1] = M[4] * v[0]    +  M[5] * v[1]    +  M[6] * v[2];
-      // result[2] = M[8] * v[0]    +  M[9] * v[1]    +  M[10]* v[2];
-
-      movq        mm0,[ecx]         //     y   |  x
-      movd        mm1,[ecx+8]       //     0   |  z
-      movd        mm4,[edx+8]       //     0   | m_13
-      movq        mm3,mm0           //     y   |  x
-      movd        mm2,[edx+40]      //     0   | m_33 (M[10])
-      punpckldq   mm0,mm0           //     x   |  x
-      punpckldq   mm4,[edx+20]      //    m_31 | m_23
-      pfmul       mm0,[edx]         //     x * m_12 | x * m_11
-      punpckhdq   mm3,mm3           //     y   |  y
-      pfmul       mm2,mm1           //     0   |  z * m_33
-      punpckldq   mm1,mm1           //     z   |  z
-      pfmul       mm4,[ecx]         //    y * m_31 | x * m_23
-      pfmul       mm3,[edx+12]      //    y * m_22 | y * m_21
-      pfmul       mm1,[edx+24]      //    z * m_32 | z * m_32
-      pfacc       mm4,mm4           //     ?   | y * m_31 + x * m_23
-      pfadd       mm3,mm0           //     x * m_12 + y * m_22 | x * m_11 + y * m_21
-      pfadd       mm4,mm2           //     ?   | y * m_31 + x * m_23 + z * m_33
-      pfadd       mm3,mm1           //     x * m_12 + y * m_22 + z * m_32 | x * m_11 + y * m_21 + z * m_32
-      movd        [eax+8],mm4       //    r_z
-      movq        [eax],mm3         //    r_y  | r_x
-      femms
-   }
+   void Athlon_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result);
 }
-*/
-#endif
 
+#endif
 
 void mInstall_AMD_Math()
 {
 #if defined(ADD_3DNOW_FUNCS)
-   m_matF_x_matF           = Athlon_MatrixF_x_MatrixF;
+   m_matF_x_matF = Athlon_MatrixF_x_MatrixF;
 #endif
    // m_matF_x_point3F = Athlon_MatrixF_x_Point3F;
    // m_matF_x_vectorF = Athlon_MatrixF_x_VectorF;
 }
-
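
The file above relies on a simple runtime-dispatch pattern: each math entry point is a function pointer that starts on portable C code, and an mInstall_* routine overwrites it when a faster backend is compiled in and the detected CPU supports it. Callers never reference the assembly routines directly, which is why the WIN32 build can drop the NASM objects without touching call sites. A minimal self-contained sketch of that pattern, with illustrative names only (not engine code):

```cpp
#include <cstddef>

typedef float F32;

// Portable reference implementation: row-major 4x4 multiply, no alignment requirement.
static void matMul_C(const F32 *a, const F32 *b, F32 *r)
{
   for (std::size_t i = 0; i < 4; ++i)
      for (std::size_t j = 0; j < 4; ++j)
      {
         F32 sum = 0.0f;
         for (std::size_t k = 0; k < 4; ++k)
            sum += a[i * 4 + k] * b[k * 4 + j];
         r[i * 4 + j] = sum;
      }
}

// The dispatch pointer defaults to the C fallback...
void (*matMul)(const F32 *a, const F32 *b, F32 *r) = matMul_C;

// ...and an install routine retargets it at startup when a faster path is available.
void installFastMatMul(bool cpuSupportsIt, void (*fastPath)(const F32 *, const F32 *, F32 *))
{
   if (cpuSupportsIt && fastPath)
      matMul = fastPath;
}
```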

engine/source/math/mMathFn.h (+1 -0)

@@ -93,6 +93,7 @@ extern void (*m_matF_scale)(F32 *m,const F32* p);
 extern void (*m_matF_normalize)(F32 *m);
 extern F32  (*m_matF_determinant)(const F32 *m);
 extern void (*m_matF_x_matF)(const F32 *a, const F32 *b, F32 *mresult);
+extern void(*m_matF_x_matF_aligned)(const F32 *a, const F32 *b, F32 *mresult);
 // extern void (*m_matF_x_point3F)(const F32 *m, const F32 *p, F32 *presult);
 // extern void (*m_matF_x_vectorF)(const F32 *m, const F32 *v, F32 *vresult);
 extern void (*m_matF_x_point4F)(const F32 *m, const F32 *p, F32 *presult);
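
The new m_matF_x_matF_aligned entry point is only safe when callers guarantee 16-byte alignment: the SSE implementation added in mMathSSE.cc reads matB's rows and writes the final row with movaps, which faults on unaligned addresses. A hedged caller-side sketch (the extern matches the declaration above; everything else is illustrative):

```cpp
typedef float F32;

// Declared above; defaulted in mMath_C.cc and retargeted by mInstall_Library_SSE().
extern void (*m_matF_x_matF_aligned)(const F32 *a, const F32 *b, F32 *mresult);

void multiplyAlignedExample()
{
   // alignas(16) keeps all three 4x4 matrices on 16-byte boundaries,
   // satisfying the movaps-based path.
   alignas(16) F32 a[16] = { /* ... */ };
   alignas(16) F32 b[16] = { /* ... */ };
   alignas(16) F32 r[16];

   m_matF_x_matF_aligned(a, b, r);
}
```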

engine/source/math/mMathSSE.cc (+292 -39)

@@ -25,106 +25,359 @@
 #include "math/mMatrix.h"
 
 
-// if we set our flag, we always try to build the inlined asm.
-// EXCEPT if we're in an old version of Codewarrior that can't handle SSE code.
-#if defined(TORQUE_SUPPORTS_NASM)
-#define ADD_SSE_FN
-extern "C"
-{
-   void SSE_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result);
-}
-
-#elif defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
+#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
 #define ADD_SSE_FN
 // inlined version here.
 void SSE_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
 {
    __asm
    {
-      mov         ecx, matA
-      mov         edx, matB
+      mov         edx, matA
+      mov         ecx, matB
       mov         eax, result
 
       movss       xmm0, [edx]
       movups      xmm1, [ecx]
       shufps      xmm0, xmm0, 0
-      movss       xmm2, [edx+4]
+      movss       xmm2, [edx + 4]
       mulps       xmm0, xmm1
       shufps      xmm2, xmm2, 0
-      movups      xmm3, [ecx+10h]
-      movss       xmm7, [edx+8]
+      movups      xmm3, [ecx + 10h]
+      movss       xmm7, [edx + 8]
       mulps       xmm2, xmm3
       shufps      xmm7, xmm7, 0
       addps       xmm0, xmm2
-      movups      xmm4, [ecx+20h]
-      movss       xmm2, [edx+0Ch]
+      movups      xmm4, [ecx + 20h]
+      movss       xmm2, [edx + 0Ch]
       mulps       xmm7, xmm4
       shufps      xmm2, xmm2, 0
       addps       xmm0, xmm7
-      movups      xmm5, [ecx+30h]
-      movss       xmm6, [edx+10h]
+      movups      xmm5, [ecx + 30h]
+      movss       xmm6, [edx + 10h]
       mulps       xmm2, xmm5
-      movss       xmm7, [edx+14h]
+      movss       xmm7, [edx + 14h]
       shufps      xmm6, xmm6, 0
       addps       xmm0, xmm2
       shufps      xmm7, xmm7, 0
-      movlps      [eax], xmm0
-      movhps      [eax+8], xmm0
+      movlps[eax], xmm0
+      movhps[eax + 8], xmm0
       mulps       xmm7, xmm3
-      movss       xmm0, [edx+18h]
+      movss       xmm0, [edx + 18h]
       mulps       xmm6, xmm1
       shufps      xmm0, xmm0, 0
       addps       xmm6, xmm7
       mulps       xmm0, xmm4
-      movss       xmm2, [edx+24h]
+      movss       xmm2, [edx + 24h]
       addps       xmm6, xmm0
-      movss       xmm0, [edx+1Ch]
-      movss       xmm7, [edx+20h]
+      movss       xmm0, [edx + 1Ch]
+      movss       xmm7, [edx + 20h]
       shufps      xmm0, xmm0, 0
       shufps      xmm7, xmm7, 0
       mulps       xmm0, xmm5
       mulps       xmm7, xmm1
       addps       xmm6, xmm0
       shufps      xmm2, xmm2, 0
-      movlps      [eax+10h], xmm6
-      movhps      [eax+18h], xmm6
+      movlps[eax + 10h], xmm6
+      movhps[eax + 18h], xmm6
       mulps       xmm2, xmm3
-      movss       xmm6, [edx+28h]
+      movss       xmm6, [edx + 28h]
       addps       xmm7, xmm2
       shufps      xmm6, xmm6, 0
-      movss       xmm2, [edx+2Ch]
+      movss       xmm2, [edx + 2Ch]
       mulps       xmm6, xmm4
       shufps      xmm2, xmm2, 0
       addps       xmm7, xmm6
       mulps       xmm2, xmm5
-      movss       xmm0, [edx+34h]
+      movss       xmm0, [edx + 34h]
       addps       xmm7, xmm2
       shufps      xmm0, xmm0, 0
-      movlps      [eax+20h], xmm7
-      movss       xmm2, [edx+30h]
-      movhps      [eax+28h], xmm7
+      movlps[eax + 20h], xmm7
+      movss       xmm2, [edx + 30h]
+      movhps[eax + 28h], xmm7
       mulps       xmm0, xmm3
       shufps      xmm2, xmm2, 0
-      movss       xmm6, [edx+38h]
+      movss       xmm6, [edx + 38h]
       mulps       xmm2, xmm1
       shufps      xmm6, xmm6, 0
       addps       xmm2, xmm0
       mulps       xmm6, xmm4
-      movss       xmm7, [edx+3Ch]
+      movss       xmm7, [edx + 3Ch]
       shufps      xmm7, xmm7, 0
       addps       xmm2, xmm6
       mulps       xmm7, xmm5
       addps       xmm2, xmm7
-      movups      [eax+30h], xmm2
+      movups[eax + 30h], xmm2
    }
 }
-#endif
+void SSE_MatrixF_x_MatrixF_Aligned(const F32 *matA, const F32 *matB, F32 *result)
+{
+   __asm
+   {
+      mov         edx, matA
+      mov         ecx, matB
+      mov         eax, result
+
+      movss       xmm0, [edx]
+      movaps      xmm1, [ecx]
+      shufps      xmm0, xmm0, 0
+      movss       xmm2, [edx + 4]
+      mulps       xmm0, xmm1
+      shufps      xmm2, xmm2, 0
+      movaps      xmm3, [ecx + 10h]
+      movss       xmm7, [edx + 8]
+      mulps       xmm2, xmm3
+      shufps      xmm7, xmm7, 0
+      addps       xmm0, xmm2
+      movaps      xmm4, [ecx + 20h]
+      movss       xmm2, [edx + 0Ch]
+      mulps       xmm7, xmm4
+      shufps      xmm2, xmm2, 0
+      addps       xmm0, xmm7
+      movaps      xmm5, [ecx + 30h]
+      movss       xmm6, [edx + 10h]
+      mulps       xmm2, xmm5
+      movss       xmm7, [edx + 14h]
+      shufps      xmm6, xmm6, 0
+      addps       xmm0, xmm2
+      shufps      xmm7, xmm7, 0
+      movlps[eax], xmm0
+      movhps[eax + 8], xmm0
+      mulps       xmm7, xmm3
+      movss       xmm0, [edx + 18h]
+      mulps       xmm6, xmm1
+      shufps      xmm0, xmm0, 0
+      addps       xmm6, xmm7
+      mulps       xmm0, xmm4
+      movss       xmm2, [edx + 24h]
+      addps       xmm6, xmm0
+      movss       xmm0, [edx + 1Ch]
+      movss       xmm7, [edx + 20h]
+      shufps      xmm0, xmm0, 0
+      shufps      xmm7, xmm7, 0
+      mulps       xmm0, xmm5
+      mulps       xmm7, xmm1
+      addps       xmm6, xmm0
+      shufps      xmm2, xmm2, 0
+      movlps[eax + 10h], xmm6
+      movhps[eax + 18h], xmm6
+      mulps       xmm2, xmm3
+      movss       xmm6, [edx + 28h]
+      addps       xmm7, xmm2
+      shufps      xmm6, xmm6, 0
+      movss       xmm2, [edx + 2Ch]
+      mulps       xmm6, xmm4
+      shufps      xmm2, xmm2, 0
+      addps       xmm7, xmm6
+      mulps       xmm2, xmm5
+      movss       xmm0, [edx + 34h]
+      addps       xmm7, xmm2
+      shufps      xmm0, xmm0, 0
+      movlps[eax + 20h], xmm7
+      movss       xmm2, [edx + 30h]
+      movhps[eax + 28h], xmm7
+      mulps       xmm0, xmm3
+      shufps      xmm2, xmm2, 0
+      movss       xmm6, [edx + 38h]
+      mulps       xmm2, xmm1
+      shufps      xmm6, xmm6, 0
+      addps       xmm2, xmm0
+      mulps       xmm6, xmm4
+      movss       xmm7, [edx + 3Ch]
+      shufps      xmm7, xmm7, 0
+      addps       xmm2, xmm6
+      mulps       xmm7, xmm5
+      addps       xmm2, xmm7
+      movaps[eax + 30h], xmm2
+   }
+}
+// if we set our flag, we always try to build the inlined asm.
+// EXCEPT if we're in an old version of Codewarrior that can't handle SSE code.
+// TODO: the NASM implementation of SSE_MatrixF_x_MatrixF_Aligned is missing,
+// so we temporarily disable this until it is fixed (needed for the Linux dedicated build)
+#elif defined(TORQUE_SUPPORTS_NASM)
+#define ADD_SSE_FN
+extern "C"
+{
+   void SSE_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result);
+   void SSE_MatrixF_x_MatrixF_Aligned(const F32 *matA, const F32 *matB, F32 *result);
+}
+
+#elif defined( TORQUE_COMPILER_GCC ) && (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
+#define ADD_SSE_FN
 
+void SSE_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
+{
+   asm
+   (
+      "movss      (%%edx),%%xmm0\n"
+      "movups     (%%ecx),%%xmm1\n"
+      "shufps     $0,%%xmm0,%%xmm0\n"
+      "movss      4(%%edx),%%xmm2\n"
+      "mulps      %%xmm1,%%xmm0\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "movups     0x10(%%ecx),%%xmm3\n"
+      "movss      8(%%edx),%%xmm7\n"
+      "mulps      %%xmm3,%%xmm2\n"
+      "shufps     $0,%%xmm7,%%xmm7\n"
+      "addps      %%xmm2,%%xmm0\n"
+      "movups     0x20(%%ecx),%%xmm4\n"
+      "movss      0x0c(%%edx),%%xmm2\n"
+      "mulps      %%xmm4,%%xmm7\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "addps      %%xmm7,%%xmm0\n"
+      "movups     0x30(%%ecx),%%xmm5\n"
+      "movss      0x10(%%edx),%%xmm6\n"
+      "mulps      %%xmm5,%%xmm2\n"
+      "movss      0x14(%%edx),%%xmm7\n"
+      "shufps     $0,%%xmm6,%%xmm6\n"
+      "addps      %%xmm2,%%xmm0\n"
+      "shufps     $0,%%xmm7,%%xmm7\n"
+      "movlps     %%xmm0,(%%eax)\n"
+      "movhps     %%xmm0,8(%%eax)\n"
+      "mulps      %%xmm3,%%xmm7\n"
+      "movss      0x18(%%edx),%%xmm0\n"
+      "mulps      %%xmm1,%%xmm6\n"
+      "shufps     $0,%%xmm0,%%xmm0\n"
+      "addps      %%xmm7,%%xmm6\n"
+      "mulps      %%xmm4,%%xmm0\n"
+      "movss      0x24(%%edx),%%xmm2\n"
+      "addps      %%xmm0,%%xmm6\n"
+      "movss      0x1c(%%edx),%%xmm0\n"
+      "movss      0x20(%%edx),%%xmm7\n"
+      "shufps     $0,%%xmm0,%%xmm0\n"
+      "shufps     $0,%%xmm7,%%xmm7\n"
+      "mulps      %%xmm5,%%xmm0\n"
+      "mulps      %%xmm1,%%xmm7\n"
+      "addps      %%xmm0,%%xmm6\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "movlps     %%xmm6,0x10(%%eax)\n"
+      "movhps     %%xmm6,0x18(%%eax)\n"
+      "mulps      %%xmm3,%%xmm2\n"
+      "movss      0x28(%%edx),%%xmm6\n"
+      "addps      %%xmm2,%%xmm7\n"
+      "shufps     $0,%%xmm6,%%xmm6\n"
+      "movss      0x2c(%%edx),%%xmm2\n"
+      "mulps      %%xmm4,%%xmm6\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "addps      %%xmm6,%%xmm7\n"
+      "mulps      %%xmm5,%%xmm2\n"
+      "movss      0x34(%%edx),%%xmm0\n"
+      "addps      %%xmm2,%%xmm7\n"
+      "shufps     $0,%%xmm0,%%xmm0\n"
+      "movlps     %%xmm7,0x20(%%eax)\n"
+      "movss      0x30(%%edx),%%xmm2\n"
+      "movhps     %%xmm7,0x28(%%eax)\n"
+      "mulps      %%xmm3,%%xmm0\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "movss      0x38(%%edx),%%xmm6\n"
+      "mulps      %%xmm1,%%xmm2\n"
+      "shufps     $0,%%xmm6,%%xmm6\n"
+      "addps      %%xmm0,%%xmm2\n"
+      "mulps      %%xmm4,%%xmm6\n"
+      "movss      0x3c(%%edx),%%xmm7\n"
+      "shufps     $0,%%xmm7,%%xmm7\n"
+      "addps      %%xmm6,%%xmm2\n"
+      "mulps      %%xmm5,%%xmm7\n"
+      "addps      %%xmm7,%%xmm2\n"
+      "movups     %%xmm2,0x30(%%eax)\n"
+
+      :
+   : "d" (matA),
+      "c" (matB),
+      "a" (result)
+      );
+}
+
+void SSE_MatrixF_x_MatrixF_Aligned(const F32 *matA, const F32 *matB, F32 *result)
+{
+   asm
+   (
+      "movss      (%%edx),%%xmm0\n"
+      "movaps     (%%ecx),%%xmm1\n"
+      "shufps     $0,%%xmm0,%%xmm0\n"
+      "movss      4(%%edx),%%xmm2\n"
+      "mulps      %%xmm1,%%xmm0\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "movaps     0x10(%%ecx),%%xmm3\n"
+      "movss      8(%%edx),%%xmm7\n"
+      "mulps      %%xmm3,%%xmm2\n"
+      "shufps     $0,%%xmm7,%%xmm7\n"
+      "addps      %%xmm2,%%xmm0\n"
+      "movaps     0x20(%%ecx),%%xmm4\n"
+      "movss      0x0c(%%edx),%%xmm2\n"
+      "mulps      %%xmm4,%%xmm7\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "addps      %%xmm7,%%xmm0\n"
+      "movaps     0x30(%%ecx),%%xmm5\n"
+      "movss      0x10(%%edx),%%xmm6\n"
+      "mulps      %%xmm5,%%xmm2\n"
+      "movss      0x14(%%edx),%%xmm7\n"
+      "shufps     $0,%%xmm6,%%xmm6\n"
+      "addps      %%xmm2,%%xmm0\n"
+      "shufps     $0,%%xmm7,%%xmm7\n"
+      "movlps     %%xmm0,(%%eax)\n"
+      "movhps     %%xmm0,8(%%eax)\n"
+      "mulps      %%xmm3,%%xmm7\n"
+      "movss      0x18(%%edx),%%xmm0\n"
+      "mulps      %%xmm1,%%xmm6\n"
+      "shufps     $0,%%xmm0,%%xmm0\n"
+      "addps      %%xmm7,%%xmm6\n"
+      "mulps      %%xmm4,%%xmm0\n"
+      "movss      0x24(%%edx),%%xmm2\n"
+      "addps      %%xmm0,%%xmm6\n"
+      "movss      0x1c(%%edx),%%xmm0\n"
+      "movss      0x20(%%edx),%%xmm7\n"
+      "shufps     $0,%%xmm0,%%xmm0\n"
+      "shufps     $0,%%xmm7,%%xmm7\n"
+      "mulps      %%xmm5,%%xmm0\n"
+      "mulps      %%xmm1,%%xmm7\n"
+      "addps      %%xmm0,%%xmm6\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "movlps     %%xmm6,0x10(%%eax)\n"
+      "movhps     %%xmm6,0x18(%%eax)\n"
+      "mulps      %%xmm3,%%xmm2\n"
+      "movss      0x28(%%edx),%%xmm6\n"
+      "addps      %%xmm2,%%xmm7\n"
+      "shufps     $0,%%xmm6,%%xmm6\n"
+      "movss      0x2c(%%edx),%%xmm2\n"
+      "mulps      %%xmm4,%%xmm6\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "addps      %%xmm6,%%xmm7\n"
+      "mulps      %%xmm5,%%xmm2\n"
+      "movss      0x34(%%edx),%%xmm0\n"
+      "addps      %%xmm2,%%xmm7\n"
+      "shufps     $0,%%xmm0,%%xmm0\n"
+      "movlps     %%xmm7,0x20(%%eax)\n"
+      "movss      0x30(%%edx),%%xmm2\n"
+      "movhps     %%xmm7,0x28(%%eax)\n"
+      "mulps      %%xmm3,%%xmm0\n"
+      "shufps     $0,%%xmm2,%%xmm2\n"
+      "movss      0x38(%%edx),%%xmm6\n"
+      "mulps      %%xmm1,%%xmm2\n"
+      "shufps     $0,%%xmm6,%%xmm6\n"
+      "addps      %%xmm0,%%xmm2\n"
+      "mulps      %%xmm4,%%xmm6\n"
+      "movss      0x3c(%%edx),%%xmm7\n"
+      "shufps     $0,%%xmm7,%%xmm7\n"
+      "addps      %%xmm6,%%xmm2\n"
+      "mulps      %%xmm5,%%xmm7\n"
+      "addps      %%xmm7,%%xmm2\n"
+      "movaps     %%xmm2,0x30(%%eax)\n"
+
+      :
+   : "d" (matA),
+      "c" (matB),
+      "a" (result)
+      );
+}
+
+#endif
 
 void mInstall_Library_SSE()
 {
 #if defined(ADD_SSE_FN)
-   m_matF_x_matF           = SSE_MatrixF_x_MatrixF;
+   m_matF_x_matF = SSE_MatrixF_x_MatrixF;
+   m_matF_x_matF_aligned = SSE_MatrixF_x_MatrixF_Aligned;
    // m_matF_x_point3F = Athlon_MatrixF_x_Point3F;
    // m_matF_x_vectorF = Athlon_MatrixF_x_VectorF;
 #endif
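
In the GCC path added above, the constraint list at the end of each asm block ("d", "c", "a") pins matA, matB and result to edx, ecx and eax, mirroring the register assignments of the MSVC __asm versions. A trivial hedged illustration of that constraint syntax (x86 only, not engine code):

```cpp
// Adds two ints through the same register-pinning constraints the SSE blocks use.
int addViaPinnedRegs(int x, int y)
{
   int result;
   asm("addl %%ecx, %%eax"     // eax += ecx (AT&T operand order: source, destination)
       : "=a"(result)          // output: read the sum back from eax
       : "a"(x), "c"(y)        // inputs: x preloaded into eax, y into ecx
       : "cc");                // addl clobbers the condition codes
   return result;
}
```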

engine/source/math/mMath_C.cc (+1 -0)

@@ -786,6 +786,7 @@ void (*m_matF_scale)(F32 *m,const F32* p) = m_matF_scale_C;
 void (*m_matF_normalize)(F32 *m) = m_matF_normalize_C;
 F32  (*m_matF_determinant)(const F32 *m) = m_matF_determinant_C;
 void (*m_matF_x_matF)(const F32 *a, const F32 *b, F32 *mresult)    = default_matF_x_matF_C;
+void(*m_matF_x_matF_aligned)(const F32 *a, const F32 *b, F32 *mresult) = default_matF_x_matF_C;
 // void (*m_matF_x_point3F)(const F32 *m, const F32 *p, F32 *presult) = m_matF_x_point3F_C;
 // void (*m_matF_x_vectorF)(const F32 *m, const F32 *v, F32 *vresult) = m_matF_x_vectorF_C;
 void (*m_matF_x_point4F)(const F32 *m, const F32 *p, F32 *presult) = m_matF_x_point4F_C;
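
Because the new aligned pointer defaults to the same C routine as the unaligned one, platforms that never call mInstall_Library_SSE() behave exactly as before. A hedged sanity-check sketch for builds that do install the SSE paths (assumes the two pointers are linked in; test values and tolerance are arbitrary):

```cpp
#include <cassert>
#include <cmath>

typedef float F32;

extern void (*m_matF_x_matF)(const F32 *a, const F32 *b, F32 *mresult);
extern void (*m_matF_x_matF_aligned)(const F32 *a, const F32 *b, F32 *mresult);

void checkAlignedPathAgrees()
{
   alignas(16) F32 a[16], b[16], r1[16], r2[16];
   for (int i = 0; i < 16; ++i) { a[i] = F32(i); b[i] = F32(16 - i); }

   m_matF_x_matF(a, b, r1);           // unaligned-safe path
   m_matF_x_matF_aligned(a, b, r2);   // movaps-based path; relies on the alignas above

   for (int i = 0; i < 16; ++i)
      assert(std::fabs(r1[i] - r2[i]) < 1e-4f);
}
```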

engine/source/platformWin32/winCPUInfo.cc (+0 -167)

@@ -30,16 +30,6 @@ extern void PlatformBlitInit();
 extern void SetProcessorInfo(TorqueSystemInfo::Processor& pInfo,
    char* vendor, U32 processor, U32 properties); // platform/platformCPU.cc
 
-
-#if defined(TORQUE_SUPPORTS_NASM)
-// asm cpu detection routine from platform code
-extern "C"
-{
-   void detectX86CPUInfo(char *vendor, U32 *processor, U32 *properties);
-}
-#endif
-
-
 void Processor::init()
 {
    Con::printSeparator();
@@ -60,168 +50,11 @@ void Processor::init()
    U32   properties = 0;
    U32   processor  = 0;
 
-#if defined(TORQUE_SUPPORTS_NASM)
-
-   detectX86CPUInfo(vendor, &processor, &properties);
-
-#elif defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
-   __asm
-   {
-      //--------------------------------------
-      // is CPUID supported
-      push     ebx
-      push     edx
-      push     ecx
-      pushfd
-      pushfd                     // save EFLAGS to stack
-      pop      eax               // move EFLAGS into EAX
-      mov      ebx, eax
-      xor      eax, 0x200000     // flip bit 21
-      push     eax
-      popfd                      // restore EFLAGS
-      pushfd
-      pop      eax
-      cmp      eax, ebx
-      jz       EXIT              // doesn't support CPUID instruction
-
-      //--------------------------------------
-      // Get Vendor Informaion using CPUID eax==0
-      xor      eax, eax
-      cpuid
-
-      mov      DWORD PTR vendor, ebx
-      mov      DWORD PTR vendor+4, edx
-      mov      DWORD PTR vendor+8, ecx
-
-      // get Generic Extended CPUID info
-      mov      eax, 1
-      cpuid                      // eax=1, so cpuid queries feature information
-
-      and      eax, 0x0ff0
-      mov      processor, eax    // just store the model bits
-      mov      properties, edx
-
-      // Want to check for 3DNow(tm).  Need to see if extended cpuid functions present.
-      mov      eax, 0x80000000
-      cpuid
-      cmp      eax, 0x80000000
-      jbe      MAYBE_3DLATER
-      mov      eax, 0x80000001
-      cpuid
-      and      edx, 0x80000000      // 3DNow if bit 31 set -> put bit in our properties
-      or       properties, edx
-   MAYBE_3DLATER:
-
-
-   EXIT:
-      popfd
-      pop      ecx
-      pop      edx
-      pop      ebx
-   }
-#endif
-
    SetProcessorInfo(PlatformSystemInfo.processor, vendor, processor, properties);
 
 // now calculate speed of processor...
    U16 nearmhz = 0; // nearest rounded mhz
    U16 mhz = 0; // calculated value.
-#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM) || defined(TORQUE_COMPILER_MINGW)
-   //--------------------------------------
-   // if RDTSC support calculate the aproximate Mhz of the CPU
-   if (PlatformSystemInfo.processor.properties & CPU_PROP_RDTSC &&
-       PlatformSystemInfo.processor.properties & CPU_PROP_FPU)
-   {
-      const U32 msToWait = 1000; // bigger this is, more accurate we are.
-      U32 tsLo1 = 0, tsHi1 = 0; // time stamp storage.
-      U32 tsLo2 = 0, tsHi2 = 0; // time stamp storage.
-      U16 Nearest66Mhz = 0, Delta66Mhz = 0;
-      U16 Nearest50Mhz = 0, Delta50Mhz = 0;
-      F64 tsFirst, tsSecond, tsDelta;
-      U32 ms;
-
-      // starting time marker.
-      ms = GetTickCount(); // !!!!TBD - this function may have too high an error... dunno.
-
-   #if defined(TORQUE_COMPILER_MINGW)
-      asm ("rdtsc" : "=a" (tsLo1), "=d" (tsHi1));
-   #elif defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)  // VC|CW
-      __asm
-      {
-           push  eax
-           push  edx
-         rdtsc
-         mov  tsLo1, eax
-         mov  tsHi1, edx
-           pop  edx
-           pop  eax
-      }
-   #endif
-
-      // the faster this check and exit is, the more accurate the time-stamp-delta will be.
-      while(GetTickCount() < ms + msToWait) {};
-
-   #if defined(TORQUE_COMPILER_MINGW)
-      asm ("rdtsc" : "=a" (tsLo2), "=d" (tsHi2));
-   #elif defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)  // VC|CW
-      __asm
-      {
-           push  eax
-           push  edx
-         rdtsc
-         mov  tsLo2, eax
-         mov  tsHi2, edx
-           pop  edx
-           pop  eax
-      }
-   #endif
-
-      // do calculations in doubles for accuracy, since we're working with 64-bit math here...
-      // grabbed this from the MINGW sample.
-      tsFirst = ((F64)tsHi1 * (F64)0x10000 * (F64)0x10000) + (F64)tsLo1;
-      tsSecond = ((F64)tsHi2 * (F64)0x10000 * (F64)0x10000) + (F64)tsLo2;
-      // get the timestamp delta.  potentially large number here, as it's in Hz.
-      tsDelta = tsSecond - tsFirst;
-
-      // adjust for slightly-off-delay -- better to assume +1ms than try to really calc.
-      tsDelta *= (F64)(msToWait + 1);
-      tsDelta /= (F64)msToWait;
-      // factor back into 1s of time.
-      tsDelta *= ((F64)1000/(F64)msToWait);
-      // then convert into Mhz
-      tsDelta /= (F64)1000000;
-      tsDelta += 0.5f; // trying to get closer to the right values, effectively rounding up.
-      mhz = (U32)tsDelta;
-
-      // Find nearest full/half multiple of 66/133 MHz
-      Nearest66Mhz = ((((mhz * 3) + 100) / 200) * 200) / 3;
-      // 660 = 1980 = 2080 = 100 = 2000 = 666
-      // 440 = 1320 = 1420 = 70 = 1400 = 466
-
-      // find delta to nearest 66 multiple.
-      Delta66Mhz = abs(Nearest66Mhz - mhz);
-
-      // Find nearest full/half multiple of 100 MHz
-      Nearest50Mhz = (((mhz + 25) / 50) * 50);
-      // 440 = 465 = 9 = 450
-
-      // find delta to nearest 50 multiple.
-      Delta50Mhz = abs(Nearest50Mhz - mhz);
-
-      if (Delta50Mhz < Delta66Mhz) // closer to a 50 boundary
-         nearmhz = Nearest50Mhz;
-      else
-      {
-         nearmhz = Nearest66Mhz;
-         if (nearmhz==666) // hack around -- !!!!TBD - other cases?!?!
-            nearmhz = 667;
-      }
-
-      // !!!TBD
-      // would be nice if we stored both the calculated and the adjusted/guessed values.
-      PlatformSystemInfo.processor.mhz = nearmhz; // hold onto adjusted value only.
-   }
-#endif
 
    if (mhz==0)
    {
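
The block deleted above queried CPUID and the AMD 3DNow! bit through inline __asm, which 64-bit MSVC does not support. A hedged sketch of the same query using the __cpuid intrinsic from <intrin.h>, which works for both x86 and x64 builds (illustrative only, not the code this commit installs):

```cpp
#include <intrin.h>
#include <cstring>

// Hypothetical helper producing the same vendor/processor/properties values the
// deleted inline asm did.
void detectX86CPUInfoSketch(char vendor[13], unsigned *processor, unsigned *properties)
{
   int regs[4] = { 0, 0, 0, 0 };              // regs[0..3] = eax, ebx, ecx, edx

   __cpuid(regs, 0);                          // leaf 0: vendor string in ebx:edx:ecx
   std::memcpy(vendor + 0, &regs[1], 4);
   std::memcpy(vendor + 4, &regs[3], 4);
   std::memcpy(vendor + 8, &regs[2], 4);
   vendor[12] = '\0';

   __cpuid(regs, 1);                          // leaf 1: model and feature bits
   *processor  = (unsigned)regs[0] & 0x0ff0;  // same model mask the asm used
   *properties = (unsigned)regs[3];           // edx feature flags

   __cpuid(regs, (int)0x80000000);            // any extended leaves?
   if ((unsigned)regs[0] > 0x80000000u)
   {
      __cpuid(regs, (int)0x80000001);
      *properties |= (unsigned)regs[3] & 0x80000000u;   // 3DNow! flag (edx bit 31)
   }
}
```

With the RDTSC estimator removed as well, mhz stays 0 here, so the if (mhz==0) branch that follows is now always taken; a non-asm estimate could be rebuilt on the __rdtsc intrinsic if it were ever needed again.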