瀏覽代碼

Merge pull request #1761 from Bloodknight/intrinsicsfix

Intrinsicsfix
Areloch 9 年之前
父節點
當前提交
f88975121d

+ 3 - 4
Engine/source/core/ogg/oggTheoraDecoder.cpp

@@ -285,8 +285,7 @@ U32 OggTheoraDecoder::read( OggTheoraFrame** buffer, U32 num )
       
       
       // Transcode the packet.
       // Transcode the packet.
       
       
-      #if ( defined( TORQUE_COMPILER_GCC ) || defined( TORQUE_COMPILER_VISUALC ) ) && defined( TORQUE_CPU_X86 )
-      
+      #if ( defined( TORQUE_COMPILER_GCC ) || defined( TORQUE_COMPILER_VISUALC ) ) && (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))      
       if(      ( mTranscoder == TRANSCODER_Auto || mTranscoder == TRANSCODER_SSE2420RGBA ) &&
       if(      ( mTranscoder == TRANSCODER_Auto || mTranscoder == TRANSCODER_SSE2420RGBA ) &&
                getDecoderPixelFormat() == PIXEL_FORMAT_420 &&
                getDecoderPixelFormat() == PIXEL_FORMAT_420 &&
                Platform::SystemInfo.processor.properties & CPU_PROP_SSE2 &&
                Platform::SystemInfo.processor.properties & CPU_PROP_SSE2 &&
@@ -420,7 +419,7 @@ void OggTheoraDecoder::_transcode( th_ycbcr_buffer ycbcr, U8* buffer, const U32
 }
 }
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
-#if defined( TORQUE_CPU_X86 )
+#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
 void OggTheoraDecoder::_transcode420toRGBA_SSE2( th_ycbcr_buffer ycbcr, U8* buffer, U32 width, U32 height, U32 pitch )
 void OggTheoraDecoder::_transcode420toRGBA_SSE2( th_ycbcr_buffer ycbcr, U8* buffer, U32 width, U32 height, U32 pitch )
 {
 {
    AssertFatal( width % 2 == 0, "OggTheoraDecoder::_transcode420toRGBA_SSE2() - width must be multiple of 2" );
    AssertFatal( width % 2 == 0, "OggTheoraDecoder::_transcode420toRGBA_SSE2() - width must be multiple of 2" );
@@ -560,7 +559,7 @@ void OggTheoraDecoder::_transcode420toRGBA_SSE2( th_ycbcr_buffer ycbcr, U8* buff
          jnz hloop
          jnz hloop
    };
    };
    
    
-   #elif defined( TORQUE_COMPILER_GCC ) && defined( TORQUE_CPU_X86 )
+   #elif defined( TORQUE_COMPILER_GCC ) && (defined( TORQUE_CPU_X86 )  || defined( TORQUE_CPU_X64 ))
 
 
    asm(  "pushal\n"                                // Save all general-purpose registers.
    asm(  "pushal\n"                                // Save all general-purpose registers.
          
          

+ 1 - 1
Engine/source/core/ogg/oggTheoraDecoder.h

@@ -172,7 +172,7 @@ class OggTheoraDecoder : public OggDecoder,
       /// Generic transcoder going from any of the Y'CbCr pixel formats to
       /// Generic transcoder going from any of the Y'CbCr pixel formats to
       /// any RGB format (that is supported by GFXFormatUtils).
       /// any RGB format (that is supported by GFXFormatUtils).
       void _transcode( th_ycbcr_buffer ycbcr, U8* buffer, U32 width, U32 height );
       void _transcode( th_ycbcr_buffer ycbcr, U8* buffer, U32 width, U32 height );
-#if defined( TORQUE_CPU_X86 )
+#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
       /// Transcoder with fixed 4:2:0 to RGBA conversion using SSE2 assembly. Unused on 64 bit architecture.
       /// Transcoder with fixed 4:2:0 to RGBA conversion using SSE2 assembly. Unused on 64 bit architecture.
       void _transcode420toRGBA_SSE2( th_ycbcr_buffer ycbcr, U8* buffer, U32 width, U32 height, U32 pitch );
       void _transcode420toRGBA_SSE2( th_ycbcr_buffer ycbcr, U8* buffer, U32 width, U32 height, U32 pitch );
 #endif
 #endif

+ 1 - 1
Engine/source/math/mMathSSE.cpp

@@ -203,7 +203,7 @@ extern "C"
    void SSE_MatrixF_x_MatrixF_Aligned(const F32 *matA, const F32 *matB, F32 *result);
    void SSE_MatrixF_x_MatrixF_Aligned(const F32 *matA, const F32 *matB, F32 *result);
 }
 }
 
 
-#elif defined( TORQUE_COMPILER_GCC ) && defined( TORQUE_CPU_X86 )
+#elif defined( TORQUE_COMPILER_GCC ) && (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
 #define ADD_SSE_FN
 #define ADD_SSE_FN
 
 
 void SSE_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)
 void SSE_MatrixF_x_MatrixF(const F32 *matA, const F32 *matB, F32 *result)

+ 1 - 1
Engine/source/ts/arch/tsMeshIntrinsics.arch.h

@@ -23,7 +23,7 @@
 #ifndef _TSMESHINTRINSICS_ARCH_H_
 #ifndef _TSMESHINTRINSICS_ARCH_H_
 #define _TSMESHINTRINSICS_ARCH_H_
 #define _TSMESHINTRINSICS_ARCH_H_
 
 
-#if defined(TORQUE_CPU_X86)
+#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 )) 
 # // x86 CPU family implementations
 # // x86 CPU family implementations
 extern void zero_vert_normal_bulk_SSE(const dsize_t count, U8 * __restrict const outPtr, const dsize_t outStride);
 extern void zero_vert_normal_bulk_SSE(const dsize_t count, U8 * __restrict const outPtr, const dsize_t outStride);
 #
 #

+ 1 - 1
Engine/source/ts/arch/tsMeshIntrinsics.sse.cpp

@@ -21,7 +21,7 @@
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 #include "ts/tsMesh.h"
 #include "ts/tsMesh.h"
 
 
-#if defined(TORQUE_CPU_X86)
+#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
 #include "ts/tsMeshIntrinsics.h"
 #include "ts/tsMeshIntrinsics.h"
 #include <xmmintrin.h>
 #include <xmmintrin.h>
 
 

+ 1 - 1
Engine/source/ts/arch/tsMeshIntrinsics.sse4.cpp

@@ -21,7 +21,7 @@
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 #include "ts/tsMesh.h"
 #include "ts/tsMesh.h"
 
 
-#if defined(TORQUE_CPU_X86) && (_MSC_VER >= 1500)
+#if (defined(TORQUE_CPU_X86)  || defined( TORQUE_CPU_X64 )) && (_MSC_VER >= 1500)
 #include "ts/tsMeshIntrinsics.h"
 #include "ts/tsMeshIntrinsics.h"
 #include <smmintrin.h>
 #include <smmintrin.h>
 
 

+ 1 - 1
Engine/source/ts/tsMeshIntrinsics.cpp

@@ -65,7 +65,7 @@ MODULE_BEGIN( TSMeshIntrinsics )
       // Find the best implementation for the current CPU
       // Find the best implementation for the current CPU
       if(Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
       if(Platform::SystemInfo.processor.properties & CPU_PROP_SSE)
       {
       {
-   #if defined(TORQUE_CPU_X86)
+   #if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 )) 
          
          
          zero_vert_normal_bulk = zero_vert_normal_bulk_SSE;
          zero_vert_normal_bulk = zero_vert_normal_bulk_SSE;
    #endif
    #endif