فهرست منبع

Fixed SSE2 compile time detection for MSVC.

bkaradzic 13 سال پیش
والد
کامیت
6d911800f2
7 فایلهای تغییر یافته به همراه 32 افزوده شده و 13 حذف شده
  1. 0 1
      include/bx/bx.h
  2. 0 8
      include/bx/cpu.h
  3. 15 0
      include/bx/float4_neon.h
  4. 5 0
      include/bx/float4_ref.h
  5. 5 4
      include/bx/float4_sse.h
  6. 4 0
      include/bx/float4_t.h
  7. 3 0
      include/bx/macros.h

+ 0 - 1
include/bx/bx.h

@@ -20,4 +20,3 @@ using namespace bx;
 #endif // BX_NAMESPACE
 
 #endif // __BX_H__ 
-

+ 0 - 8
include/bx/cpu.h

@@ -31,14 +31,6 @@ extern "C" void _ReadWriteBarrier();
 
 namespace bx
 {
-#if BX_COMPILER_MSVC
-#	define BX_CACHE_LINE_ALIGN_MARKER() __declspec(align(BX_CACHE_LINE_SIZE) ) struct {}
-#else
-#	define BX_CACHE_LINE_ALIGN_MARKER() struct {} __attribute__( (__aligned__(BX_CACHE_LINE_SIZE) ) )
-#endif // BX_COMPILER_
-
-#define BX_CACHE_LINE_ALIGN(_def) BX_CACHE_LINE_ALIGN_MARKER(); _def; BX_CACHE_LINE_ALIGN_MARKER()
-
 	inline void readBarrier()
 	{
 #if BX_COMPILER_MSVC

+ 15 - 0
include/bx/float4_neon.h

@@ -104,6 +104,21 @@ namespace bx
 		return _a.fxyzw[3];
 	}
 
+//	BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
+//	{
+//		return _mm_load_ps(reinterpret_cast<const float*>(_ptr) );
+//	}
+
+//	BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
+//	{
+//		_mm_store_ps(reinterpret_cast<float*>(_ptr), _a);
+//	}
+
+//	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
+//	{
+//		_mm_stream_ps(reinterpret_cast<float*>(_ptr), _a);
+//	}
+
 	BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
 	{
 		const float32_t val[4] = {_x, _y, _z, _w};

+ 5 - 0
include/bx/float4_ref.h

@@ -190,6 +190,11 @@ IMPLEMENT_TEST(xyzw , 0xf);
 		*reinterpret_cast<float4_t*>(_ptr) = _a;
 	}
 
+	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
+	{
+		*reinterpret_cast<float4_t*>(_ptr) = _a;
+	}
+
 	BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
 	{
 		float4_t result;

+ 5 - 4
include/bx/float4_sse.h

@@ -6,10 +6,6 @@
 #ifndef __BX_FLOAT4_SSE_H__
 #define __BX_FLOAT4_SSE_H__
 
-#if !defined(__SSE2__)
-#	error "float4_t requires at least SSE2"
-#endif // !defined(__SSE2__)
-
 #include <stdint.h>
 
 #include <emmintrin.h> // __m128i
@@ -149,6 +145,11 @@ IMPLEMENT_TEST(xyzw , 0xf);
 		_mm_store_ps(reinterpret_cast<float*>(_ptr), _a);
 	}
 
+	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
+	{
+		_mm_stream_ps(reinterpret_cast<float*>(_ptr), _a);
+	}
+
 	BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
 	{
 		return _mm_set_ps(_w, _z, _y, _x);

+ 4 - 0
include/bx/float4_t.h

@@ -11,6 +11,10 @@
 
 #define BX_FLOAT4_INLINE BX_FORCE_INLINE
 
+#if BX_COMPILER_MSVC && (BX_ARCH_64BIT || _M_IX86_FP >= 2)
+#	define __SSE2__
+#endif // BX_COMPILER_
+
 #if defined(__SSE2__)
 #	include "float4_sse.h"
 #elif 0 // __ARM_NEON__

+ 3 - 0
include/bx/macros.h

@@ -57,6 +57,9 @@
 #	error "Unknown BX_COMPILER_?"
 #endif
 
+#define BX_CACHE_LINE_ALIGN_MARKER() BX_ALIGN_STRUCT(BX_CACHE_LINE_SIZE, struct) {}
+#define BX_CACHE_LINE_ALIGN(_def) BX_CACHE_LINE_ALIGN_MARKER(); _def; BX_CACHE_LINE_ALIGN_MARKER()
+
 #define BX_ALIGN_STRUCT_16(_struct) BX_ALIGN_STRUCT(16, _struct)
 #define BX_ALIGN_STRUCT_256(_struct) BX_ALIGN_STRUCT(256, _struct)