Răsfoiți Sursa

Added float4_stx to store single value. Fixed float4 reference implementation itof/ftoi.

bkaradzic 12 ani în urmă
părinte
comite
947742a5d0
5 a modificat fișierele cu 31 adăugiri și 15 ștergeri
  1. 8 4
      include/bx/allocator.h
  2. 13 8
      include/bx/float4_ref.h
  3. 5 0
      include/bx/float4_sse.h
  4. 3 1
      include/bx/os.h
  5. 2 2
      include/bx/string.h

+ 8 - 4
include/bx/allocator.h

@@ -28,6 +28,10 @@
 #	define BX_ALIGNED_FREE(_allocator, _ptr)           bx::alignedFree(_allocator, _ptr)
 #endif // BX_CONFIG_DEBUG_ALLOC
 
+#ifndef BX_CONFIG_ALLOCATOR_NATURAL_ALIGNMENT // only supply a default when no value was defined earlier
+#	define BX_CONFIG_ALLOCATOR_NATURAL_ALIGNMENT 8 // default for alignPtr's _align parameter
+#endif // BX_CONFIG_ALLOCATOR_NATURAL_ALIGNMENT
+
 namespace bx
 {
 	struct BX_NO_VTABLE AllocatorI
@@ -87,11 +91,11 @@ namespace bx
 		return _allocator->alignedRealloc(_ptr, _size, _align, _file, _line);
 	}
 
-	inline void* alignPtr(void* _ptr, size_t _align)
+	inline void* alignPtr(void* _ptr, size_t _extra, size_t _align = BX_CONFIG_ALLOCATOR_NATURAL_ALIGNMENT)
 	{
 		union { void* ptr; size_t addr; } un;
 		un.ptr = _ptr;
-		size_t unaligned = un.addr + sizeof(uint32_t); // space for header
+		size_t unaligned = un.addr + _extra; // space for header
 		size_t mask = _align-1;
 		size_t aligned = BX_ALIGN_MASK(unaligned, mask);
 		un.addr = aligned;
@@ -102,7 +106,7 @@ namespace bx
 	{
 		size_t total = _size + _align;
 		uint8_t* ptr = (uint8_t*)alloc(_allocator, total, _file, _line);
-		uint8_t* aligned = (uint8_t*)alignPtr(ptr, _align);
+		uint8_t* aligned = (uint8_t*)alignPtr(ptr, sizeof(uint32_t), _align);
 		uint32_t* header = (uint32_t*)aligned - 1;
 		*header = uint32_t(aligned - ptr);
 		return aligned;
@@ -128,7 +132,7 @@ namespace bx
 		uint8_t* ptr = aligned - offset;
 		size_t total = _size + _align;
 		ptr = (uint8_t*)realloc(_allocator, ptr, total, _file, _line);
-		uint8_t* newAligned = (uint8_t*)alignPtr(ptr, _align);
+		uint8_t* newAligned = (uint8_t*)alignPtr(ptr, sizeof(uint32_t), _align);
 
 		if (newAligned == aligned)
 		{

+ 13 - 8
include/bx/float4_ref.h

@@ -190,6 +190,11 @@ IMPLEMENT_TEST(xyzw , 0xf);
 		*reinterpret_cast<float4_t*>(_ptr) = _a;
 	}
 
+	BX_FLOAT4_INLINE void float4_stx(void* _ptr, float4_t _a) // reference version: store a single element of _a to _ptr
+	{
+		*reinterpret_cast<uint32_t*>(_ptr) = _a.uxyzw[0]; // writes element 0 only, as a raw uint32_t; the other three elements are not stored
+	}
+
 	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
 	{
 		*reinterpret_cast<float4_t*>(_ptr) = _a;
@@ -239,20 +244,20 @@ IMPLEMENT_TEST(xyzw , 0xf);
 	BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a) // element-wise integer -> float conversion
 	{
 		float4_t result; // not initialized here; all four float elements are assigned below
-		result.fxyzw[0] = (float)result.ixyzw[0]; // removed: read the uninitialized result instead of the argument _a
-		result.fxyzw[1] = (float)result.ixyzw[1]; // removed: same defect, element 1
-		result.fxyzw[2] = (float)result.ixyzw[2]; // removed: same defect, element 2
-		result.fxyzw[3] = (float)result.ixyzw[3]; // removed: same defect, element 3
+		result.fxyzw[0] = (float)_a.ixyzw[0]; // added: convert element 0 of the input
+		result.fxyzw[1] = (float)_a.ixyzw[1]; // added: convert element 1 of the input
+		result.fxyzw[2] = (float)_a.ixyzw[2]; // added: convert element 2 of the input
+		result.fxyzw[3] = (float)_a.ixyzw[3]; // added: convert element 3 of the input
 		return result;
 	}
 
 	BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a) // element-wise float -> integer conversion
 	{
 		float4_t result; // not initialized here; all four integer elements are assigned below
-		result.ixyzw[0] = (int)result.fxyzw[0]; // removed: read the uninitialized result instead of the argument _a
-		result.ixyzw[1] = (int)result.fxyzw[1]; // removed: same defect, element 1
-		result.ixyzw[2] = (int)result.fxyzw[2]; // removed: same defect, element 2
-		result.ixyzw[3] = (int)result.fxyzw[3]; // removed: same defect, element 3
+		result.ixyzw[0] = (int)_a.fxyzw[0]; // added: the (int) cast discards the fractional part of input element 0
+		result.ixyzw[1] = (int)_a.fxyzw[1]; // added: same conversion, element 1
+		result.ixyzw[2] = (int)_a.fxyzw[2]; // added: same conversion, element 2
+		result.ixyzw[3] = (int)_a.fxyzw[3]; // added: same conversion, element 3
 		return result;
 	}
 

+ 5 - 0
include/bx/float4_sse.h

@@ -143,6 +143,11 @@ IMPLEMENT_TEST(xyzw , 0xf);
 		_mm_store_ps(reinterpret_cast<float*>(_ptr), _a);
 	}
 
+	BX_FLOAT4_INLINE void float4_stx(void* _ptr, float4_t _a) // SSE version: store a single float of _a to _ptr
+	{
+		_mm_store_ss(reinterpret_cast<float*>(_ptr), _a); // _mm_store_ss writes only the lowest float of the register
+	}
+
 	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
 	{
 		_mm_stream_ps(reinterpret_cast<float*>(_ptr), _a);

+ 3 - 1
include/bx/os.h

@@ -8,7 +8,9 @@
 
 #include "bx.h"
 
-#if BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX || BX_PLATFORM_OSX || BX_PLATFORM_IOS
+#if BX_PLATFORM_WINDOWS
+#	include <windows.h>
+#elif BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX || BX_PLATFORM_OSX || BX_PLATFORM_IOS
 #	include <sched.h> // sched_yield
 #	if BX_PLATFORM_NACL
 #		include <sys/nacl_syscalls.h> // nanosleep

+ 2 - 2
include/bx/string.h

@@ -8,7 +8,7 @@
 
 #include "bx.h"
 #include <alloca.h>
-#include <ctype.h> // tolower
+#include <ctype.h>  // tolower
 #include <stdarg.h> // va_list
 #include <stdio.h>  // vsnprintf, vsnwprintf
 #include <string.h>
@@ -19,7 +19,7 @@ namespace bx
 {
 	inline bool toBool(const char* _str) // interprets the first character of _str as a boolean flag
 	{
-		char ch = tolower(_str[0]); // removed: implicit int -> char narrowing of tolower's result
+		char ch = (char)tolower(_str[0]); // added: explicit narrowing cast; NOTE(review): tolower on a negative char value is undefined - consider casting the argument to unsigned char
 		return ch == 't' ||  ch == '1'; // true only when the lowercased first character is 't' or the character is '1'
 	}