13 years ago · 82aa4d3bc6
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,6 @@
 
															+*.cpp eol=lf
														
 
															+*.h   eol=lf
														
 
															+*.sc  eol=lf
														
 
															+*.sh  eol=lf
														
 
															+*.md  eol=lf
														
 
															+*.lua eol=lf
														
--- a/README.md
+++ b/README.md
@@ -1,39 +1,39 @@
 
															-bx

														
 
															-==

														
 
															-

														
 
															-Base library.

														
 
															-

														
 
															-Contact

														
 
															--------

														
 
															-

														
 
															-[@bkaradzic](https://twitter.com/bkaradzic)  

														
 
															-http://www.stuckingeometry.com

														
 
															-

														
 
															-Project page  

														
 
															-https://github.com/bkaradzic/bx

														
 
															-

														
 
															-License

														
 
															--------

														
 
															-

														
 
															-Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															-

														
 
															-Redistribution and use in source and binary forms, with or without modification,

														
 
															-are permitted provided that the following conditions are met:

														
 
															-

														
 
															-   1. Redistributions of source code must retain the above copyright notice, this

														
 
															-      list of conditions and the following disclaimer.

														
 
															-

														
 
															-   2. Redistributions in binary form must reproduce the above copyright notice,

														
 
															-      this list of conditions and the following disclaimer in the documentation

														
 
															-      and/or other materials provided with the distribution.

														
 
															-

														
 
															-THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR

														
 
															-IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF

														
 
															-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT

														
 
															-SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

														
 
															-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

														
 
															-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

														
 
															-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,

														
 
															-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE

														
 
															-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED

														
 
															-OF THE POSSIBILITY OF SUCH DAMAGE.

														
 
															+bx
														
 
															+==
														
 
															+
														
 
															+Base library.
														
 
															+
														
 
															+Contact
														
 
															+-------
														
 
															+
														
 
															+[@bkaradzic](https://twitter.com/bkaradzic)  
														
 
															+http://www.stuckingeometry.com
														
 
															+
														
 
															+Project page  
														
 
															+https://github.com/bkaradzic/bx
														
 
															+
														
 
															+License
														
 
															+-------
														
 
															+
														
 
															+Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+
														
 
															+Redistribution and use in source and binary forms, with or without modification,
														
 
															+are permitted provided that the following conditions are met:
														
 
															+
														
 
															+   1. Redistributions of source code must retain the above copyright notice, this
														
 
															+      list of conditions and the following disclaimer.
														
 
															+
														
 
															+   2. Redistributions in binary form must reproduce the above copyright notice,
														
 
															+      this list of conditions and the following disclaimer in the documentation
														
 
															+      and/or other materials provided with the distribution.
														
 
															+
														
 
															+THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
														
 
															+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
														
 
															+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
														
 
															+SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
														
 
															+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
														
 
															+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
														
 
															+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
														
 
															+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
														
 
															+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
														
 
															+OF THE POSSIBILITY OF SUCH DAMAGE.
														
--- a/include/bx/bx.h
+++ b/include/bx/bx.h
@@ -1,23 +1,23 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_H__

														
 
															-#define __BX_H__

														
 
															-

														
 
															-#include <stdint.h>

														
 
															-#include "platform.h"

														
 
															-#include "macros.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-}// namespace bx

														
 
															-

														
 
															-#ifndef BX_NAMESPACE

														
 
															-#	define BX_NAMESPACE 0

														
 
															-#elif BX_NAMESPACE

														
 
															-using namespace bx;

														
 
															-#endif // BX_NAMESPACE

														
 
															-

														
 
															-#endif // __BX_H__ 

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_H__
														
 
															+#define __BX_H__
														
 
															+
														
 
															+#include <stdint.h>
														
 
															+#include "platform.h"
														
 
															+#include "macros.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+}// namespace bx
														
 
															+
														
 
															+#ifndef BX_NAMESPACE
														
 
															+#	define BX_NAMESPACE 0
														
 
															+#elif BX_NAMESPACE
														
 
															+using namespace bx;
														
 
															+#endif // BX_NAMESPACE
														
 
															+
														
 
															+#endif // __BX_H__ 
														
--- a/include/bx/commandline.h
+++ b/include/bx/commandline.h
@@ -1,164 +1,164 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_COMMANDLINE_H__

														
 
															-#define __BX_COMMANDLINE_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-#include "string.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	class CommandLine

														
 
															-	{

														
 
															-	public:

														
 
															-		CommandLine(int _argc, char const* const* _argv)

														
 
															-			: m_argc(_argc)

														
 
															-			, m_argv(_argv)

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		const char* findOption(const char* _long, const char* _default) const

														
 
															-		{

														
 
															-			const char* result = find('\0', _long, 1);

														
 
															-			return result == NULL ? _default : result;

														
 
															-		}

														
 
															-

														
 
															-		const char* findOption(const char _short, const char* _long, const char* _default) const

														
 
															-		{

														
 
															-			const char* result = find(_short, _long, 1);

														
 
															-			return result == NULL ? _default : result;

														
 
															-		}

														
 
															-

														
 
															-		const char* findOption(const char* _long, int _numParams = 1) const

														
 
															-		{

														
 
															-			const char* result = find('\0', _long, _numParams);

														
 
															-			return result;

														
 
															-		}

														
 
															-

														
 
															-		const char* findOption(const char _short, const char* _long = NULL, int _numParams = 1) const

														
 
															-		{

														
 
															-			const char* result = find(_short, _long, _numParams);

														
 
															-			return result;

														
 
															-		}

														
 
															-

														
 
															-		bool hasArg(const char _short, const char* _long = NULL) const

														
 
															-		{

														
 
															-			const char* arg = findOption(_short, _long, 0);

														
 
															-			return NULL != arg;

														
 
															-		}

														
 
															-

														
 
															-		bool hasArg(const char* _long) const

														
 
															-		{

														
 
															-			const char* arg = findOption('\0', _long, 0);

														
 
															-			return NULL != arg;

														
 
															-		}

														
 
															-

														
 
															-		bool hasArg(const char*& _value, const char _short, const char* _long = NULL) const

														
 
															-		{

														
 
															-			const char* arg = findOption(_short, _long, 1);

														
 
															-			_value = arg;

														
 
															-			return NULL != arg;

														
 
															-		}

														
 
															-

														
 
															-		bool hasArg(int& _value, const char _short, const char* _long = NULL) const

														
 
															-		{

														
 
															-			const char* arg = findOption(_short, _long, 1);

														
 
															-			if (NULL != arg)

														
 
															-			{

														
 
															-				_value = atoi(arg);

														
 
															-				return true;

														
 
															-			}

														
 
															-

														
 
															-			return false;

														
 
															-		}

														
 
															-

														
 
															-		bool hasArg(unsigned int& _value, const char _short, const char* _long = NULL) const

														
 
															-		{

														
 
															-			const char* arg = findOption(_short, _long, 1);

														
 
															-			if (NULL != arg)

														
 
															-			{

														
 
															-				_value = atoi(arg);

														
 
															-				return true;

														
 
															-			}

														
 
															-

														
 
															-			return false;

														
 
															-		}

														
 
															-

														
 
															-		bool hasArg(bool& _value, const char _short, const char* _long = NULL) const

														
 
															-		{

														
 
															-			const char* arg = findOption(_short, _long, 1);

														
 
															-			if (NULL != arg)

														
 
															-			{

														
 
															-				if ('0' == *arg || stricmp(arg, "false") )

														
 
															-				{

														
 
															-					_value = false;

														
 
															-				}

														
 
															-				else if ('0' != *arg || stricmp(arg, "true") )

														
 
															-				{

														
 
															-					_value = true;

														
 
															-				}

														
 
															-

														
 
															-				return true;

														
 
															-			}

														
 
															-

														
 
															-			return false;

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		const char* find(const char _short, const char* _long, int _numParams) const

														
 
															-		{

														
 
															-			for (int ii = 0; ii < m_argc; ++ii)

														
 
															-			{

														
 
															-				const char* arg = m_argv[ii];

														
 
															-				if ('-' == *arg)

														
 
															-				{

														
 
															-					++arg;

														
 
															-					if (_short == *arg)

														
 
															-					{

														
 
															-						if (1 == strlen(arg) )

														
 
															-						{

														
 
															-							if (0 == _numParams)

														
 
															-							{

														
 
															-								return "";

														
 
															-							}

														
 
															-							else if (ii+_numParams < m_argc

														
 
															-								 && '-' != *m_argv[ii+1] )

														
 
															-							{

														
 
															-								return m_argv[ii+1];

														
 
															-							}

														
 
															-

														
 
															-							return NULL;

														
 
															-						}

														
 
															-					}

														
 
															-					else if (NULL != _long

														
 
															-						 &&  '-' == *arg

														
 
															-						 &&  0 == stricmp(arg+1, _long) )

														
 
															-					{

														
 
															-						if (0 == _numParams)

														
 
															-						{

														
 
															-							return "";

														
 
															-						}

														
 
															-						else if (ii+_numParams < m_argc

														
 
															-								&&  '-' != *m_argv[ii+1] )

														
 
															-						{

														
 
															-							return m_argv[ii+1];

														
 
															-						}

														
 
															-

														
 
															-						return NULL;

														
 
															-					}

														
 
															-				}

														
 
															-			}

														
 
															-

														
 
															-			return NULL;

														
 
															-		}

														
 
															-

														
 
															-		int m_argc;

														
 
															-		char const* const* m_argv;

														
 
															-	};

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif /// __BX_COMMANDLINE_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_COMMANDLINE_H__
														
 
															+#define __BX_COMMANDLINE_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+#include "string.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	class CommandLine
														
 
															+	{
														
 
															+	public:
														
 
															+		CommandLine(int _argc, char const* const* _argv)
														
 
															+			: m_argc(_argc)
														
 
															+			, m_argv(_argv)
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		const char* findOption(const char* _long, const char* _default) const
														
 
															+		{
														
 
															+			const char* result = find('\0', _long, 1);
														
 
															+			return result == NULL ? _default : result;
														
 
															+		}
														
 
															+
														
 
															+		const char* findOption(const char _short, const char* _long, const char* _default) const
														
 
															+		{
														
 
															+			const char* result = find(_short, _long, 1);
														
 
															+			return result == NULL ? _default : result;
														
 
															+		}
														
 
															+
														
 
															+		const char* findOption(const char* _long, int _numParams = 1) const
														
 
															+		{
														
 
															+			const char* result = find('\0', _long, _numParams);
														
 
															+			return result;
														
 
															+		}
														
 
															+
														
 
															+		const char* findOption(const char _short, const char* _long = NULL, int _numParams = 1) const
														
 
															+		{
														
 
															+			const char* result = find(_short, _long, _numParams);
														
 
															+			return result;
														
 
															+		}
														
 
															+
														
 
															+		bool hasArg(const char _short, const char* _long = NULL) const
														
 
															+		{
														
 
															+			const char* arg = findOption(_short, _long, 0);
														
 
															+			return NULL != arg;
														
 
															+		}
														
 
															+
														
 
															+		bool hasArg(const char* _long) const
														
 
															+		{
														
 
															+			const char* arg = findOption('\0', _long, 0);
														
 
															+			return NULL != arg;
														
 
															+		}
														
 
															+
														
 
															+		bool hasArg(const char*& _value, const char _short, const char* _long = NULL) const
														
 
															+		{
														
 
															+			const char* arg = findOption(_short, _long, 1);
														
 
															+			_value = arg;
														
 
															+			return NULL != arg;
														
 
															+		}
														
 
															+
														
 
															+		bool hasArg(int& _value, const char _short, const char* _long = NULL) const
														
 
															+		{
														
 
															+			const char* arg = findOption(_short, _long, 1);
														
 
															+			if (NULL != arg)
														
 
															+			{
														
 
															+				_value = atoi(arg);
														
 
															+				return true;
														
 
															+			}
														
 
															+
														
 
															+			return false;
														
 
															+		}
														
 
															+
														
 
															+		bool hasArg(unsigned int& _value, const char _short, const char* _long = NULL) const
														
 
															+		{
														
 
															+			const char* arg = findOption(_short, _long, 1);
														
 
															+			if (NULL != arg)
														
 
															+			{
														
 
															+				_value = atoi(arg);
														
 
															+				return true;
														
 
															+			}
														
 
															+
														
 
															+			return false;
														
 
															+		}
														
 
															+
														
 
															+		bool hasArg(bool& _value, const char _short, const char* _long = NULL) const
														
 
															+		{
														
 
															+			const char* arg = findOption(_short, _long, 1);
														
 
															+			if (NULL != arg)
														
 
															+			{
														
 
															+				if ('0' == *arg || stricmp(arg, "false") )
														
 
															+				{
														
 
															+					_value = false;
														
 
															+				}
														
 
															+				else if ('0' != *arg || stricmp(arg, "true") )
														
 
															+				{
														
 
															+					_value = true;
														
 
															+				}
														
 
															+
														
 
															+				return true;
														
 
															+			}
														
 
															+
														
 
															+			return false;
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		const char* find(const char _short, const char* _long, int _numParams) const
														
 
															+		{
														
 
															+			for (int ii = 0; ii < m_argc; ++ii)
														
 
															+			{
														
 
															+				const char* arg = m_argv[ii];
														
 
															+				if ('-' == *arg)
														
 
															+				{
														
 
															+					++arg;
														
 
															+					if (_short == *arg)
														
 
															+					{
														
 
															+						if (1 == strlen(arg) )
														
 
															+						{
														
 
															+							if (0 == _numParams)
														
 
															+							{
														
 
															+								return "";
														
 
															+							}
														
 
															+							else if (ii+_numParams < m_argc
														
 
															+								 && '-' != *m_argv[ii+1] )
														
 
															+							{
														
 
															+								return m_argv[ii+1];
														
 
															+							}
														
 
															+
														
 
															+							return NULL;
														
 
															+						}
														
 
															+					}
														
 
															+					else if (NULL != _long
														
 
															+						 &&  '-' == *arg
														
 
															+						 &&  0 == stricmp(arg+1, _long) )
														
 
															+					{
														
 
															+						if (0 == _numParams)
														
 
															+						{
														
 
															+							return "";
														
 
															+						}
														
 
															+						else if (ii+_numParams < m_argc
														
 
															+								&&  '-' != *m_argv[ii+1] )
														
 
															+						{
														
 
															+							return m_argv[ii+1];
														
 
															+						}
														
 
															+
														
 
															+						return NULL;
														
 
															+					}
														
 
															+				}
														
 
															+			}
														
 
															+
														
 
															+			return NULL;
														
 
															+		}
														
 
															+
														
 
															+		int m_argc;
														
 
															+		char const* const* m_argv;
														
 
															+	};
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif /// __BX_COMMANDLINE_H__
														
--- a/include/bx/countof.h
+++ b/include/bx/countof.h
@@ -1,19 +1,19 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_COUNTOF_H__

														
 
															-#define __BX_COUNTOF_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	// http://cnicholson.net/2011/01/stupid-c-tricks-a-better-sizeof_array/

														
 
															-	template<typename T, size_t N> char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(const T(&)[N]) )[N];

														
 
															-#define countof(x) sizeof(bx::COUNTOF_REQUIRES_ARRAY_ARGUMENT(x) )

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_COUNTOF_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_COUNTOF_H__
														
 
															+#define __BX_COUNTOF_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	// http://cnicholson.net/2011/01/stupid-c-tricks-a-better-sizeof_array/
														
 
															+	template<typename T, size_t N> char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(const T(&)[N]) )[N];
														
 
															+#define countof(x) sizeof(bx::COUNTOF_REQUIRES_ARRAY_ARGUMENT(x) )
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_COUNTOF_H__
														
--- a/include/bx/cpu.h
+++ b/include/bx/cpu.h
@@ -1,102 +1,102 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_CPU_H__

														
 
															-#define __BX_CPU_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-#if BX_COMPILER_MSVC

														
 
															-#	if BX_PLATFORM_XBOX360

														
 
															-#		include <ppcintrinsics.h>

														
 
															-#		include <xtl.h>

														
 
															-#	else

														
 
															-#		include <math.h> // math.h is included because VS bitches:

														
 
															-						 // warning C4985: 'ceil': attributes not present on previous declaration.

														
 
															-						 // must be included before intrin.h.

														
 
															-#		include <intrin.h>

														
 
															-#		include <windows.h>

														
 
															-#	endif // !BX_PLATFORM_XBOX360

														
 
															-extern "C" void _ReadBarrier();

														
 
															-extern "C" void _WriteBarrier();

														
 
															-extern "C" void _ReadWriteBarrier();

														
 
															-#	pragma intrinsic(_ReadBarrier)

														
 
															-#	pragma intrinsic(_WriteBarrier)

														
 
															-#	pragma intrinsic(_ReadWriteBarrier)

														
 
															-#	pragma intrinsic(_InterlockedIncrement)

														
 
															-#	pragma intrinsic(_InterlockedDecrement)

														
 
															-#endif // BX_COMPILER_MSVC

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	inline void readBarrier()

														
 
															-	{

														
 
															-#if BX_COMPILER_MSVC

														
 
															-		_ReadBarrier();

														
 
															-#elif BX_COMPILER_GCC || BX_COMPILER_CLANG

														
 
															-		asm volatile("":::"memory");

														
 
															-#endif // BX_COMPILER

														
 
															-	}

														
 
															-

														
 
															-	inline void writeBarrier()

														
 
															-	{

														
 
															-#if BX_COMPILER_MSVC

														
 
															-		_WriteBarrier();

														
 
															-#elif BX_COMPILER_GCC || BX_COMPILER_CLANG

														
 
															-		asm volatile("":::"memory");

														
 
															-#endif // BX_COMPILER

														
 
															-	}

														
 
															-

														
 
															-	inline void readWriteBarrier()

														
 
															-	{

														
 
															-#if BX_COMPILER_MSVC

														
 
															-		_ReadWriteBarrier();

														
 
															-#elif BX_COMPILER_GCC || BX_COMPILER_CLANG

														
 
															-		asm volatile("":::"memory");

														
 
															-#endif // BX_COMPILER

														
 
															-	}

														
 
															-

														
 
															-	inline void memoryBarrier()

														
 
															-	{

														
 
															-#if BX_PLATFORM_XBOX360

														
 
															-		__lwsync();

														
 
															-#elif BX_COMPILER_MSVC

														
 
															-		_mm_mfence();

														
 
															-#else

														
 
															-		__sync_synchronize();

														
 
															-//		asm volatile("mfence":::"memory");

														
 
															-#endif // BX_COMPILER

														
 
															-	}

														
 
															-

														
 
															-	inline int32_t atomicIncr(volatile void* _var)

														
 
															-	{

														
 
															-#if BX_COMPILER_MSVC

														
 
															-		return _InterlockedIncrement( (volatile LONG*)(_var) );

														
 
															-#elif BX_COMPILER_GCC || BX_COMPILER_CLANG

														
 
															-		return __sync_fetch_and_add( (volatile int32_t*)_var, 1);

														
 
															-#endif // BX_COMPILER

														
 
															-	}

														
 
															-

														
 
															-	inline int32_t atomicDecr(volatile void* _var)

														
 
															-	{

														
 
															-#if BX_COMPILER_MSVC

														
 
															-		return _InterlockedDecrement( (volatile LONG*)(_var) );

														
 
															-#elif BX_COMPILER_GCC || BX_COMPILER_CLANG

														
 
															-		return __sync_fetch_and_sub( (volatile int32_t*)_var, 1);

														
 
															-#endif // BX_COMPILER

														
 
															-	}

														
 
															-

														
 
															-	inline void* atomicExchangePtr(void** _target, void* _ptr)

														
 
															-	{

														
 
															-#if BX_COMPILER_MSVC

														
 
															-		return InterlockedExchangePointer(_target, _ptr);

														
 
															-#elif BX_COMPILER_GCC || BX_COMPILER_CLANG

														
 
															-		return __sync_lock_test_and_set(_target, _ptr);

														
 
															-#endif // BX_COMPILER

														
 
															-	}

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_CPU_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_CPU_H__
														
 
															+#define __BX_CPU_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+#if BX_COMPILER_MSVC
														
 
															+#	if BX_PLATFORM_XBOX360
														
 
															+#		include <ppcintrinsics.h>
														
 
															+#		include <xtl.h>
														
 
															+#	else
														
 
															+#		include <math.h> // math.h is included because VS bitches:
														
 
															+						 // warning C4985: 'ceil': attributes not present on previous declaration.
														
 
															+						 // must be included before intrin.h.
														
 
															+#		include <intrin.h>
														
 
															+#		include <windows.h>
														
 
															+#	endif // !BX_PLATFORM_XBOX360
														
 
															+extern "C" void _ReadBarrier();
														
 
															+extern "C" void _WriteBarrier();
														
 
															+extern "C" void _ReadWriteBarrier();
														
 
															+#	pragma intrinsic(_ReadBarrier)
														
 
															+#	pragma intrinsic(_WriteBarrier)
														
 
															+#	pragma intrinsic(_ReadWriteBarrier)
														
 
															+#	pragma intrinsic(_InterlockedIncrement)
														
 
															+#	pragma intrinsic(_InterlockedDecrement)
														
 
															+#endif // BX_COMPILER_MSVC
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	inline void readBarrier()
														
 
															+	{
														
 
															+#if BX_COMPILER_MSVC
														
 
															+		_ReadBarrier();
														
 
															+#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
														
 
															+		asm volatile("":::"memory");
														
 
															+#endif // BX_COMPILER
														
 
															+	}
														
 
															+
														
 
															+	inline void writeBarrier()
														
 
															+	{
														
 
															+#if BX_COMPILER_MSVC
														
 
															+		_WriteBarrier();
														
 
															+#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
														
 
															+		asm volatile("":::"memory");
														
 
															+#endif // BX_COMPILER
														
 
															+	}
														
 
															+
														
 
															+	inline void readWriteBarrier()
														
 
															+	{
														
 
															+#if BX_COMPILER_MSVC
														
 
															+		_ReadWriteBarrier();
														
 
															+#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
														
 
															+		asm volatile("":::"memory");
														
 
															+#endif // BX_COMPILER
														
 
															+	}
														
 
															+
														
 
															+	inline void memoryBarrier()
														
 
															+	{
														
 
															+#if BX_PLATFORM_XBOX360
														
 
															+		__lwsync();
														
 
															+#elif BX_COMPILER_MSVC
														
 
															+		_mm_mfence();
														
 
															+#else
														
 
															+		__sync_synchronize();
														
 
															+//		asm volatile("mfence":::"memory");
														
 
															+#endif // BX_COMPILER
														
 
															+	}
														
 
															+
														
 
															+	inline int32_t atomicIncr(volatile void* _var)
														
 
															+	{
														
 
															+#if BX_COMPILER_MSVC
														
 
															+		return _InterlockedIncrement( (volatile LONG*)(_var) );
														
 
															+#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
														
 
															+		return __sync_fetch_and_add( (volatile int32_t*)_var, 1);
														
 
															+#endif // BX_COMPILER
														
 
															+	}
														
 
															+
														
 
															+	inline int32_t atomicDecr(volatile void* _var)
														
 
															+	{
														
 
															+#if BX_COMPILER_MSVC
														
 
															+		return _InterlockedDecrement( (volatile LONG*)(_var) );
														
 
															+#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
														
 
															+		return __sync_fetch_and_sub( (volatile int32_t*)_var, 1);
														
 
															+#endif // BX_COMPILER
														
 
															+	}
														
 
															+
														
 
															+	inline void* atomicExchangePtr(void** _target, void* _ptr)
														
 
															+	{
														
 
															+#if BX_COMPILER_MSVC
														
 
															+		return InterlockedExchangePointer(_target, _ptr);
														
 
															+#elif BX_COMPILER_GCC || BX_COMPILER_CLANG
														
 
															+		return __sync_lock_test_and_set(_target, _ptr);
														
 
															+#endif // BX_COMPILER
														
 
															+	}
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_CPU_H__
														
--- a/include/bx/debug.h
+++ b/include/bx/debug.h
@@ -1,31 +1,31 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_DEBUG_H__

														
 
															-#define __BX_DEBUG_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	inline void debugBreak()

														
 
															-	{

														
 
															-#if BX_COMPILER_MSVC

														
 
															-		__debugbreak();

														
 
															-#elif BX_CPU_ARM

														
 
															-		asm("bkpt 0");

														
 
															-#elif !BX_PLATFORM_NACL && BX_CPU_X86 && (BX_COMPILER_GCC || BX_COMPILER_CLANG)

														
 
															-		// NaCl doesn't like int 3:

														
 
															-		// NativeClient: NaCl module load failed: Validation failure. File violates Native Client safety rules.

														
 
															-		__asm__ ("int $3");

														
 
															-#else // cross platform implementation

														
 
															-		int* int3 = (int*)3L;

														
 
															-		*int3 = 3;

														
 
															-#endif // BX

														
 
															-	}

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_DEBUG_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_DEBUG_H__
														
 
															+#define __BX_DEBUG_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	inline void debugBreak()
														
 
															+	{
														
 
															+#if BX_COMPILER_MSVC
														
 
															+		__debugbreak();
														
 
															+#elif BX_CPU_ARM
														
 
															+		asm("bkpt 0");
														
 
															+#elif !BX_PLATFORM_NACL && BX_CPU_X86 && (BX_COMPILER_GCC || BX_COMPILER_CLANG)
														
 
															+		// NaCl doesn't like int 3:
														
 
															+		// NativeClient: NaCl module load failed: Validation failure. File violates Native Client safety rules.
														
 
															+		__asm__ ("int $3");
														
 
															+#else // cross platform implementation
														
 
															+		int* int3 = (int*)3L;
														
 
															+		*int3 = 3;
														
 
															+#endif // BX
														
 
															+	}
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_DEBUG_H__
														
--- a/include/bx/endian.h
+++ b/include/bx/endian.h
@@ -1,71 +1,71 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_ENDIAN_H__

														
 
															-#define __BX_ENDIAN_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	inline uint16_t endianSwap(uint16_t _in)

														
 
															-	{

														
 
															-		return (_in>>8) | (_in<<8);

														
 
															-	}

														
 
															-	

														
 
															-	inline uint32_t endianSwap(uint32_t _in)

														
 
															-	{

														
 
															-		return (_in>>24) | (_in<<24)

														
 
															-			 | ( (_in&0x00ff0000)>>8) | ( (_in&0x0000ff00)<<8)

														
 
															-			 ;

														
 
															-	}

														
 
															-

														
 
															-	inline uint64_t endianSwap(uint64_t _in)

														
 
															-	{

														
 
															-		return (_in>>56) | (_in<<56)

														
 
															-			 | ( (_in&UINT64_C(0x00ff000000000000) )>>40) | ( (_in&UINT64_C(0x000000000000ff00) )<<40)

														
 
															-			 | ( (_in&UINT64_C(0x0000ff0000000000) )>>24) | ( (_in&UINT64_C(0x0000000000ff0000) )<<24)

														
 
															-			 | ( (_in&UINT64_C(0x000000ff00000000) )>>8)  | ( (_in&UINT64_C(0x00000000ff000000) )<<8)

														
 
															-			 ;

														
 
															-	}

														
 
															-

														
 
															-	inline int16_t endianSwap(int16_t _in)

														
 
															-	{

														
 
															-		return (int16_t)endianSwap( (uint16_t)_in);

														
 
															-	}

														
 
															-

														
 
															-	inline int32_t endianSwap(int32_t _in)

														
 
															-	{

														
 
															-		return (int32_t)endianSwap( (uint32_t)_in);

														
 
															-	}

														
 
															-

														
 
															-	inline int64_t endianSwap(int64_t _in)

														
 
															-	{

														
 
															-		return (int64_t)endianSwap( (uint64_t)_in);

														
 
															-	}

														
 
															-

														
 
															-	template <typename Ty>

														
 
															-	inline Ty littleEndian(Ty& _in)

														
 
															-	{

														
 
															-#if BX_CPU_ENDIAN_BIG

														
 
															-		endianSwap(_in);

														
 
															-#else

														
 
															-		return _in;

														
 
															-#endif // BX_CPU_ENDIAN_BIG

														
 
															-	}

														
 
															-

														
 
															-	template <typename Ty>

														
 
															-	inline Ty bigEndian(Ty& _in)

														
 
															-	{

														
 
															-#if BX_CPU_ENDIAN_LITTLE

														
 
															-		return endianSwap(_in);

														
 
															-#else

														
 
															-		return _in;

														
 
															-#endif // BX_CPU_ENDIAN_LITTLE

														
 
															-	}

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_ENDIAN_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_ENDIAN_H__
														
 
															+#define __BX_ENDIAN_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	inline uint16_t endianSwap(uint16_t _in)
														
 
															+	{
														
 
															+		return (_in>>8) | (_in<<8);
														
 
															+	}
														
 
															+	
														
 
															+	inline uint32_t endianSwap(uint32_t _in)
														
 
															+	{
														
 
															+		return (_in>>24) | (_in<<24)
														
 
															+			 | ( (_in&0x00ff0000)>>8) | ( (_in&0x0000ff00)<<8)
														
 
															+			 ;
														
 
															+	}
														
 
															+
														
 
															+	inline uint64_t endianSwap(uint64_t _in)
														
 
															+	{
														
 
															+		return (_in>>56) | (_in<<56)
														
 
															+			 | ( (_in&UINT64_C(0x00ff000000000000) )>>40) | ( (_in&UINT64_C(0x000000000000ff00) )<<40)
														
 
															+			 | ( (_in&UINT64_C(0x0000ff0000000000) )>>24) | ( (_in&UINT64_C(0x0000000000ff0000) )<<24)
														
 
															+			 | ( (_in&UINT64_C(0x000000ff00000000) )>>8)  | ( (_in&UINT64_C(0x00000000ff000000) )<<8)
														
 
															+			 ;
														
 
															+	}
														
 
															+
														
 
															+	inline int16_t endianSwap(int16_t _in)
														
 
															+	{
														
 
															+		return (int16_t)endianSwap( (uint16_t)_in);
														
 
															+	}
														
 
															+
														
 
															+	inline int32_t endianSwap(int32_t _in)
														
 
															+	{
														
 
															+		return (int32_t)endianSwap( (uint32_t)_in);
														
 
															+	}
														
 
															+
														
 
															+	inline int64_t endianSwap(int64_t _in)
														
 
															+	{
														
 
															+		return (int64_t)endianSwap( (uint64_t)_in);
														
 
															+	}
														
 
															+
														
 
															+	template <typename Ty>
														
 
															+	inline Ty littleEndian(Ty& _in)
														
 
															+	{
														
 
															+#if BX_CPU_ENDIAN_BIG
														
 
															+		endianSwap(_in);
														
 
															+#else
														
 
															+		return _in;
														
 
															+#endif // BX_CPU_ENDIAN_BIG
														
 
															+	}
														
 
															+
														
 
															+	template <typename Ty>
														
 
															+	inline Ty bigEndian(Ty& _in)
														
 
															+	{
														
 
															+#if BX_CPU_ENDIAN_LITTLE
														
 
															+		return endianSwap(_in);
														
 
															+#else
														
 
															+		return _in;
														
 
															+#endif // BX_CPU_ENDIAN_LITTLE
														
 
															+	}
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_ENDIAN_H__
														
--- a/include/bx/float4_neon.h
+++ b/include/bx/float4_neon.h
@@ -1,244 +1,244 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_FLOAT4_NEON_H__

														
 
															-#define __BX_FLOAT4_NEON_H__

														
 
															-

														
 
															-#include <arm_neon.h>

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-

														
 
															-// Reference:

														
 
															-// http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html

														
 
															-// http://blogs.arm.com/software-enablement/161-coding-for-neon-part-1-load-and-stores/

														
 
															-// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/

														
 
															-// http://blogs.arm.com/software-enablement/241-coding-for-neon-part-3-matrix-multiplication/

														
 
															-// http://blogs.arm.com/software-enablement/277-coding-for-neon-part-4-shifting-left-and-right/

														
 
															-// http://blogs.arm.com/software-enablement/684-coding-for-neon-part-5-rearranging-vectors/

														
 
															-

														
 
															-	typedef __builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) );

														
 
															-

														
 
															-#define ELEMx 0

														
 
															-#define ELEMy 1

														
 
															-#define ELEMz 2

														
 
															-#define ELEMw 3

														
 
															-#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \

														
 
															-			BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \

														
 
															-			{ \

														
 
															-				float4_t result; \

														
 
															-				result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \

														
 
															-				result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \

														
 
															-				result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \

														
 
															-				result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \

														
 
															-				return result; \

														
 
															-			}

														
 
															-

														
 
															-#include "float4_swizzle.inl"

														
 
															-

														
 
															-#undef IMPLEMENT_SWIZZLE

														
 
															-#undef ELEMw

														
 
															-#undef ELEMz

														
 
															-#undef ELEMy

														
 
															-#undef ELEMx

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _a; //_mm_movelh_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _a; //_mm_movelh_ps(_b, _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _a; //_mm_movehl_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _a; //_mm_movehl_ps(_b, _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _a; //_mm_unpacklo_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _a; //_mm_unpacklo_ps(_b, _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _a; //_mm_unpackhi_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _a; //_mm_unpackhi_ps(_b, _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float float4_x(float4_t _a)

														
 
															-	{

														
 
															-		return _a.fxyzw[0];

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float float4_y(float4_t _a)

														
 
															-	{

														
 
															-		return _a.fxyzw[1];

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float float4_z(float4_t _a)

														
 
															-	{

														
 
															-		return _a.fxyzw[2];

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float float4_w(float4_t _a)

														
 
															-	{

														
 
															-		return _a.fxyzw[3];

														
 
															-	}

														
 
															-

														
 
															-//	BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)

														
 
															-//	{

														
 
															-//		return _mm_load_ps(reinterpret_cast<const float*>(_ptr) );

														
 
															-//	}

														
 
															-

														
 
															-//	BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)

														
 
															-//	{

														
 
															-//		_mm_store_ps(reinterpret_cast<float*>(_ptr), _a);

														
 
															-//	}

														
 
															-

														
 
															-//	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)

														
 
															-//	{

														
 
															-//		_mm_stream_ps(reinterpret_cast<float*>(_ptr), _a);

														
 
															-//	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)

														
 
															-	{

														
 
															-		const float32_t val[4] = {_x, _y, _z, _w};

														
 
															-		return __builtin_neon_vld1v4sf(val);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)

														
 
															-	{

														
 
															-		const uint32_t val[4] = {_x, _y, _z, _w};

														
 
															-		return (float4_t)__builtin_neon_vld1v4si( (const __builtin_neon_si*)val);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_splat(float _a)

														
 
															-	{

														
 
															-		return __builtin_neon_vdup_nv4sf(_a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)

														
 
															-	{

														
 
															-		return (float4_t)__builtin_neon_vdup_nv4si( (__builtin_neon_si)_a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_zero()

														
 
															-	{

														
 
															-		return vdupq_n_f32(0.0f);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return vaddq_f32(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return vsubq_f32(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return vmulq_f32(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)

														
 
															-	{

														
 
															-		return vrecpeq_f32(_a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)

														
 
															-	{

														
 
															-		return vrsqrteq_f32(_a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return (float4_t)__builtin_neon_vandv4si( (int32x4_t)_a, (int32x4_t)_b, 0);

														
 
															-	}

														
 
															-

														
 
															-	//BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)

														
 
															-	//{

														
 
															-	//	return _mm_andnot_ps(_b, _a);

														
 
															-	//}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return (float4_t)__builtin_neon_vorrv4si( (int32x4_t)_a, (int32x4_t)_b, 0);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);

														
 
															-		const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);

														
 
															-		const uint32x4_t add  = vaddq_u32(tmp0, tmp1);

														
 
															-		const float4_t result = vreinterpretq_f32_u32(add);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);

														
 
															-		const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);

														
 
															-		const uint32x4_t sub  = vsubq_u32(tmp0, tmp1);

														
 
															-		const float4_t result = vreinterpretq_f32_u32(sub);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)

														
 
															-	{

														
 
															-		const uint32x4_t tmp   = vreinterpretq_u32_f32(_a);

														
 
															-		const uint32x4_t shift = vshlq_n_u32(tmp, _count);

														
 
															-		const float4_t result  = vreinterpretq_f32_u32(shift);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)

														
 
															-	{

														
 
															-		const uint32x4_t tmp   = vreinterpretq_i32_f32(_a);

														
 
															-		const uint32x4_t shift = (uint32x4_t)__builtin_neon_vshr_nv4si( (int32x4_t)tmp, _count, 0);

														
 
															-		const float4_t result  = vreinterpretq_f32_u32(shift);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)

														
 
															-	{

														
 
															-		const int32x4_t a     = vreinterpretq_s32_f32(_a);

														
 
															-		const int32x4_t shift = __builtin_neon_vshr_nv4si(a, _count, 1);

														
 
															-		const float4_t result = vreinterpretq_f32_s32(shift);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#define float4_div_nr float4_div_nr_ni

														
 
															-#define float4_div float4_div_nr_ni

														
 
															-#define float4_ceil float4_ceil_ni

														
 
															-#define float4_floor float4_floor_ni

														
 
															-#include "float4_ni.h"

														
 
															-

														
 
															-#endif // __BX_FLOAT4_NEON_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_FLOAT4_NEON_H__
														
 
															+#define __BX_FLOAT4_NEON_H__
														
 
															+
														
 
															+#include <arm_neon.h>
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+
														
 
															+// Reference:
														
 
															+// http://gcc.gnu.org/onlinedocs/gcc/ARM-NEON-Intrinsics.html
														
 
															+// http://blogs.arm.com/software-enablement/161-coding-for-neon-part-1-load-and-stores/
														
 
															+// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/
														
 
															+// http://blogs.arm.com/software-enablement/241-coding-for-neon-part-3-matrix-multiplication/
														
 
															+// http://blogs.arm.com/software-enablement/277-coding-for-neon-part-4-shifting-left-and-right/
														
 
															+// http://blogs.arm.com/software-enablement/684-coding-for-neon-part-5-rearranging-vectors/
														
 
															+
														
 
															+	typedef __builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) );
														
 
															+
														
 
															+#define ELEMx 0
														
 
															+#define ELEMy 1
														
 
															+#define ELEMz 2
														
 
															+#define ELEMw 3
														
 
															+#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
														
 
															+			BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
														
 
															+			{ \
														
 
															+				float4_t result; \
														
 
															+				result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \
														
 
															+				result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \
														
 
															+				result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \
														
 
															+				result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \
														
 
															+				return result; \
														
 
															+			}
														
 
															+
														
 
															+#include "float4_swizzle.inl"
														
 
															+
														
 
															+#undef IMPLEMENT_SWIZZLE
														
 
															+#undef ELEMw
														
 
															+#undef ELEMz
														
 
															+#undef ELEMy
														
 
															+#undef ELEMx
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _a; //_mm_movelh_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _a; //_mm_movelh_ps(_b, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _a; //_mm_movehl_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _a; //_mm_movehl_ps(_b, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _a; //_mm_unpacklo_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _a; //_mm_unpacklo_ps(_b, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _a; //_mm_unpackhi_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _a; //_mm_unpackhi_ps(_b, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_x(float4_t _a)
														
 
															+	{
														
 
															+		return _a.fxyzw[0];
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_y(float4_t _a)
														
 
															+	{
														
 
															+		return _a.fxyzw[1];
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_z(float4_t _a)
														
 
															+	{
														
 
															+		return _a.fxyzw[2];
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_w(float4_t _a)
														
 
															+	{
														
 
															+		return _a.fxyzw[3];
														
 
															+	}
														
 
															+
														
 
															+//	BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
														
 
															+//	{
														
 
															+//		return _mm_load_ps(reinterpret_cast<const float*>(_ptr) );
														
 
															+//	}
														
 
															+
														
 
															+//	BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
														
 
															+//	{
														
 
															+//		_mm_store_ps(reinterpret_cast<float*>(_ptr), _a);
														
 
															+//	}
														
 
															+
														
 
															+//	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
														
 
															+//	{
														
 
															+//		_mm_stream_ps(reinterpret_cast<float*>(_ptr), _a);
														
 
															+//	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
														
 
															+	{
														
 
															+		const float32_t val[4] = {_x, _y, _z, _w};
														
 
															+		return __builtin_neon_vld1v4sf(val);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
														
 
															+	{
														
 
															+		const uint32_t val[4] = {_x, _y, _z, _w};
														
 
															+		return (float4_t)__builtin_neon_vld1v4si( (const __builtin_neon_si*)val);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_splat(float _a)
														
 
															+	{
														
 
															+		return __builtin_neon_vdup_nv4sf(_a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
														
 
															+	{
														
 
															+		return (float4_t)__builtin_neon_vdup_nv4si( (__builtin_neon_si)_a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_zero()
														
 
															+	{
														
 
															+		return vdupq_n_f32(0.0f);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return vaddq_f32(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return vsubq_f32(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return vmulq_f32(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
														
 
															+	{
														
 
															+		return vrecpeq_f32(_a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
														
 
															+	{
														
 
															+		return vrsqrteq_f32(_a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return (float4_t)__builtin_neon_vandv4si( (int32x4_t)_a, (int32x4_t)_b, 0);
														
 
															+	}
														
 
															+
														
 
															+	//BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
														
 
															+	//{
														
 
															+	//	return _mm_andnot_ps(_b, _a);
														
 
															+	//}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return (float4_t)__builtin_neon_vorrv4si( (int32x4_t)_a, (int32x4_t)_b, 0);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
														
 
															+		const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
														
 
															+		const uint32x4_t add  = vaddq_u32(tmp0, tmp1);
														
 
															+		const float4_t result = vreinterpretq_f32_u32(add);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const uint32x4_t tmp0 = vreinterpretq_u32_f32(_a);
														
 
															+		const uint32x4_t tmp1 = vreinterpretq_u32_f32(_b);
														
 
															+		const uint32x4_t sub  = vsubq_u32(tmp0, tmp1);
														
 
															+		const float4_t result = vreinterpretq_f32_u32(sub);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
														
 
															+	{
														
 
															+		const uint32x4_t tmp   = vreinterpretq_u32_f32(_a);
														
 
															+		const uint32x4_t shift = vshlq_n_u32(tmp, _count);
														
 
															+		const float4_t result  = vreinterpretq_f32_u32(shift);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
														
 
															+	{
														
 
															+		const uint32x4_t tmp   = vreinterpretq_i32_f32(_a);
														
 
															+		const uint32x4_t shift = (uint32x4_t)__builtin_neon_vshr_nv4si( (int32x4_t)tmp, _count, 0);
														
 
															+		const float4_t result  = vreinterpretq_f32_u32(shift);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
														
 
															+	{
														
 
															+		const int32x4_t a     = vreinterpretq_s32_f32(_a);
														
 
															+		const int32x4_t shift = __builtin_neon_vshr_nv4si(a, _count, 1);
														
 
															+		const float4_t result = vreinterpretq_f32_s32(shift);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#define float4_div_nr float4_div_nr_ni
														
 
															+#define float4_div float4_div_nr_ni
														
 
															+#define float4_ceil float4_ceil_ni
														
 
															+#define float4_floor float4_floor_ni
														
 
															+#include "float4_ni.h"
														
 
															+
														
 
															+#endif // __BX_FLOAT4_NEON_H__
														
--- a/include/bx/float4_ni.h
+++ b/include/bx/float4_ni.h
@@ -1,431 +1,431 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_FLOAT4_NI_H__

														
 
															-#define __BX_FLOAT4_NI_H__

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_xAzC_ni(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t xAyB   = float4_shuf_xAyB(_a, _b);

														
 
															-		const float4_t zCwD   = float4_shuf_zCwD(_a, _b);

														
 
															-		const float4_t result = float4_shuf_xyAB(xAyB, zCwD);

														
 
															-		

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_yBwD_ni(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t xAyB   = float4_shuf_xAyB(_a, _b);

														
 
															-		const float4_t zCwD   = float4_shuf_zCwD(_a, _b);

														
 
															-		const float4_t result = float4_shuf_zwCD(xAyB, zCwD);

														
 
															-		

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_madd_ni(float4_t _a, float4_t _b, float4_t _c)

														
 
															-	{

														
 
															-		const float4_t mul    = float4_mul(_a, _b);

														
 
															-		const float4_t result = float4_add(mul, _c);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_nmsub_ni(float4_t _a, float4_t _b, float4_t _c)

														
 
															-	{

														
 
															-		const float4_t mul    = float4_mul(_a, _b);

														
 
															-		const float4_t result = float4_sub(_c, mul);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_div_nr_ni(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t oneish  = float4_isplat(0x3f800001);

														
 
															-		const float4_t est     = float4_rcp_est(_b);

														
 
															-		const float4_t iter0   = float4_mul(_a, est);

														
 
															-		const float4_t tmp1    = float4_nmsub(_b, est, oneish);

														
 
															-		const float4_t result  = float4_madd(tmp1, iter0, iter0);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_rcp_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t one    = float4_splat(1.0f);

														
 
															-		const float4_t result = float4_div(one, _a);

														
 
															-		

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_orx_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t zwxy   = float4_swiz_zwxy(_a);

														
 
															-		const float4_t tmp0   = float4_or(_a, zwxy);

														
 
															-		const float4_t tmp1   = float4_swiz_yyyy(_a);

														
 
															-		const float4_t tmp2   = float4_or(tmp0, tmp1);

														
 
															-		const float4_t mf000  = float4_ild(-1, 0, 0, 0);

														
 
															-		const float4_t result = float4_and(tmp2, mf000);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_orc_ni(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t aorb   = float4_or(_a, _b);

														
 
															-		const float4_t mffff  = float4_isplat(-1);

														
 
															-		const float4_t result = float4_xor(aorb, mffff);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_neg_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t zero   = float4_zero();

														
 
															-		const float4_t result = float4_sub(zero, _a);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_selb_ni(float4_t _mask, float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t sel_a  = float4_and(_a, _mask);

														
 
															-		const float4_t sel_b  = float4_andc(_b, _mask);

														
 
															-		const float4_t result = float4_or(sel_a, sel_b);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sels_ni(float4_t _test, float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t mask   = float4_sra(_test, 31);

														
 
															-		const float4_t result = float4_selb(mask, _a, _b);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_not_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t mffff  = float4_isplat(-1);

														
 
															-		const float4_t result = float4_xor(_a, mffff);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_abs_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t a_neg  = float4_neg(_a);

														
 
															-		const float4_t result = float4_max(a_neg, _a);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_clamp_ni(float4_t _a, float4_t _min, float4_t _max)

														
 
															-	{

														
 
															-		const float4_t tmp    = float4_min(_a, _max);

														
 
															-		const float4_t result = float4_max(tmp, _min);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_lerp_ni(float4_t _a, float4_t _b, float4_t _s)

														
 
															-	{

														
 
															-		const float4_t ba     = float4_sub(_b, _a);

														
 
															-		const float4_t result = float4_madd(_s, ba, _a);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sqrt_nr_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t half   = float4_splat(0.5f);

														
 
															-		const float4_t one    = float4_splat(1.0f);

														
 
															-		const float4_t zero   = float4_zero();

														
 
															-		const float4_t tmp0   = float4_rsqrt_est(_a);

														
 
															-		const float4_t tmp1   = float4_madd(tmp0, _a, zero);

														
 
															-		const float4_t tmp2   = float4_madd(tmp1, half, zero);

														
 
															-		const float4_t tmp3   = float4_nmsub(tmp0, tmp1, one);

														
 
															-		const float4_t result = float4_madd(tmp3, tmp2, tmp1);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_rsqrt_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t one    = float4_splat(1.0f);

														
 
															-		const float4_t sqrt   = float4_sqrt(_a);

														
 
															-		const float4_t result = float4_div(one, sqrt);

														
 
															-		

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_rsqrt_nr_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t rsqrt           = float4_rsqrt_est(_a);

														
 
															-		const float4_t iter0           = float4_mul(_a, rsqrt);

														
 
															-		const float4_t iter1           = float4_mul(iter0, rsqrt);

														
 
															-		const float4_t half            = float4_splat(0.5f);

														
 
															-		const float4_t half_rsqrt      = float4_mul(half, rsqrt);

														
 
															-		const float4_t three           = float4_splat(3.0f);

														
 
															-		const float4_t three_sub_iter1 = float4_sub(three, iter1);

														
 
															-		const float4_t result          = float4_mul(half_rsqrt, three_sub_iter1);

														
 
															-		

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_rsqrt_carmack_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t half    = float4_splat(0.5f);

														
 
															-		const float4_t ah      = float4_mul(half, _a);

														
 
															-		const float4_t ashift  = float4_sra(_a, 1);

														
 
															-		const float4_t magic   = float4_isplat(0x5f3759df);

														
 
															-		const float4_t msuba   = float4_isub(magic, ashift);

														
 
															-		const float4_t msubasq = float4_mul(msuba, msuba);

														
 
															-		const float4_t tmp0    = float4_splat(1.5f);

														
 
															-		const float4_t tmp1    = float4_mul(ah, msubasq);

														
 
															-		const float4_t tmp2    = float4_sub(tmp0, tmp1);

														
 
															-		const float4_t result  = float4_mul(msuba, tmp2);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	namespace float4_logexp_detail

														
 
															-	{

														
 
															-		BX_FLOAT4_INLINE float4_t float4_poly0(float4_t _a, float _b)

														
 
															-		{

														
 
															-			return float4_splat(_b);

														
 
															-		}

														
 
															-

														
 
															-		BX_FLOAT4_INLINE float4_t float4_poly1(float4_t _a, float _b, float _c)

														
 
															-		{

														
 
															-			const float4_t bbbb   = float4_splat(_b);

														
 
															-			const float4_t poly0  = float4_poly0(_a, _c);

														
 
															-			const float4_t result = float4_madd(poly0, _a, bbbb);

														
 
															-

														
 
															-			return result;

														
 
															-		}

														
 
															-

														
 
															-		BX_FLOAT4_INLINE float4_t float4_poly2(float4_t _a, float _b, float _c, float _d)

														
 
															-		{

														
 
															-			const float4_t bbbb   = float4_splat(_b);

														
 
															-			const float4_t poly   = float4_poly1(_a, _c, _d);

														
 
															-			const float4_t result = float4_madd(poly, _a, bbbb);

														
 
															-

														
 
															-			return result;

														
 
															-		}

														
 
															-

														
 
															-		BX_FLOAT4_INLINE float4_t float4_poly3(float4_t _a, float _b, float _c, float _d, float _e)

														
 
															-		{

														
 
															-			const float4_t bbbb   = float4_splat(_b);

														
 
															-			const float4_t poly   = float4_poly2(_a, _c, _d, _e);

														
 
															-			const float4_t result = float4_madd(poly, _a, bbbb);

														
 
															-

														
 
															-			return result;

														
 
															-		}

														
 
															-

														
 
															-		BX_FLOAT4_INLINE float4_t float4_poly4(float4_t _a, float _b, float _c, float _d, float _e, float _f)

														
 
															-		{

														
 
															-			const float4_t bbbb   = float4_splat(_b);

														
 
															-			const float4_t poly   = float4_poly3(_a, _c, _d, _e, _f);

														
 
															-			const float4_t result = float4_madd(poly, _a, bbbb);

														
 
															-

														
 
															-			return result;

														
 
															-		}

														
 
															-

														
 
															-		BX_FLOAT4_INLINE float4_t float4_poly5(float4_t _a, float _b, float _c, float _d, float _e, float _f, float _g)

														
 
															-		{

														
 
															-			const float4_t bbbb   = float4_splat(_b);

														
 
															-			const float4_t poly   = float4_poly4(_a, _c, _d, _e, _f, _g);

														
 
															-			const float4_t result = float4_madd(poly, _a, bbbb);

														
 
															-

														
 
															-			return result;

														
 
															-		}

														
 
															-

														
 
															-		BX_FLOAT4_INLINE float4_t float4_logpoly(float4_t _a)

														
 
															-		{

														
 
															-#if 1

														
 
															-			const float4_t result = float4_poly5(_a

														
 
															-				, 3.11578814719469302614f, -3.32419399085241980044f

														
 
															-				, 2.59883907202499966007f, -1.23152682416275988241f

														
 
															-				, 0.318212422185251071475f, -0.0344359067839062357313f

														
 
															-				);

														
 
															-#elif 0

														
 
															-			const float4_t result = float4_poly4(_a

														
 
															-				, 2.8882704548164776201f, -2.52074962577807006663f

														
 
															-				, 1.48116647521213171641f, -0.465725644288844778798f

														
 
															-				, 0.0596515482674574969533f

														
 
															-				);

														
 
															-#elif 0

														
 
															-			const float4_t result = float4_poly3(_a

														
 
															-				, 2.61761038894603480148f, -1.75647175389045657003f

														
 
															-				, 0.688243882994381274313f, -0.107254423828329604454f

														
 
															-				);

														
 
															-#else

														
 
															-			const float4_t result = float4_poly2(_a

														
 
															-				, 2.28330284476918490682f, -1.04913055217340124191f

														
 
															-				, 0.204446009836232697516f

														
 
															-				);

														
 
															-#endif

														
 
															-

														
 
															-			return result;

														
 
															-		}

														
 
															-

														
 
															-		BX_FLOAT4_INLINE float4_t float4_exppoly(float4_t _a)

														
 
															-		{

														
 
															-#if 1

														
 
															-			const float4_t result = float4_poly5(_a

														
 
															-				, 9.9999994e-1f, 6.9315308e-1f

														
 
															-				, 2.4015361e-1f, 5.5826318e-2f

														
 
															-				, 8.9893397e-3f, 1.8775767e-3f

														
 
															-				);

														
 
															-#elif 0

														
 
															-			const float4_t result = float4_poly4(_a

														
 
															-				, 1.0000026f, 6.9300383e-1f

														
 
															-				, 2.4144275e-1f, 5.2011464e-2f

														
 
															-				, 1.3534167e-2f

														
 
															-				);

														
 
															-#elif 0

														
 
															-			const float4_t result = float4_poly3(_a

														
 
															-				, 9.9992520e-1f, 6.9583356e-1f

														
 
															-				, 2.2606716e-1f, 7.8024521e-2f

														
 
															-				);

														
 
															-#else

														
 
															-			const float4_t result = float4_poly2(_a

														
 
															-				, 1.0017247f, 6.5763628e-1f

														
 
															-				, 3.3718944e-1f

														
 
															-				);

														
 
															-#endif // 0

														
 
															-

														
 
															-			return result;

														
 
															-		}

														
 
															-	} // namespace float4_internal

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_log2_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t expmask  = float4_isplat(0x7f800000);

														
 
															-		const float4_t mantmask = float4_isplat(0x007fffff);

														
 
															-		const float4_t one      = float4_splat(1.0f);

														
 
															-

														
 
															-		const float4_t c127     = float4_isplat(127);

														
 
															-		const float4_t aexp     = float4_and(_a, expmask);

														
 
															-		const float4_t aexpsr   = float4_srl(aexp, 23);

														
 
															-		const float4_t tmp0     = float4_isub(aexpsr, c127);

														
 
															-		const float4_t exp      = float4_itof(tmp0);

														
 
															-

														
 
															-		const float4_t amask    = float4_and(_a, mantmask);

														
 
															-		const float4_t mant     = float4_or(amask, one);

														
 
															-

														
 
															-		const float4_t poly     = float4_logexp_detail::float4_logpoly(mant);

														
 
															-

														
 
															-		const float4_t mandiff  = float4_sub(mant, one);

														
 
															-		const float4_t result   = float4_madd(poly, mandiff, exp);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_exp2_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t min      = float4_splat( 129.0f);

														
 
															-		const float4_t max      = float4_splat(-126.99999f);

														
 
															-		const float4_t tmp0     = float4_min(_a, min);

														
 
															-		const float4_t aaaa     = float4_max(tmp0, max);

														
 
															-

														
 
															-		const float4_t half     = float4_splat(0.5f);

														
 
															-		const float4_t tmp2     = float4_sub(aaaa, half);

														
 
															-		const float4_t ipart    = float4_ftoi(tmp2);

														
 
															-		const float4_t iround   = float4_itof(ipart);

														
 
															-		const float4_t fpart    = float4_sub(aaaa, iround);

														
 
															-

														
 
															-		const float4_t c127     = float4_isplat(127);

														
 
															-		const float4_t tmp5     = float4_iadd(ipart, c127);

														
 
															-		const float4_t expipart = float4_sll(tmp5, 23);

														
 
															-

														
 
															-		const float4_t expfpart = float4_logexp_detail::float4_exppoly(fpart);

														
 
															-

														
 
															-		const float4_t result   = float4_mul(expipart, expfpart);

														
 
															-		

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_pow_ni(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t alog2  = float4_log2(_a);

														
 
															-		const float4_t alog2b = float4_mul(alog2, _b);

														
 
															-		const float4_t result = float4_exp2(alog2b);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_dot3_ni(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t xyzw   = float4_mul(_a, _b);

														
 
															-		const float4_t xxxx   = float4_swiz_xxxx(xyzw);

														
 
															-		const float4_t yyyy   = float4_swiz_yyyy(xyzw);

														
 
															-		const float4_t zzzz   = float4_swiz_zzzz(xyzw);

														
 
															-		const float4_t tmp1   = float4_add(xxxx, yyyy);

														
 
															-		const float4_t result = float4_add(zzzz, tmp1);

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_cross3_ni(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t a_yzxw = float4_swiz_yzxw(_a);

														
 
															-		const float4_t a_zxyw = float4_swiz_zxyw(_a);

														
 
															-		const float4_t b_zxyw = float4_swiz_zxyw(_b);

														
 
															-		const float4_t b_yzxw = float4_swiz_yzxw(_b);

														
 
															-		const float4_t tmp    = float4_mul(a_yzxw, b_zxyw);

														
 
															-		const float4_t result = float4_nmsub(a_zxyw, b_yzxw, tmp);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_normalize3_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t dot3    = float4_dot3(_a, _a);

														
 
															-		const float4_t invSqrt = float4_rsqrt(dot3);

														
 
															-		const float4_t result  = float4_mul(_a, invSqrt);

														
 
															-		

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_dot_ni(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const float4_t xyzw   = float4_mul(_a, _b);

														
 
															-		const float4_t yzwx   = float4_swiz_yzwx(xyzw);

														
 
															-		const float4_t tmp0   = float4_add(xyzw, yzwx);

														
 
															-		const float4_t zwxy   = float4_swiz_zwxy(tmp0);

														
 
															-		const float4_t result = float4_add(tmp0, zwxy);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_ceil_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t tmp0   = float4_ftoi(_a);

														
 
															-		const float4_t tmp1   = float4_itof(tmp0);

														
 
															-		const float4_t mask   = float4_cmplt(tmp1, _a);

														
 
															-		const float4_t one    = float4_splat(1.0f);

														
 
															-		const float4_t tmp2   = float4_and(one, mask);

														
 
															-		const float4_t result = float4_add(tmp1, tmp2);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_floor_ni(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t tmp0   = float4_ftoi(_a);

														
 
															-		const float4_t tmp1   = float4_itof(tmp0);

														
 
															-		const float4_t mask   = float4_cmpgt(tmp1, _a);

														
 
															-		const float4_t one    = float4_splat(1.0f);

														
 
															-		const float4_t tmp2   = float4_and(one, mask);

														
 
															-		const float4_t result = float4_sub(tmp1, tmp2);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_FLOAT4_NI_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_FLOAT4_NI_H__
														
 
															+#define __BX_FLOAT4_NI_H__
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_xAzC_ni(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const float4_t xAyB   = float4_shuf_xAyB(_a, _b);
														
 
															+		const float4_t zCwD   = float4_shuf_zCwD(_a, _b);
														
 
															+		const float4_t result = float4_shuf_xyAB(xAyB, zCwD);
														
 
															+		
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_yBwD_ni(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const float4_t xAyB   = float4_shuf_xAyB(_a, _b);
														
 
															+		const float4_t zCwD   = float4_shuf_zCwD(_a, _b);
														
 
															+		const float4_t result = float4_shuf_zwCD(xAyB, zCwD);
														
 
															+		
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_madd_ni(float4_t _a, float4_t _b, float4_t _c)
														
 
															+	{
														
 
															+		const float4_t mul    = float4_mul(_a, _b);
														
 
															+		const float4_t result = float4_add(mul, _c);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_nmsub_ni(float4_t _a, float4_t _b, float4_t _c)
														
 
															+	{
														
 
															+		const float4_t mul    = float4_mul(_a, _b);
														
 
															+		const float4_t result = float4_sub(_c, mul);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_div_nr_ni(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const float4_t oneish  = float4_isplat(0x3f800001);
														
 
															+		const float4_t est     = float4_rcp_est(_b);
														
 
															+		const float4_t iter0   = float4_mul(_a, est);
														
 
															+		const float4_t tmp1    = float4_nmsub(_b, est, oneish);
														
 
															+		const float4_t result  = float4_madd(tmp1, iter0, iter0);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rcp_ni(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t one    = float4_splat(1.0f);
														
 
															+		const float4_t result = float4_div(one, _a);
														
 
															+		
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_orx_ni(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t zwxy   = float4_swiz_zwxy(_a);
														
 
															+		const float4_t tmp0   = float4_or(_a, zwxy);
														
 
															+		const float4_t tmp1   = float4_swiz_yyyy(_a);
														
 
															+		const float4_t tmp2   = float4_or(tmp0, tmp1);
														
 
															+		const float4_t mf000  = float4_ild(-1, 0, 0, 0);
														
 
															+		const float4_t result = float4_and(tmp2, mf000);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_orc_ni(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const float4_t aorb   = float4_or(_a, _b);
														
 
															+		const float4_t mffff  = float4_isplat(-1);
														
 
															+		const float4_t result = float4_xor(aorb, mffff);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_neg_ni(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t zero   = float4_zero();
														
 
															+		const float4_t result = float4_sub(zero, _a);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_selb_ni(float4_t _mask, float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const float4_t sel_a  = float4_and(_a, _mask);
														
 
															+		const float4_t sel_b  = float4_andc(_b, _mask);
														
 
															+		const float4_t result = float4_or(sel_a, sel_b);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sels_ni(float4_t _test, float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const float4_t mask   = float4_sra(_test, 31);
														
 
															+		const float4_t result = float4_selb(mask, _a, _b);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_not_ni(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t mffff  = float4_isplat(-1);
														
 
															+		const float4_t result = float4_xor(_a, mffff);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_abs_ni(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t a_neg  = float4_neg(_a);
														
 
															+		const float4_t result = float4_max(a_neg, _a);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_clamp_ni(float4_t _a, float4_t _min, float4_t _max)
														
 
															+	{
														
 
															+		const float4_t tmp    = float4_min(_a, _max);
														
 
															+		const float4_t result = float4_max(tmp, _min);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_lerp_ni(float4_t _a, float4_t _b, float4_t _s)
														
 
															+	{
														
 
															+		const float4_t ba     = float4_sub(_b, _a);
														
 
															+		const float4_t result = float4_madd(_s, ba, _a);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sqrt_nr_ni(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t half   = float4_splat(0.5f);
														
 
															+		const float4_t one    = float4_splat(1.0f);
														
 
															+		const float4_t zero   = float4_zero();
														
 
															+		const float4_t tmp0   = float4_rsqrt_est(_a);
														
 
															+		const float4_t tmp1   = float4_madd(tmp0, _a, zero);
														
 
															+		const float4_t tmp2   = float4_madd(tmp1, half, zero);
														
 
															+		const float4_t tmp3   = float4_nmsub(tmp0, tmp1, one);
														
 
															+		const float4_t result = float4_madd(tmp3, tmp2, tmp1);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rsqrt_ni(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t one    = float4_splat(1.0f);
														
 
															+		const float4_t sqrt   = float4_sqrt(_a);
														
 
															+		const float4_t result = float4_div(one, sqrt);
														
 
															+		
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rsqrt_nr_ni(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t rsqrt           = float4_rsqrt_est(_a);
														
 
															+		const float4_t iter0           = float4_mul(_a, rsqrt);
														
 
															+		const float4_t iter1           = float4_mul(iter0, rsqrt);
														
 
															+		const float4_t half            = float4_splat(0.5f);
														
 
															+		const float4_t half_rsqrt      = float4_mul(half, rsqrt);
														
 
															+		const float4_t three           = float4_splat(3.0f);
														
 
															+		const float4_t three_sub_iter1 = float4_sub(three, iter1);
														
 
															+		const float4_t result          = float4_mul(half_rsqrt, three_sub_iter1);
														
 
															+		
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rsqrt_carmack_ni(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t half    = float4_splat(0.5f);
														
 
															+		const float4_t ah      = float4_mul(half, _a);
														
 
															+		const float4_t ashift  = float4_sra(_a, 1);
														
 
															+		const float4_t magic   = float4_isplat(0x5f3759df);
														
 
															+		const float4_t msuba   = float4_isub(magic, ashift);
														
 
															+		const float4_t msubasq = float4_mul(msuba, msuba);
														
 
															+		const float4_t tmp0    = float4_splat(1.5f);
														
 
															+		const float4_t tmp1    = float4_mul(ah, msubasq);
														
 
															+		const float4_t tmp2    = float4_sub(tmp0, tmp1);
														
 
															+		const float4_t result  = float4_mul(msuba, tmp2);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	namespace float4_logexp_detail
														
 
															+	{
														
 
															+		BX_FLOAT4_INLINE float4_t float4_poly0(float4_t _a, float _b)
														
 
															+		{
														
 
															+			return float4_splat(_b);
														
 
															+		}
														
 
															+
														
 
															+		BX_FLOAT4_INLINE float4_t float4_poly1(float4_t _a, float _b, float _c)
														
 
															+		{
														
 
															+			const float4_t bbbb   = float4_splat(_b);
														
 
															+			const float4_t poly0  = float4_poly0(_a, _c);
														
 
															+			const float4_t result = float4_madd(poly0, _a, bbbb);
														
 
															+
														
 
															+			return result;
														
 
															+		}
														
 
															+
														
 
															+		BX_FLOAT4_INLINE float4_t float4_poly2(float4_t _a, float _b, float _c, float _d)
														
 
															+		{
														
 
															+			const float4_t bbbb   = float4_splat(_b);
														
 
															+			const float4_t poly   = float4_poly1(_a, _c, _d);
														
 
															+			const float4_t result = float4_madd(poly, _a, bbbb);
														
 
															+
														
 
															+			return result;
														
 
															+		}
														
 
															+
														
 
															+		BX_FLOAT4_INLINE float4_t float4_poly3(float4_t _a, float _b, float _c, float _d, float _e)
														
 
															+		{
														
 
															+			const float4_t bbbb   = float4_splat(_b);
														
 
															+			const float4_t poly   = float4_poly2(_a, _c, _d, _e);
														
 
															+			const float4_t result = float4_madd(poly, _a, bbbb);
														
 
															+
														
 
															+			return result;
														
 
															+		}
														
 
															+
														
 
															+		BX_FLOAT4_INLINE float4_t float4_poly4(float4_t _a, float _b, float _c, float _d, float _e, float _f)
														
 
															+		{
														
 
															+			const float4_t bbbb   = float4_splat(_b);
														
 
															+			const float4_t poly   = float4_poly3(_a, _c, _d, _e, _f);
														
 
															+			const float4_t result = float4_madd(poly, _a, bbbb);
														
 
															+
														
 
															+			return result;
														
 
															+		}
														
 
															+
														
 
															+		BX_FLOAT4_INLINE float4_t float4_poly5(float4_t _a, float _b, float _c, float _d, float _e, float _f, float _g)
														
 
															+		{
														
 
															+			const float4_t bbbb   = float4_splat(_b);
														
 
															+			const float4_t poly   = float4_poly4(_a, _c, _d, _e, _f, _g);
														
 
															+			const float4_t result = float4_madd(poly, _a, bbbb);
														
 
															+
														
 
															+			return result;
														
 
															+		}
														
 
															+
														
 
															+		BX_FLOAT4_INLINE float4_t float4_logpoly(float4_t _a)
														
 
															+		{
														
 
															+#if 1
														
 
															+			const float4_t result = float4_poly5(_a
														
 
															+				, 3.11578814719469302614f, -3.32419399085241980044f
														
 
															+				, 2.59883907202499966007f, -1.23152682416275988241f
														
 
															+				, 0.318212422185251071475f, -0.0344359067839062357313f
														
 
															+				);
														
 
															+#elif 0
														
 
															+			const float4_t result = float4_poly4(_a
														
 
															+				, 2.8882704548164776201f, -2.52074962577807006663f
														
 
															+				, 1.48116647521213171641f, -0.465725644288844778798f
														
 
															+				, 0.0596515482674574969533f
														
 
															+				);
														
 
															+#elif 0
														
 
															+			const float4_t result = float4_poly3(_a
														
 
															+				, 2.61761038894603480148f, -1.75647175389045657003f
														
 
															+				, 0.688243882994381274313f, -0.107254423828329604454f
														
 
															+				);
														
 
															+#else
														
 
															+			const float4_t result = float4_poly2(_a
														
 
															+				, 2.28330284476918490682f, -1.04913055217340124191f
														
 
															+				, 0.204446009836232697516f
														
 
															+				);
														
 
															+#endif
														
 
															+
														
 
															+			return result;
														
 
															+		}
														
 
															+
														
 
															+		/// Polynomial helper used by float4_exp2_ni.
															+		/// Passes _a and a fixed coefficient set to one of float4_poly5/4/3/2; the
															+		/// preprocessor chain selects which one is compiled (currently float4_poly5).
															+		BX_FLOAT4_INLINE float4_t float4_exppoly(float4_t _a)
															+		{
															+#if 1
															+			return float4_poly5(_a
															+				, 9.9999994e-1f, 6.9315308e-1f
															+				, 2.4015361e-1f, 5.5826318e-2f
															+				, 8.9893397e-3f, 1.8775767e-3f
															+				);
															+#elif 0
															+			return float4_poly4(_a
															+				, 1.0000026f, 6.9300383e-1f
															+				, 2.4144275e-1f, 5.2011464e-2f
															+				, 1.3534167e-2f
															+				);
															+#elif 0
															+			return float4_poly3(_a
															+				, 9.9992520e-1f, 6.9583356e-1f
															+				, 2.2606716e-1f, 7.8024521e-2f
															+				);
															+#else
															+			return float4_poly2(_a
															+				, 1.0017247f, 6.5763628e-1f
															+				, 3.3718944e-1f
															+				);
															+#endif // 0
															+		}
														
 
															+	} // namespace float4_logexp_detail
														
 
															+
														
 
															+	/// Non-intrinsic log2 approximation built from integer bit operations on the
															+	/// lanes of _a plus the float4_logpoly polynomial.
															+	BX_FLOAT4_INLINE float4_t float4_log2_ni(float4_t _a)
															+	{
															+		// Exponent: mask with 0x7f800000, shift right by 23, subtract 127,
															+		// then convert the integer lanes to float.
															+		const float4_t expBits  = float4_and(_a, float4_isplat(0x7f800000) );
															+		const float4_t biased   = float4_srl(expBits, 23);
															+		const float4_t unbiased = float4_isub(biased, float4_isplat(127) );
															+		const float4_t exponent = float4_itof(unbiased);
															+
															+		// Mantissa: keep the bits selected by 0x007fffff and OR in the lanes of 1.0f.
															+		const float4_t one      = float4_splat(1.0f);
															+		const float4_t fraction = float4_and(_a, float4_isplat(0x007fffff) );
															+		const float4_t mantissa = float4_or(fraction, one);
															+
															+		const float4_t poly     = float4_logexp_detail::float4_logpoly(mantissa);
															+
															+		// Combine polynomial, (mantissa - 1) and exponent through float4_madd.
															+		return float4_madd(poly, float4_sub(mantissa, one), exponent);
															+	}
														
 
															+
														
 
															+	/// Non-intrinsic exp2 approximation: splits the bounded input into an integer
															+	/// part and a remainder, builds one factor from each and multiplies them.
															+	BX_FLOAT4_INLINE float4_t float4_exp2_ni(float4_t _a)
															+	{
															+		// Bound the input: float4_min against 129, then float4_max against -126.99999.
															+		const float4_t upper     = float4_splat( 129.0f);
															+		const float4_t lower     = float4_splat(-126.99999f);
															+		const float4_t bounded   = float4_max(float4_min(_a, upper), lower);
															+
															+		// Integer part is taken from (bounded - 0.5); the remainder is what is left
															+		// after subtracting that integer (converted back to float) from bounded.
															+		const float4_t shifted   = float4_sub(bounded, float4_splat(0.5f) );
															+		const float4_t whole     = float4_ftoi(shifted);
															+		const float4_t remainder = float4_sub(bounded, float4_itof(whole) );
															+
															+		// Integer factor: add 127 to the integer lanes and shift left by 23 bits.
															+		const float4_t biased    = float4_iadd(whole, float4_isplat(127) );
															+		const float4_t pow2whole = float4_sll(biased, 23);
															+
															+		// Remainder factor comes from the polynomial helper.
															+		const float4_t pow2frac  = float4_logexp_detail::float4_exppoly(remainder);
															+
															+		return float4_mul(pow2whole, pow2frac);
															+	}
														
 
															+
														
 
															+	/// Non-intrinsic pow: float4_exp2 applied to float4_log2(_a) multiplied by _b.
															+	BX_FLOAT4_INLINE float4_t float4_pow_ni(float4_t _a, float4_t _b)
															+	{
															+		const float4_t scaledLog = float4_mul(float4_log2(_a), _b);
															+		return float4_exp2(scaledLog);
															+	}
														
 
															+
														
 
															+	/// Non-intrinsic 3-component dot product: multiplies _a by _b, then sums the
															+	/// xxxx, yyyy and zzzz swizzles of that product.
															+	BX_FLOAT4_INLINE float4_t float4_dot3_ni(float4_t _a, float4_t _b)
															+	{
															+		const float4_t product = float4_mul(_a, _b);
															+
															+		const float4_t px = float4_swiz_xxxx(product);
															+		const float4_t py = float4_swiz_yyyy(product);
															+		const float4_t pz = float4_swiz_zzzz(product);
															+
															+		// Same summation order as before: z + (x + y).
															+		return float4_add(pz, float4_add(px, py) );
															+	}
														
 
															+
														
 
															+	/// Non-intrinsic 3-component cross product built from yzxw/zxyw swizzles of
															+	/// both operands, one float4_mul and one float4_nmsub.
															+	BX_FLOAT4_INLINE float4_t float4_cross3_ni(float4_t _a, float4_t _b)
															+	{
															+		// First term: a.yzxw * b.zxyw
															+		const float4_t lhs = float4_mul(float4_swiz_yzxw(_a), float4_swiz_zxyw(_b) );
															+
															+		// Second term (a.zxyw, b.yzxw) is combined with the first by float4_nmsub.
															+		const float4_t aRot = float4_swiz_zxyw(_a);
															+		const float4_t bRot = float4_swiz_yzxw(_b);
															+
															+		return float4_nmsub(aRot, bRot, lhs);
															+	}
														
 
															+
														
 
															+	/// Non-intrinsic 3-component normalize: scales _a by float4_rsqrt of
															+	/// float4_dot3(_a, _a). No special handling for a zero-length input.
															+	BX_FLOAT4_INLINE float4_t float4_normalize3_ni(float4_t _a)
															+	{
															+		const float4_t lengthSq  = float4_dot3(_a, _a);
															+		const float4_t invLength = float4_rsqrt(lengthSq);
															+		return float4_mul(_a, invLength);
															+	}
														
 
															+
														
 
															+	/// Non-intrinsic 4-component dot product: multiplies _a by _b and folds the
															+	/// product with two swizzle-and-add steps (yzwx, then zwxy).
															+	BX_FLOAT4_INLINE float4_t float4_dot_ni(float4_t _a, float4_t _b)
															+	{
															+		const float4_t product = float4_mul(_a, _b);
															+
															+		// Step 1: add the product to its yzwx swizzle.
															+		const float4_t pairSum = float4_add(product, float4_swiz_yzwx(product) );
															+
															+		// Step 2: add that sum to its own zwxy swizzle.
															+		return float4_add(pairSum, float4_swiz_zwxy(pairSum) );
															+	}
														
 
															+
														
 
															+	/// Non-intrinsic ceil: round-trips _a through float4_ftoi/float4_itof and adds
															+	/// 1.0f to every lane whose round-tripped value compares less than _a.
															+	BX_FLOAT4_INLINE float4_t float4_ceil_ni(float4_t _a)
															+	{
															+		const float4_t whole   = float4_itof(float4_ftoi(_a) );
															+
															+		// Compare mask ANDed with 1.0f yields 1.0f where the mask is set, 0 elsewhere.
															+		const float4_t isBelow = float4_cmplt(whole, _a);
															+		const float4_t bump    = float4_and(float4_splat(1.0f), isBelow);
															+
															+		return float4_add(whole, bump);
															+	}
														
 
															+
														
 
															+	/// Non-intrinsic floor: round-trips _a through float4_ftoi/float4_itof and
															+	/// subtracts 1.0f from every lane whose round-tripped value compares greater than _a.
															+	BX_FLOAT4_INLINE float4_t float4_floor_ni(float4_t _a)
															+	{
															+		const float4_t whole   = float4_itof(float4_ftoi(_a) );
															+
															+		// Compare mask ANDed with 1.0f yields 1.0f where the mask is set, 0 elsewhere.
															+		const float4_t isAbove = float4_cmpgt(whole, _a);
															+		const float4_t drop    = float4_and(float4_splat(1.0f), isAbove);
															+
															+		return float4_sub(whole, drop);
															+	}
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_FLOAT4_NI_H__
														
--- a/include/bx/float4_ref.h
+++ b/include/bx/float4_ref.h
@@ -1,529 +1,529 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_FLOAT4_REF_H__

														
 
															-#define __BX_FLOAT4_REF_H__

														
 
															-

														
 
															-#include <math.h> // sqrtf

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	typedef union float4_t // Four 32-bit lanes, viewable as signed int, unsigned int or float.
															-	{
															-		int32_t  ixyzw[4]; // lanes as signed integers (index 0..3 = x, y, z, w)
															-		uint32_t uxyzw[4]; // same storage as unsigned integers
															-		float    fxyzw[4]; // same storage as floats
															-
															-	} float4_t;

														
 
															-

														
 
															-#define ELEMx 0 // lane index pasted in for component name x
															-#define ELEMy 1 // lane index for y
															-#define ELEMz 2 // lane index for z
															-#define ELEMw 3 // lane index for w
															-#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) /* defines float4_swiz_<_x><_y><_z><_w>() */ \
															-			BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
															-			{ \
															-				float4_t result; \
															-				result.ixyzw[0] = _a.ixyzw[ELEM##_x]; /* output lane 0 <- source lane named by _x */ \
															-				result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \
															-				result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \
															-				result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \
															-				return result; \
															-			}
															-
															-#include "float4_swizzle.inl" // presumably invokes IMPLEMENT_SWIZZLE once per swizzle -- confirm against the .inl
															-
															-#undef IMPLEMENT_SWIZZLE // helper macros are removed again after the include
															-#undef ELEMw
															-#undef ELEMz
															-#undef ELEMy
															-#undef ELEMx

														
 
															-

														
 
															-#define IMPLEMENT_TEST(_xyzw, _mask) /* defines float4_test_any_<_xyzw>() and float4_test_all_<_xyzw>() */ \
															-			BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) /* true if any lane selected by _mask has bit 31 set */ \
															-			{ \
															-				uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) /* gather bit 31 of lanes w,z,y,x into bits 3..0 */ \
															-				             | ( (_test.uxyzw[2]>>31)<<2) \
															-				             | ( (_test.uxyzw[1]>>31)<<1) \
															-				             | (_test.uxyzw[0]>>31) \
															-				             ; \
															-				return 0 != (tmp&(_mask) ); \
															-			} \
															-			\
															-			BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) /* true if every lane selected by _mask has bit 31 set */ \
															-			{ \
															-				uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
															-				             | ( (_test.uxyzw[2]>>31)<<2) \
															-				             | ( (_test.uxyzw[1]>>31)<<1) \
															-				             | (_test.uxyzw[0]>>31) \
															-				             ; \
															-				return (_mask) == (tmp&(_mask) ); \
															-			}
															-
															-IMPLEMENT_TEST(x    , 0x1); // mask bit 0 = x, bit 1 = y, bit 2 = z, bit 3 = w
															-IMPLEMENT_TEST(y    , 0x2);
															-IMPLEMENT_TEST(xy   , 0x3);
															-IMPLEMENT_TEST(z    , 0x4);
															-IMPLEMENT_TEST(xz   , 0x5);
															-IMPLEMENT_TEST(yz   , 0x6);
															-IMPLEMENT_TEST(xyz  , 0x7);
															-IMPLEMENT_TEST(w    , 0x8);
															-IMPLEMENT_TEST(xw   , 0x9);
															-IMPLEMENT_TEST(yw   , 0xa);
															-IMPLEMENT_TEST(xyw  , 0xb);
															-IMPLEMENT_TEST(zw   , 0xc);
															-IMPLEMENT_TEST(xzw  , 0xd);
															-IMPLEMENT_TEST(yzw  , 0xe);
															-IMPLEMENT_TEST(xyzw , 0xf);
															-
															-#undef IMPLEMENT_TEST // generator macro is not needed past this point

														
 
															-

														
 
															-	/// Returns { _a[0], _a[1], _b[0], _b[1] } (raw lane bits).
															-	BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
															-	{
															-		float4_t shuffled;
															-		for (uint32_t ii = 0; ii < 2; ++ii)
															-		{
															-			shuffled.uxyzw[ii]   = _a.uxyzw[ii];
															-			shuffled.uxyzw[ii+2] = _b.uxyzw[ii];
															-		}
															-		return shuffled;
															-	}

														
 
															-

														
 
															-	/// Returns { _b[0], _b[1], _a[0], _a[1] } (raw lane bits).
															-	BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
															-	{
															-		float4_t shuffled;
															-		for (uint32_t ii = 0; ii < 2; ++ii)
															-		{
															-			shuffled.uxyzw[ii]   = _b.uxyzw[ii];
															-			shuffled.uxyzw[ii+2] = _a.uxyzw[ii];
															-		}
															-		return shuffled;
															-	}

														
 
															-

														
 
															-	/// Returns { _b[2], _b[3], _a[2], _a[3] } (raw lane bits).
															-	BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
															-	{
															-		float4_t shuffled;
															-		for (uint32_t ii = 0; ii < 2; ++ii)
															-		{
															-			shuffled.uxyzw[ii]   = _b.uxyzw[ii+2];
															-			shuffled.uxyzw[ii+2] = _a.uxyzw[ii+2];
															-		}
															-		return shuffled;
															-	}

														
 
															-

														
 
															-	/// Returns { _a[2], _a[3], _b[2], _b[3] } (raw lane bits).
															-	BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
															-	{
															-		float4_t shuffled;
															-		for (uint32_t ii = 0; ii < 2; ++ii)
															-		{
															-			shuffled.uxyzw[ii]   = _a.uxyzw[ii+2];
															-			shuffled.uxyzw[ii+2] = _b.uxyzw[ii+2];
															-		}
															-		return shuffled;
															-	}

														
 
															-

														
 
															-	/// Returns { _a[0], _b[0], _a[1], _b[1] } (raw lane bits), i.e. the low
															-	/// two lanes of both inputs interleaved.
															-	BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
															-	{
															-		float4_t shuffled;
															-		for (uint32_t ii = 0; ii < 2; ++ii)
															-		{
															-			shuffled.uxyzw[2*ii]   = _a.uxyzw[ii];
															-			shuffled.uxyzw[2*ii+1] = _b.uxyzw[ii];
															-		}
															-		return shuffled;
															-	}

														
 
															-

														
 
															-	/// Returns { _a[1], _b[1], _a[0], _b[0] } (raw lane bits).
															-	BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
															-	{
															-		float4_t shuffled;
															-		for (uint32_t ii = 0; ii < 2; ++ii)
															-		{
															-			// Source lane runs 1, 0 while the destination pair runs 0, 1.
															-			shuffled.uxyzw[2*ii]   = _a.uxyzw[1-ii];
															-			shuffled.uxyzw[2*ii+1] = _b.uxyzw[1-ii];
															-		}
															-		return shuffled;
															-	}

														
 
															-

														
 
															-	/// Returns { _a[2], _b[2], _a[3], _b[3] } (raw lane bits), i.e. the high
															-	/// two lanes of both inputs interleaved.
															-	BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
															-	{
															-		float4_t shuffled;
															-		for (uint32_t ii = 0; ii < 2; ++ii)
															-		{
															-			shuffled.uxyzw[2*ii]   = _a.uxyzw[ii+2];
															-			shuffled.uxyzw[2*ii+1] = _b.uxyzw[ii+2];
															-		}
															-		return shuffled;
															-	}

														
 
															-

														
 
															-	/// Returns { _b[2], _a[2], _b[3], _a[3] } (raw lane bits).
															-	BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
															-	{
															-		float4_t shuffled;
															-		for (uint32_t ii = 0; ii < 2; ++ii)
															-		{
															-			shuffled.uxyzw[2*ii]   = _b.uxyzw[ii+2];
															-			shuffled.uxyzw[2*ii+1] = _a.uxyzw[ii+2];
															-		}
															-		return shuffled;
															-	}

														
 
															-

														
 
															-	/// Returns lane 0 of _a as a float.
															-	BX_FLOAT4_INLINE float float4_x(float4_t _a)
															-	{
															-		const float lane = _a.fxyzw[0];
															-		return lane;
															-	}

														
 
															-

														
 
															-	/// Returns lane 1 of _a as a float.
															-	BX_FLOAT4_INLINE float float4_y(float4_t _a)
															-	{
															-		const float lane = _a.fxyzw[1];
															-		return lane;
															-	}

														
 
															-

														
 
															-	/// Returns lane 2 of _a as a float.
															-	BX_FLOAT4_INLINE float float4_z(float4_t _a)
															-	{
															-		const float lane = _a.fxyzw[2];
															-		return lane;
															-	}

														
 
															-

														
 
															-	/// Returns lane 3 of _a as a float.
															-	BX_FLOAT4_INLINE float float4_w(float4_t _a)
															-	{
															-		const float lane = _a.fxyzw[3];
															-		return lane;
															-	}

														
 
															-

														
 
															-	/// Loads a float4_t by reinterpreting _ptr as a pointer to float4_t and
															-	/// copying the pointed-to value.
															-	BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
															-	{
															-		const float4_t* source = reinterpret_cast<const float4_t*>(_ptr);
															-		return *source;
															-	}

														
 
															-

														
 
															-	/// Stores _a by reinterpreting _ptr as a pointer to float4_t and assigning through it.
															-	BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
															-	{
															-		float4_t* destination = reinterpret_cast<float4_t*>(_ptr);
															-		*destination = _a;
															-	}

														
 
															-

														
 
															-	/// Stores _a through _ptr. Here this is an ordinary assignment through a
															-	/// float4_t pointer; nothing in this body bypasses the cache.
															-	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
															-	{
															-		float4_t* destination = reinterpret_cast<float4_t*>(_ptr);
															-		*destination = _a;
															-	}

														
 
															-

														
 
															-	/// Builds a float4_t whose float lanes 0..3 are _x, _y, _z, _w.
															-	BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
															-	{
															-		const float lanes[4] = { _x, _y, _z, _w };
															-
															-		float4_t packed;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			packed.fxyzw[ii] = lanes[ii];
															-		}
															-		return packed;
															-	}

														
 
															-

														
 
															-	/// Builds a float4_t whose unsigned integer lanes 0..3 are _x, _y, _z, _w.
															-	BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
															-	{
															-		const uint32_t lanes[4] = { _x, _y, _z, _w };
															-
															-		float4_t packed;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			packed.uxyzw[ii] = lanes[ii];
															-		}
															-		return packed;
															-	}

														
 
															-

														
 
															-	/// Reads one float through _ptr and replicates it into all four lanes.
															-	BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
															-	{
															-		const float* source = reinterpret_cast<const float*>(_ptr);
															-		const float  value  = *source;
															-		return float4_ld(value, value, value, value);
															-	}

														
 
															-

														
 
															-	/// Replicates the float _a into all four lanes.
															-	BX_FLOAT4_INLINE float4_t float4_splat(float _a)
															-	{
															-		const float4_t replicated = float4_ld(_a, _a, _a, _a);
															-		return replicated;
															-	}

														
 
															-

														
 
															-	/// Replicates the 32-bit integer _a into all four lanes.
															-	BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
															-	{
															-		const float4_t replicated = float4_ild(_a, _a, _a, _a);
															-		return replicated;
															-	}

														
 
															-

														
 
															-	/// Returns a float4_t with every lane's bits set to zero.
															-	BX_FLOAT4_INLINE float4_t float4_zero()
															-	{
															-		const float4_t zero = float4_ild(0, 0, 0, 0);
															-		return zero;
															-	}

														
 
															-

														
 
															-	/// Converts each signed integer lane of _a to the float of the same value.
															-	///
															-	/// Fix: the conversion previously read from the uninitialized `result`
															-	/// instead of `_a`, so the argument was ignored and indeterminate values
															-	/// were converted.
															-	BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)
															-	{
															-		float4_t result;
															-		result.fxyzw[0] = (float)_a.ixyzw[0];
															-		result.fxyzw[1] = (float)_a.ixyzw[1];
															-		result.fxyzw[2] = (float)_a.ixyzw[2];
															-		result.fxyzw[3] = (float)_a.ixyzw[3];
															-		return result;
															-	}

														
 
															-

														
 
															-	/// Converts each float lane of _a to a signed integer with a C cast
															-	/// (which truncates toward zero).
															-	///
															-	/// Fix: the conversion previously read from the uninitialized `result`
															-	/// instead of `_a`, so the argument was ignored and indeterminate values
															-	/// were converted.
															-	BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)
															-	{
															-		float4_t result;
															-		result.ixyzw[0] = (int)_a.fxyzw[0];
															-		result.ixyzw[1] = (int)_a.fxyzw[1];
															-		result.ixyzw[2] = (int)_a.fxyzw[2];
															-		result.ixyzw[3] = (int)_a.fxyzw[3];
															-		return result;
															-	}

														
 
															-

														
 
															-	/// Round-trips _a through float4_ftoi and float4_itof; the rounding mode is
															-	/// whatever float4_ftoi applies.
															-	BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)
															-	{
															-		const float4_t asInt = float4_ftoi(_a);
															-		return float4_itof(asInt);
															-	}

														
 
															-

														
 
															-	/// Lane-wise float addition: result[i] = _a[i] + _b[i].
															-	BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
															-	{
															-		float4_t sum;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			sum.fxyzw[ii] = _a.fxyzw[ii] + _b.fxyzw[ii];
															-		}
															-		return sum;
															-	}

														
 
															-

														
 
															-	/// Lane-wise float subtraction: result[i] = _a[i] - _b[i].
															-	BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
															-	{
															-		float4_t difference;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			difference.fxyzw[ii] = _a.fxyzw[ii] - _b.fxyzw[ii];
															-		}
															-		return difference;
															-	}

														
 
															-

														
 
															-	/// Lane-wise float multiplication: result[i] = _a[i] * _b[i].
															-	BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
															-	{
															-		float4_t product;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			product.fxyzw[ii] = _a.fxyzw[ii] * _b.fxyzw[ii];
															-		}
															-		return product;
															-	}

														
 
															-

														
 
															-	/// Lane-wise float division: result[i] = _a[i] / _b[i].
															-	///
															-	/// Fix: the body previously multiplied the lanes (a copy of float4_mul)
															-	/// instead of dividing them. Division by zero is not guarded.
															-	BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)
															-	{
															-		float4_t result;
															-		result.fxyzw[0] = _a.fxyzw[0] / _b.fxyzw[0];
															-		result.fxyzw[1] = _a.fxyzw[1] / _b.fxyzw[1];
															-		result.fxyzw[2] = _a.fxyzw[2] / _b.fxyzw[2];
															-		result.fxyzw[3] = _a.fxyzw[3] / _b.fxyzw[3];
															-		return result;
															-	}

														
 
															-

														
 
															-	/// Lane-wise reciprocal: result[i] = 1.0f / _a[i]. Despite the _est suffix
															-	/// this body uses a full division.
															-	BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
															-	{
															-		float4_t reciprocal;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			reciprocal.fxyzw[ii] = 1.0f / _a.fxyzw[ii];
															-		}
															-		return reciprocal;
															-	}

														
 
															-

														
 
															-	/// Lane-wise square root via sqrtf.
															-	BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)
															-	{
															-		float4_t root;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			root.fxyzw[ii] = sqrtf(_a.fxyzw[ii]);
															-		}
															-		return root;
															-	}

														
 
															-

														
 
															-	/// Lane-wise reciprocal square root: result[i] = 1.0f / sqrtf(_a[i]).
															-	/// Despite the _est suffix this body uses sqrtf and a full division.
															-	BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
															-	{
															-		float4_t invRoot;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			invRoot.fxyzw[ii] = 1.0f / sqrtf(_a.fxyzw[ii]);
															-		}
															-		return invRoot;
															-	}

														
 
															-

														
 
															-	/// Lane-wise float compare: each lane is 0xffffffff where _a[i] == _b[i], else 0.
															-	BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
															-	{
															-		float4_t mask;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			mask.ixyzw[ii] = _a.fxyzw[ii] == _b.fxyzw[ii] ? 0xffffffff : 0x0;
															-		}
															-		return mask;
															-	}

														
 
															-

														
 
															-	/// Lane-wise float compare: each lane is 0xffffffff where _a[i] < _b[i], else 0.
															-	BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
															-	{
															-		float4_t mask;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			mask.ixyzw[ii] = _a.fxyzw[ii] < _b.fxyzw[ii] ? 0xffffffff : 0x0;
															-		}
															-		return mask;
															-	}

														
 
															-

														
 
															-	/// Lane-wise float compare: each lane is 0xffffffff where _a[i] <= _b[i], else 0.
															-	BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
															-	{
															-		float4_t mask;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			mask.ixyzw[ii] = _a.fxyzw[ii] <= _b.fxyzw[ii] ? 0xffffffff : 0x0;
															-		}
															-		return mask;
															-	}

														
 
															-

														
 
															-	/// Lane-wise float compare: each lane is 0xffffffff where _a[i] > _b[i], else 0.
															-	BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
															-	{
															-		float4_t mask;
															-		for (uint32_t ii = 0; ii < 4; ++ii)
															-		{
															-			mask.ixyzw[ii] = _a.fxyzw[ii] > _b.fxyzw[ii] ? 0xffffffff : 0x0;
															-		}
															-		return mask;
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.ixyzw[0] = _a.fxyzw[0] >= _b.fxyzw[0] ? 0xffffffff : 0x0;

														
 
															-		result.ixyzw[1] = _a.fxyzw[1] >= _b.fxyzw[1] ? 0xffffffff : 0x0;

														
 
															-		result.ixyzw[2] = _a.fxyzw[2] >= _b.fxyzw[2] ? 0xffffffff : 0x0;

														
 
															-		result.ixyzw[3] = _a.fxyzw[3] >= _b.fxyzw[3] ? 0xffffffff : 0x0;

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.fxyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0];

														
 
															-		result.fxyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1];

														
 
															-		result.fxyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2];

														
 
															-		result.fxyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3];

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.fxyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0];

														
 
															-		result.fxyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1];

														
 
															-		result.fxyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2];

														
 
															-		result.fxyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3];

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.uxyzw[0] = _a.uxyzw[0] & _b.uxyzw[0];

														
 
															-		result.uxyzw[1] = _a.uxyzw[1] & _b.uxyzw[1];

														
 
															-		result.uxyzw[2] = _a.uxyzw[2] & _b.uxyzw[2];

														
 
															-		result.uxyzw[3] = _a.uxyzw[3] & _b.uxyzw[3];

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.uxyzw[0] = _a.uxyzw[0] & ~_b.uxyzw[0];

														
 
															-		result.uxyzw[1] = _a.uxyzw[1] & ~_b.uxyzw[1];

														
 
															-		result.uxyzw[2] = _a.uxyzw[2] & ~_b.uxyzw[2];

														
 
															-		result.uxyzw[3] = _a.uxyzw[3] & ~_b.uxyzw[3];

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.uxyzw[0] = _a.uxyzw[0] | _b.uxyzw[0];

														
 
															-		result.uxyzw[1] = _a.uxyzw[1] | _b.uxyzw[1];

														
 
															-		result.uxyzw[2] = _a.uxyzw[2] | _b.uxyzw[2];

														
 
															-		result.uxyzw[3] = _a.uxyzw[3] | _b.uxyzw[3];

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.uxyzw[0] = _a.uxyzw[0] ^ _b.uxyzw[0];

														
 
															-		result.uxyzw[1] = _a.uxyzw[1] ^ _b.uxyzw[1];

														
 
															-		result.uxyzw[2] = _a.uxyzw[2] ^ _b.uxyzw[2];

														
 
															-		result.uxyzw[3] = _a.uxyzw[3] ^ _b.uxyzw[3];

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.uxyzw[0] = _a.uxyzw[0] << _count;

														
 
															-		result.uxyzw[1] = _a.uxyzw[1] << _count;

														
 
															-		result.uxyzw[2] = _a.uxyzw[2] << _count;

														
 
															-		result.uxyzw[3] = _a.uxyzw[3] << _count;

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.uxyzw[0] = _a.uxyzw[0] >> _count;

														
 
															-		result.uxyzw[1] = _a.uxyzw[1] >> _count;

														
 
															-		result.uxyzw[2] = _a.uxyzw[2] >> _count;

														
 
															-		result.uxyzw[3] = _a.uxyzw[3] >> _count;

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.ixyzw[0] = _a.ixyzw[0] >> _count;

														
 
															-		result.ixyzw[1] = _a.ixyzw[1] >> _count;

														
 
															-		result.ixyzw[2] = _a.ixyzw[2] >> _count;

														
 
															-		result.ixyzw[3] = _a.ixyzw[3] >> _count;

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.ixyzw[0] = _a.ixyzw[0] + _b.ixyzw[0];

														
 
															-		result.ixyzw[1] = _a.ixyzw[1] + _b.ixyzw[1];

														
 
															-		result.ixyzw[2] = _a.ixyzw[2] + _b.ixyzw[2];

														
 
															-		result.ixyzw[3] = _a.ixyzw[3] + _b.ixyzw[3];

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		float4_t result;

														
 
															-		result.ixyzw[0] = _a.ixyzw[0] - _b.ixyzw[0];

														
 
															-		result.ixyzw[1] = _a.ixyzw[1] - _b.ixyzw[1];

														
 
															-		result.ixyzw[2] = _a.ixyzw[2] - _b.ixyzw[2];

														
 
															-		result.ixyzw[3] = _a.ixyzw[3] - _b.ixyzw[3];

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#define float4_shuf_xAzC float4_shuf_xAzC_ni

														
 
															-#define float4_shuf_yBwD float4_shuf_yBwD_ni

														
 
															-#define float4_rcp float4_rcp_ni

														
 
															-#define float4_orx float4_orx_ni

														
 
															-#define float4_orc float4_orc_ni

														
 
															-#define float4_neg float4_neg_ni

														
 
															-#define float4_madd float4_madd_ni

														
 
															-#define float4_nmsub float4_nmsub_ni

														
 
															-#define float4_div_nr float4_div_nr_ni

														
 
															-#define float4_selb float4_selb_ni

														
 
															-#define float4_sels float4_sels_ni

														
 
															-#define float4_not float4_not_ni

														
 
															-#define float4_abs float4_abs_ni

														
 
															-#define float4_clamp float4_clamp_ni

														
 
															-#define float4_lerp float4_lerp_ni

														
 
															-#define float4_rsqrt float4_rsqrt_ni

														
 
															-#define float4_rsqrt_nr float4_rsqrt_nr_ni

														
 
															-#define float4_rsqrt_carmack float4_rsqrt_carmack_ni

														
 
															-#define float4_sqrt_nr float4_sqrt_nr_ni

														
 
															-#define float4_log2 float4_log2_ni

														
 
															-#define float4_exp2 float4_exp2_ni

														
 
															-#define float4_pow float4_pow_ni

														
 
															-#define float4_cross3 float4_cross3_ni

														
 
															-#define float4_normalize3 float4_normalize3_ni

														
 
															-#define float4_dot3 float4_dot3_ni

														
 
															-#define float4_dot float4_dot_ni

														
 
															-#define float4_ceil float4_ceil_ni

														
 
															-#define float4_floor float4_floor_ni

														
 
															-#include "float4_ni.h"

														
 
															-

														
 
															-#endif // __BX_FLOAT4_REF_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_FLOAT4_REF_H__
														
 
															+#define __BX_FLOAT4_REF_H__
														
 
															+
														
 
															+#include <math.h> // sqrtf
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	typedef union float4_t
														
 
															+	{
														
 
															+		int32_t  ixyzw[4];
														
 
															+		uint32_t uxyzw[4];
														
 
															+		float    fxyzw[4];
														
 
															+
														
 
															+	} float4_t;
														
 
															+
														
 
															+#define ELEMx 0
														
 
															+#define ELEMy 1
														
 
															+#define ELEMz 2
														
 
															+#define ELEMw 3
														
 
															+#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
														
 
															+			BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
														
 
															+			{ \
														
 
															+				float4_t result; \
														
 
															+				result.ixyzw[0] = _a.ixyzw[ELEM##_x]; \
														
 
															+				result.ixyzw[1] = _a.ixyzw[ELEM##_y]; \
														
 
															+				result.ixyzw[2] = _a.ixyzw[ELEM##_z]; \
														
 
															+				result.ixyzw[3] = _a.ixyzw[ELEM##_w]; \
														
 
															+				return result; \
														
 
															+			}
														
 
															+
														
 
															+#include "float4_swizzle.inl"
														
 
															+
														
 
															+#undef IMPLEMENT_SWIZZLE
														
 
															+#undef ELEMw
														
 
															+#undef ELEMz
														
 
															+#undef ELEMy
														
 
															+#undef ELEMx
														
 
															+
														
 
															+#define IMPLEMENT_TEST(_xyzw, _mask) \
														
 
															+			BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
														
 
															+			{ \
														
 
															+				uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
														
 
															+				             | ( (_test.uxyzw[2]>>31)<<2) \
														
 
															+				             | ( (_test.uxyzw[1]>>31)<<1) \
														
 
															+				             | (_test.uxyzw[0]>>31) \
														
 
															+				             ; \
														
 
															+				return 0 != (tmp&(_mask) ); \
														
 
															+			} \
														
 
															+			\
														
 
															+			BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
														
 
															+			{ \
														
 
															+				uint32_t tmp = ( (_test.uxyzw[3]>>31)<<3) \
														
 
															+				             | ( (_test.uxyzw[2]>>31)<<2) \
														
 
															+				             | ( (_test.uxyzw[1]>>31)<<1) \
														
 
															+				             | (_test.uxyzw[0]>>31) \
														
 
															+				             ; \
														
 
															+				return (_mask) == (tmp&(_mask) ); \
														
 
															+			}
														
 
															+
														
 
															+IMPLEMENT_TEST(x    , 0x1);
														
 
															+IMPLEMENT_TEST(y    , 0x2);
														
 
															+IMPLEMENT_TEST(xy   , 0x3);
														
 
															+IMPLEMENT_TEST(z    , 0x4);
														
 
															+IMPLEMENT_TEST(xz   , 0x5);
														
 
															+IMPLEMENT_TEST(yz   , 0x6);
														
 
															+IMPLEMENT_TEST(xyz  , 0x7);
														
 
															+IMPLEMENT_TEST(w    , 0x8);
														
 
															+IMPLEMENT_TEST(xw   , 0x9);
														
 
															+IMPLEMENT_TEST(yw   , 0xa);
														
 
															+IMPLEMENT_TEST(xyw  , 0xb);
														
 
															+IMPLEMENT_TEST(zw   , 0xc);
														
 
															+IMPLEMENT_TEST(xzw  , 0xd);
														
 
															+IMPLEMENT_TEST(yzw  , 0xe);
														
 
															+IMPLEMENT_TEST(xyzw , 0xf);
														
 
															+
														
 
															+#undef IMPLEMENT_TEST
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[0];
														
 
															+		result.uxyzw[1] = _a.uxyzw[1];
														
 
															+		result.uxyzw[2] = _b.uxyzw[0];
														
 
															+		result.uxyzw[3] = _b.uxyzw[1];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _b.uxyzw[0];
														
 
															+		result.uxyzw[1] = _b.uxyzw[1];
														
 
															+		result.uxyzw[2] = _a.uxyzw[0];
														
 
															+		result.uxyzw[3] = _a.uxyzw[1];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _b.uxyzw[2];
														
 
															+		result.uxyzw[1] = _b.uxyzw[3];
														
 
															+		result.uxyzw[2] = _a.uxyzw[2];
														
 
															+		result.uxyzw[3] = _a.uxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[2];
														
 
															+		result.uxyzw[1] = _a.uxyzw[3];
														
 
															+		result.uxyzw[2] = _b.uxyzw[2];
														
 
															+		result.uxyzw[3] = _b.uxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[0];
														
 
															+		result.uxyzw[1] = _b.uxyzw[0];
														
 
															+		result.uxyzw[2] = _a.uxyzw[1];
														
 
															+		result.uxyzw[3] = _b.uxyzw[1];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[1];
														
 
															+		result.uxyzw[1] = _b.uxyzw[1];
														
 
															+		result.uxyzw[2] = _a.uxyzw[0];
														
 
															+		result.uxyzw[3] = _b.uxyzw[0];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[2];
														
 
															+		result.uxyzw[1] = _b.uxyzw[2];
														
 
															+		result.uxyzw[2] = _a.uxyzw[3];
														
 
															+		result.uxyzw[3] = _b.uxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _b.uxyzw[2];
														
 
															+		result.uxyzw[1] = _a.uxyzw[2];
														
 
															+		result.uxyzw[2] = _b.uxyzw[3];
														
 
															+		result.uxyzw[3] = _a.uxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_x(float4_t _a)
														
 
															+	{
														
 
															+		return _a.fxyzw[0];
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_y(float4_t _a)
														
 
															+	{
														
 
															+		return _a.fxyzw[1];
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_z(float4_t _a)
														
 
															+	{
														
 
															+		return _a.fxyzw[2];
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_w(float4_t _a)
														
 
															+	{
														
 
															+		return _a.fxyzw[3];
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
														
 
															+	{
														
 
															+		return *reinterpret_cast<const float4_t*>(_ptr);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
														
 
															+	{
														
 
															+		*reinterpret_cast<float4_t*>(_ptr) = _a;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
														
 
															+	{
														
 
															+		*reinterpret_cast<float4_t*>(_ptr) = _a;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.fxyzw[0] = _x;
														
 
															+		result.fxyzw[1] = _y;
														
 
															+		result.fxyzw[2] = _z;
														
 
															+		result.fxyzw[3] = _w;
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _x;
														
 
															+		result.uxyzw[1] = _y;
														
 
															+		result.uxyzw[2] = _z;
														
 
															+		result.uxyzw[3] = _w;
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
														
 
															+	{
														
 
															+		float val = *reinterpret_cast<const float*>(_ptr);
														
 
															+		return float4_ld(val, val, val, val);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_splat(float _a)
														
 
															+	{
														
 
															+		return float4_ld(_a, _a, _a, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
														
 
															+	{
														
 
															+		return float4_ild(_a, _a, _a, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_zero()
														
 
															+	{
														
 
															+		return float4_ild(0, 0, 0, 0);
														
 
															+	}
														
 
															+
														
 
															+	// Converts each lane of _a from a signed 32-bit integer to float.
															+	// The source lanes are taken from _a; `result` holds no defined value
															+	// until it is written, so it must never be used as the input.
															+	BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)
															+	{
															+		float4_t result;
															+		result.fxyzw[0] = (float)_a.ixyzw[0];
															+		result.fxyzw[1] = (float)_a.ixyzw[1];
															+		result.fxyzw[2] = (float)_a.ixyzw[2];
															+		result.fxyzw[3] = (float)_a.ixyzw[3];
															+		return result;
															+	}
														
 
															+
														
 
															+	// Converts each float lane of _a to a signed integer with a C-style
															+	// cast, i.e. the fractional part is discarded (truncation toward zero).
															+	// The source lanes are taken from _a; `result` holds no defined value
															+	// until it is written, so it must never be used as the input.
															+	BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)
															+	{
															+		float4_t result;
															+		result.ixyzw[0] = (int)_a.fxyzw[0];
															+		result.ixyzw[1] = (int)_a.fxyzw[1];
															+		result.ixyzw[2] = (int)_a.fxyzw[2];
															+		result.ixyzw[3] = (int)_a.fxyzw[3];
															+		return result;
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t tmp    = float4_ftoi(_a);
														
 
															+		const float4_t result = float4_itof(tmp);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.fxyzw[0] = _a.fxyzw[0] + _b.fxyzw[0];
														
 
															+		result.fxyzw[1] = _a.fxyzw[1] + _b.fxyzw[1];
														
 
															+		result.fxyzw[2] = _a.fxyzw[2] + _b.fxyzw[2];
														
 
															+		result.fxyzw[3] = _a.fxyzw[3] + _b.fxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.fxyzw[0] = _a.fxyzw[0] - _b.fxyzw[0];
														
 
															+		result.fxyzw[1] = _a.fxyzw[1] - _b.fxyzw[1];
														
 
															+		result.fxyzw[2] = _a.fxyzw[2] - _b.fxyzw[2];
														
 
															+		result.fxyzw[3] = _a.fxyzw[3] - _b.fxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.fxyzw[0] = _a.fxyzw[0] * _b.fxyzw[0];
														
 
															+		result.fxyzw[1] = _a.fxyzw[1] * _b.fxyzw[1];
														
 
															+		result.fxyzw[2] = _a.fxyzw[2] * _b.fxyzw[2];
														
 
															+		result.fxyzw[3] = _a.fxyzw[3] * _b.fxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	// Lane-wise float division: result[i] = _a[i] / _b[i].
															+	// No check is made for zero lanes in _b.
															+	BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)
															+	{
															+		float4_t result;
															+		result.fxyzw[0] = _a.fxyzw[0] / _b.fxyzw[0];
															+		result.fxyzw[1] = _a.fxyzw[1] / _b.fxyzw[1];
															+		result.fxyzw[2] = _a.fxyzw[2] / _b.fxyzw[2];
															+		result.fxyzw[3] = _a.fxyzw[3] / _b.fxyzw[3];
															+		return result;
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.fxyzw[0] = 1.0f / _a.fxyzw[0];
														
 
															+		result.fxyzw[1] = 1.0f / _a.fxyzw[1];
														
 
															+		result.fxyzw[2] = 1.0f / _a.fxyzw[2];
														
 
															+		result.fxyzw[3] = 1.0f / _a.fxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.fxyzw[0] = sqrtf(_a.fxyzw[0]);
														
 
															+		result.fxyzw[1] = sqrtf(_a.fxyzw[1]);
														
 
															+		result.fxyzw[2] = sqrtf(_a.fxyzw[2]);
														
 
															+		result.fxyzw[3] = sqrtf(_a.fxyzw[3]);
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.fxyzw[0] = 1.0f / sqrtf(_a.fxyzw[0]);
														
 
															+		result.fxyzw[1] = 1.0f / sqrtf(_a.fxyzw[1]);
														
 
															+		result.fxyzw[2] = 1.0f / sqrtf(_a.fxyzw[2]);
														
 
															+		result.fxyzw[3] = 1.0f / sqrtf(_a.fxyzw[3]);
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	// Lane-wise float equality test. Each result lane is an all-ones mask
															+	// (0xffffffff) when _a[i] == _b[i] and zero otherwise.
															+	// The mask is written through the unsigned view so that 0xffffffff is
															+	// stored without an out-of-range unsigned-to-signed conversion.
															+	BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
															+	{
															+		float4_t result;
															+		result.uxyzw[0] = _a.fxyzw[0] == _b.fxyzw[0] ? 0xffffffff : 0x0;
															+		result.uxyzw[1] = _a.fxyzw[1] == _b.fxyzw[1] ? 0xffffffff : 0x0;
															+		result.uxyzw[2] = _a.fxyzw[2] == _b.fxyzw[2] ? 0xffffffff : 0x0;
															+		result.uxyzw[3] = _a.fxyzw[3] == _b.fxyzw[3] ? 0xffffffff : 0x0;
															+		return result;
															+	}
														
 
															+
														
 
															+	// Lane-wise float less-than test. Each result lane is an all-ones mask
															+	// (0xffffffff) when _a[i] < _b[i] and zero otherwise.
															+	// The mask is written through the unsigned view so that 0xffffffff is
															+	// stored without an out-of-range unsigned-to-signed conversion.
															+	BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
															+	{
															+		float4_t result;
															+		result.uxyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? 0xffffffff : 0x0;
															+		result.uxyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? 0xffffffff : 0x0;
															+		result.uxyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? 0xffffffff : 0x0;
															+		result.uxyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? 0xffffffff : 0x0;
															+		return result;
															+	}
														
 
															+
														
 
															+	// Lane-wise float less-or-equal test. Each result lane is an all-ones
															+	// mask (0xffffffff) when _a[i] <= _b[i] and zero otherwise.
															+	// The mask is written through the unsigned view so that 0xffffffff is
															+	// stored without an out-of-range unsigned-to-signed conversion.
															+	BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
															+	{
															+		float4_t result;
															+		result.uxyzw[0] = _a.fxyzw[0] <= _b.fxyzw[0] ? 0xffffffff : 0x0;
															+		result.uxyzw[1] = _a.fxyzw[1] <= _b.fxyzw[1] ? 0xffffffff : 0x0;
															+		result.uxyzw[2] = _a.fxyzw[2] <= _b.fxyzw[2] ? 0xffffffff : 0x0;
															+		result.uxyzw[3] = _a.fxyzw[3] <= _b.fxyzw[3] ? 0xffffffff : 0x0;
															+		return result;
															+	}
														
 
															+
														
 
															+	// Lane-wise float greater-than test. Each result lane is an all-ones
															+	// mask (0xffffffff) when _a[i] > _b[i] and zero otherwise.
															+	// The mask is written through the unsigned view so that 0xffffffff is
															+	// stored without an out-of-range unsigned-to-signed conversion.
															+	BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
															+	{
															+		float4_t result;
															+		result.uxyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? 0xffffffff : 0x0;
															+		result.uxyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? 0xffffffff : 0x0;
															+		result.uxyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? 0xffffffff : 0x0;
															+		result.uxyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? 0xffffffff : 0x0;
															+		return result;
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.ixyzw[0] = _a.fxyzw[0] >= _b.fxyzw[0] ? 0xffffffff : 0x0;
														
 
															+		result.ixyzw[1] = _a.fxyzw[1] >= _b.fxyzw[1] ? 0xffffffff : 0x0;
														
 
															+		result.ixyzw[2] = _a.fxyzw[2] >= _b.fxyzw[2] ? 0xffffffff : 0x0;
														
 
															+		result.ixyzw[3] = _a.fxyzw[3] >= _b.fxyzw[3] ? 0xffffffff : 0x0;
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.fxyzw[0] = _a.fxyzw[0] < _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0];
														
 
															+		result.fxyzw[1] = _a.fxyzw[1] < _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1];
														
 
															+		result.fxyzw[2] = _a.fxyzw[2] < _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2];
														
 
															+		result.fxyzw[3] = _a.fxyzw[3] < _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.fxyzw[0] = _a.fxyzw[0] > _b.fxyzw[0] ? _a.fxyzw[0] : _b.fxyzw[0];
														
 
															+		result.fxyzw[1] = _a.fxyzw[1] > _b.fxyzw[1] ? _a.fxyzw[1] : _b.fxyzw[1];
														
 
															+		result.fxyzw[2] = _a.fxyzw[2] > _b.fxyzw[2] ? _a.fxyzw[2] : _b.fxyzw[2];
														
 
															+		result.fxyzw[3] = _a.fxyzw[3] > _b.fxyzw[3] ? _a.fxyzw[3] : _b.fxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[0] & _b.uxyzw[0];
														
 
															+		result.uxyzw[1] = _a.uxyzw[1] & _b.uxyzw[1];
														
 
															+		result.uxyzw[2] = _a.uxyzw[2] & _b.uxyzw[2];
														
 
															+		result.uxyzw[3] = _a.uxyzw[3] & _b.uxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[0] & ~_b.uxyzw[0];
														
 
															+		result.uxyzw[1] = _a.uxyzw[1] & ~_b.uxyzw[1];
														
 
															+		result.uxyzw[2] = _a.uxyzw[2] & ~_b.uxyzw[2];
														
 
															+		result.uxyzw[3] = _a.uxyzw[3] & ~_b.uxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[0] | _b.uxyzw[0];
														
 
															+		result.uxyzw[1] = _a.uxyzw[1] | _b.uxyzw[1];
														
 
															+		result.uxyzw[2] = _a.uxyzw[2] | _b.uxyzw[2];
														
 
															+		result.uxyzw[3] = _a.uxyzw[3] | _b.uxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[0] ^ _b.uxyzw[0];
														
 
															+		result.uxyzw[1] = _a.uxyzw[1] ^ _b.uxyzw[1];
														
 
															+		result.uxyzw[2] = _a.uxyzw[2] ^ _b.uxyzw[2];
														
 
															+		result.uxyzw[3] = _a.uxyzw[3] ^ _b.uxyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[0] << _count;
														
 
															+		result.uxyzw[1] = _a.uxyzw[1] << _count;
														
 
															+		result.uxyzw[2] = _a.uxyzw[2] << _count;
														
 
															+		result.uxyzw[3] = _a.uxyzw[3] << _count;
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.uxyzw[0] = _a.uxyzw[0] >> _count;
														
 
															+		result.uxyzw[1] = _a.uxyzw[1] >> _count;
														
 
															+		result.uxyzw[2] = _a.uxyzw[2] >> _count;
														
 
															+		result.uxyzw[3] = _a.uxyzw[3] >> _count;
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.ixyzw[0] = _a.ixyzw[0] >> _count;
														
 
															+		result.ixyzw[1] = _a.ixyzw[1] >> _count;
														
 
															+		result.ixyzw[2] = _a.ixyzw[2] >> _count;
														
 
															+		result.ixyzw[3] = _a.ixyzw[3] >> _count;
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.ixyzw[0] = _a.ixyzw[0] + _b.ixyzw[0];
														
 
															+		result.ixyzw[1] = _a.ixyzw[1] + _b.ixyzw[1];
														
 
															+		result.ixyzw[2] = _a.ixyzw[2] + _b.ixyzw[2];
														
 
															+		result.ixyzw[3] = _a.ixyzw[3] + _b.ixyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		float4_t result;
														
 
															+		result.ixyzw[0] = _a.ixyzw[0] - _b.ixyzw[0];
														
 
															+		result.ixyzw[1] = _a.ixyzw[1] - _b.ixyzw[1];
														
 
															+		result.ixyzw[2] = _a.ixyzw[2] - _b.ixyzw[2];
														
 
															+		result.ixyzw[3] = _a.ixyzw[3] - _b.ixyzw[3];
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#define float4_shuf_xAzC float4_shuf_xAzC_ni
														
 
															+#define float4_shuf_yBwD float4_shuf_yBwD_ni
														
 
															+#define float4_rcp float4_rcp_ni
														
 
															+#define float4_orx float4_orx_ni
														
 
															+#define float4_orc float4_orc_ni
														
 
															+#define float4_neg float4_neg_ni
														
 
															+#define float4_madd float4_madd_ni
														
 
															+#define float4_nmsub float4_nmsub_ni
														
 
															+#define float4_div_nr float4_div_nr_ni
														
 
															+#define float4_selb float4_selb_ni
														
 
															+#define float4_sels float4_sels_ni
														
 
															+#define float4_not float4_not_ni
														
 
															+#define float4_abs float4_abs_ni
														
 
															+#define float4_clamp float4_clamp_ni
														
 
															+#define float4_lerp float4_lerp_ni
														
 
															+#define float4_rsqrt float4_rsqrt_ni
														
 
															+#define float4_rsqrt_nr float4_rsqrt_nr_ni
														
 
															+#define float4_rsqrt_carmack float4_rsqrt_carmack_ni
														
 
															+#define float4_sqrt_nr float4_sqrt_nr_ni
														
 
															+#define float4_log2 float4_log2_ni
														
 
															+#define float4_exp2 float4_exp2_ni
														
 
															+#define float4_pow float4_pow_ni
														
 
															+#define float4_cross3 float4_cross3_ni
														
 
															+#define float4_normalize3 float4_normalize3_ni
														
 
															+#define float4_dot3 float4_dot3_ni
														
 
															+#define float4_dot float4_dot_ni
														
 
															+#define float4_ceil float4_ceil_ni
														
 
															+#define float4_floor float4_floor_ni
														
 
															+#include "float4_ni.h"
														
 
															+
														
 
															+#endif // __BX_FLOAT4_REF_H__
														
--- a/include/bx/float4_sse.h
+++ b/include/bx/float4_sse.h
@@ -1,401 +1,401 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_FLOAT4_SSE_H__

														
 
															-#define __BX_FLOAT4_SSE_H__

														
 
															-

														
 
															-#include <emmintrin.h> // __m128i

														
 
															-#if defined(__SSE4_1__)

														
 
															-#	include <smmintrin.h>

														
 
															-#endif // defined(__SSE4_1__)

														
 
															-#include <xmmintrin.h> // __m128

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-

														
 
															-	typedef __m128 float4_t;

														
 
															-

														
 
															-#define ELEMx 0

														
 
															-#define ELEMy 1

														
 
															-#define ELEMz 2

														
 
															-#define ELEMw 3

														
 
															-#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \

														
 
															-			BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \

														
 
															-			{ \

														
 
															-				return _mm_shuffle_ps( _a, _a, _MM_SHUFFLE(ELEM##_w, ELEM##_z, ELEM##_y, ELEM##_x ) ); \

														
 
															-			}

														
 
															-

														
 
															-#include "float4_swizzle.inl"

														
 
															-

														
 
															-#undef IMPLEMENT_SWIZZLE

														
 
															-#undef ELEMw

														
 
															-#undef ELEMz

														
 
															-#undef ELEMy

														
 
															-#undef ELEMx

														
 
															-

														
 
															-#define IMPLEMENT_TEST(_xyzw, _mask) \

														
 
															-			BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \

														
 
															-			{ \

														
 
															-				return 0x0 != (_mm_movemask_ps(_test)&(_mask) ); \

														
 
															-			} \

														
 
															-			\

														
 
															-			BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \

														
 
															-			{ \

														
 
															-				return (_mask) == (_mm_movemask_ps(_test)&(_mask) ); \

														
 
															-			}

														
 
															-

														
 
															-IMPLEMENT_TEST(x    , 0x1);

														
 
															-IMPLEMENT_TEST(y    , 0x2);

														
 
															-IMPLEMENT_TEST(xy   , 0x3);

														
 
															-IMPLEMENT_TEST(z    , 0x4);

														
 
															-IMPLEMENT_TEST(xz   , 0x5);

														
 
															-IMPLEMENT_TEST(yz   , 0x6);

														
 
															-IMPLEMENT_TEST(xyz  , 0x7);

														
 
															-IMPLEMENT_TEST(w    , 0x8);

														
 
															-IMPLEMENT_TEST(xw   , 0x9);

														
 
															-IMPLEMENT_TEST(yw   , 0xa);

														
 
															-IMPLEMENT_TEST(xyw  , 0xb);

														
 
															-IMPLEMENT_TEST(zw   , 0xc);

														
 
															-IMPLEMENT_TEST(xzw  , 0xd);

														
 
															-IMPLEMENT_TEST(yzw  , 0xe);

														
 
															-IMPLEMENT_TEST(xyzw , 0xf);

														
 
															-

														
 
															-#undef IMPLEMENT_TEST

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_movelh_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_movelh_ps(_b, _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_movehl_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_movehl_ps(_b, _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_unpacklo_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_unpacklo_ps(_b, _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_unpackhi_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_unpackhi_ps(_b, _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float float4_x(float4_t _a)

														
 
															-	{

														
 
															-		return _mm_cvtss_f32(_a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float float4_y(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t yyyy = float4_swiz_yyyy(_a);

														
 
															-		const float result  = _mm_cvtss_f32(yyyy);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float float4_z(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t zzzz = float4_swiz_zzzz(_a);

														
 
															-		const float result  = _mm_cvtss_f32(zzzz);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float float4_w(float4_t _a)

														
 
															-	{

														
 
															-		const float4_t wwww = float4_swiz_wwww(_a);

														
 
															-		const float result  = _mm_cvtss_f32(wwww);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)

														
 
															-	{

														
 
															-		return _mm_load_ps(reinterpret_cast<const float*>(_ptr) );

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)

														
 
															-	{

														
 
															-		_mm_store_ps(reinterpret_cast<float*>(_ptr), _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)

														
 
															-	{

														
 
															-		_mm_stream_ps(reinterpret_cast<float*>(_ptr), _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)

														
 
															-	{

														
 
															-		return _mm_set_ps(_w, _z, _y, _x);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)

														
 
															-	{

														
 
															-		const __m128i set     = _mm_set_epi32(_w, _z, _y, _x);

														
 
															-		const float4_t result = _mm_castsi128_ps(set);

														
 
															-		

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)

														
 
															-	{

														
 
															-		const float4_t x___   = _mm_load_ss(reinterpret_cast<const float*>(_ptr) );

														
 
															-		const float4_t result = float4_swiz_xxxx(x___);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_splat(float _a)

														
 
															-	{

														
 
															-		return _mm_set1_ps(_a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)

														
 
															-	{

														
 
															-		const __m128i splat   = _mm_set1_epi32(_a);

														
 
															-		const float4_t result = _mm_castsi128_ps(splat);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_zero()

														
 
															-	{

														
 
															-		return _mm_setzero_ps();

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)

														
 
															-	{

														
 
															-		const __m128i  itof   = _mm_castps_si128(_a);

														
 
															-		const float4_t result = _mm_cvtepi32_ps(itof);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)

														
 
															-	{

														
 
															-		const __m128i ftoi    = _mm_cvtps_epi32(_a);

														
 
															-		const float4_t result = _mm_castsi128_ps(ftoi);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)

														
 
															-	{

														
 
															-#if defined(__SSE4_1__)

														
 
															-		return _mm_round_ps(_a, _MM_FROUND_NINT);

														
 
															-#else

														
 
															-		const __m128i round   = _mm_cvtps_epi32(_a);

														
 
															-		const float4_t result = _mm_cvtepi32_ps(round);

														
 
															-

														
 
															-		return result;

														
 
															-#endif // defined(__SSE4_1__)

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_add_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_sub_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_mul_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_div_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)

														
 
															-	{

														
 
															-		return _mm_rcp_ps(_a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)

														
 
															-	{

														
 
															-		return _mm_sqrt_ps(_a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)

														
 
															-	{

														
 
															-		return _mm_rsqrt_ps(_a);

														
 
															-	}

														
 
															-

														
 
															-#if defined(__SSE4_1__)

														
 
															-	BX_FLOAT4_INLINE float4_t float4_dot3(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_dp_ps(_a, _b, 0x77);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_dot(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_dp_ps(_a, _b, 0xFF);

														
 
															-	}

														
 
															-#endif // defined(__SSE4__)

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_cmpeq_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_cmplt_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_cmple_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_cmpgt_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_cmpge_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_min_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_max_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_and_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_andnot_ps(_b, _a);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_or_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		return _mm_xor_ps(_a, _b);

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)

														
 
															-	{

														
 
															-		const __m128i a       = _mm_castps_si128(_a);

														
 
															-		const __m128i shift   = _mm_slli_epi32(a, _count);

														
 
															-		const float4_t result = _mm_castsi128_ps(shift);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)

														
 
															-	{

														
 
															-		const __m128i a       = _mm_castps_si128(_a);

														
 
															-		const __m128i shift   = _mm_srli_epi32(a, _count);

														
 
															-		const float4_t result = _mm_castsi128_ps(shift);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)

														
 
															-	{

														
 
															-		const __m128i a       = _mm_castps_si128(_a);

														
 
															-		const __m128i shift   = _mm_srai_epi32(a, _count);

														
 
															-		const float4_t result = _mm_castsi128_ps(shift);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const __m128i a       = _mm_castps_si128(_a);

														
 
															-		const __m128i b       = _mm_castps_si128(_b);

														
 
															-		const __m128i add     = _mm_add_epi32(a, b);

														
 
															-		const float4_t result = _mm_castsi128_ps(add);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)

														
 
															-	{

														
 
															-		const __m128i a       = _mm_castps_si128(_a);

														
 
															-		const __m128i b       = _mm_castps_si128(_b);

														
 
															-		const __m128i sub     = _mm_sub_epi32(a, b);

														
 
															-		const float4_t result = _mm_castsi128_ps(sub);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#define float4_shuf_xAzC float4_shuf_xAzC_ni

														
 
															-#define float4_shuf_yBwD float4_shuf_yBwD_ni

														
 
															-#define float4_rcp float4_rcp_ni

														
 
															-#define float4_orx float4_orx_ni

														
 
															-#define float4_orc float4_orc_ni

														
 
															-#define float4_neg float4_neg_ni

														
 
															-#define float4_madd float4_madd_ni

														
 
															-#define float4_nmsub float4_nmsub_ni

														
 
															-#define float4_div_nr float4_div_nr_ni

														
 
															-#define float4_selb float4_selb_ni

														
 
															-#define float4_sels float4_sels_ni

														
 
															-#define float4_not float4_not_ni

														
 
															-#define float4_abs float4_abs_ni

														
 
															-#define float4_clamp float4_clamp_ni

														
 
															-#define float4_lerp float4_lerp_ni

														
 
															-#define float4_rsqrt float4_rsqrt_ni

														
 
															-#define float4_rsqrt_nr float4_rsqrt_nr_ni

														
 
															-#define float4_rsqrt_carmack float4_rsqrt_carmack_ni

														
 
															-#define float4_sqrt_nr float4_sqrt_nr_ni

														
 
															-#define float4_log2 float4_log2_ni

														
 
															-#define float4_exp2 float4_exp2_ni

														
 
															-#define float4_pow float4_pow_ni

														
 
															-#define float4_cross3 float4_cross3_ni

														
 
															-#define float4_normalize3 float4_normalize3_ni

														
 
															-#if !defined(__SSE4_1__)

														
 
															-#define float4_dot3 float4_dot3_ni

														
 
															-#define float4_dot float4_dot_ni

														
 
															-#endif // defined(__SSE4_1__)

														
 
															-#define float4_ceil float4_ceil_ni

														
 
															-#define float4_floor float4_floor_ni

														
 
															-#include "float4_ni.h"

														
 
															-

														
 
															-#endif // __FLOAT4_SSE_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_FLOAT4_SSE_H__
														
 
															+#define __BX_FLOAT4_SSE_H__
														
 
															+
														
 
															+#include <emmintrin.h> // __m128i
														
 
															+#if defined(__SSE4_1__)
														
 
															+#	include <smmintrin.h>
														
 
															+#endif // defined(__SSE4_1__)
														
 
															+#include <xmmintrin.h> // __m128
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+
														
 
															+	typedef __m128 float4_t;
														
 
															+
														
 
															+#define ELEMx 0
														
 
															+#define ELEMy 1
														
 
															+#define ELEMz 2
														
 
															+#define ELEMw 3
														
 
															+#define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
														
 
															+			BX_FLOAT4_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
														
 
															+			{ \
														
 
															+				return _mm_shuffle_ps( _a, _a, _MM_SHUFFLE(ELEM##_w, ELEM##_z, ELEM##_y, ELEM##_x ) ); \
														
 
															+			}
														
 
															+
														
 
															+#include "float4_swizzle.inl"
														
 
															+
														
 
															+#undef IMPLEMENT_SWIZZLE
														
 
															+#undef ELEMw
														
 
															+#undef ELEMz
														
 
															+#undef ELEMy
														
 
															+#undef ELEMx
														
 
															+
														
 
															+#define IMPLEMENT_TEST(_xyzw, _mask) \
														
 
															+			BX_FLOAT4_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
														
 
															+			{ \
														
 
															+				return 0x0 != (_mm_movemask_ps(_test)&(_mask) ); \
														
 
															+			} \
														
 
															+			\
														
 
															+			BX_FLOAT4_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
														
 
															+			{ \
														
 
															+				return (_mask) == (_mm_movemask_ps(_test)&(_mask) ); \
														
 
															+			}
														
 
															+
														
 
															+IMPLEMENT_TEST(x    , 0x1);
														
 
															+IMPLEMENT_TEST(y    , 0x2);
														
 
															+IMPLEMENT_TEST(xy   , 0x3);
														
 
															+IMPLEMENT_TEST(z    , 0x4);
														
 
															+IMPLEMENT_TEST(xz   , 0x5);
														
 
															+IMPLEMENT_TEST(yz   , 0x6);
														
 
															+IMPLEMENT_TEST(xyz  , 0x7);
														
 
															+IMPLEMENT_TEST(w    , 0x8);
														
 
															+IMPLEMENT_TEST(xw   , 0x9);
														
 
															+IMPLEMENT_TEST(yw   , 0xa);
														
 
															+IMPLEMENT_TEST(xyw  , 0xb);
														
 
															+IMPLEMENT_TEST(zw   , 0xc);
														
 
															+IMPLEMENT_TEST(xzw  , 0xd);
														
 
															+IMPLEMENT_TEST(yzw  , 0xe);
														
 
															+IMPLEMENT_TEST(xyzw , 0xf);
														
 
															+
														
 
															+#undef IMPLEMENT_TEST
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_movelh_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_movelh_ps(_b, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_movehl_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_movehl_ps(_b, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_unpacklo_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_unpacklo_ps(_b, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_unpackhi_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_unpackhi_ps(_b, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_x(float4_t _a)
														
 
															+	{
														
 
															+		return _mm_cvtss_f32(_a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_y(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t yyyy = float4_swiz_yyyy(_a);
														
 
															+		const float result  = _mm_cvtss_f32(yyyy);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_z(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t zzzz = float4_swiz_zzzz(_a);
														
 
															+		const float result  = _mm_cvtss_f32(zzzz);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float float4_w(float4_t _a)
														
 
															+	{
														
 
															+		const float4_t wwww = float4_swiz_wwww(_a);
														
 
															+		const float result  = _mm_cvtss_f32(wwww);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_ld(const void* _ptr)
														
 
															+	{
														
 
															+		return _mm_load_ps(reinterpret_cast<const float*>(_ptr) );
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE void float4_st(void* _ptr, float4_t _a)
														
 
															+	{
														
 
															+		_mm_store_ps(reinterpret_cast<float*>(_ptr), _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE void float4_stream(void* _ptr, float4_t _a)
														
 
															+	{
														
 
															+		_mm_stream_ps(reinterpret_cast<float*>(_ptr), _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
														
 
															+	{
														
 
															+		return _mm_set_ps(_w, _z, _y, _x);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
														
 
															+	{
														
 
															+		const __m128i set     = _mm_set_epi32(_w, _z, _y, _x);
														
 
															+		const float4_t result = _mm_castsi128_ps(set);
														
 
															+		
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_splat(const void* _ptr)
														
 
															+	{
														
 
															+		const float4_t x___   = _mm_load_ss(reinterpret_cast<const float*>(_ptr) );
														
 
															+		const float4_t result = float4_swiz_xxxx(x___);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_splat(float _a)
														
 
															+	{
														
 
															+		return _mm_set1_ps(_a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_isplat(uint32_t _a)
														
 
															+	{
														
 
															+		const __m128i splat   = _mm_set1_epi32(_a);
														
 
															+		const float4_t result = _mm_castsi128_ps(splat);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_zero()
														
 
															+	{
														
 
															+		return _mm_setzero_ps();
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_itof(float4_t _a)
														
 
															+	{
														
 
															+		const __m128i  itof   = _mm_castps_si128(_a);
														
 
															+		const float4_t result = _mm_cvtepi32_ps(itof);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_ftoi(float4_t _a)
														
 
															+	{
														
 
															+		const __m128i ftoi    = _mm_cvtps_epi32(_a);
														
 
															+		const float4_t result = _mm_castsi128_ps(ftoi);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_round(float4_t _a)
														
 
															+	{
														
 
															+#if defined(__SSE4_1__)
														
 
															+		return _mm_round_ps(_a, _MM_FROUND_NINT);
														
 
															+#else
														
 
															+		const __m128i round   = _mm_cvtps_epi32(_a);
														
 
															+		const float4_t result = _mm_cvtepi32_ps(round);
														
 
															+
														
 
															+		return result;
														
 
															+#endif // defined(__SSE4_1__)
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_add(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_add_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_sub_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_mul_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_div(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_div_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rcp_est(float4_t _a)
														
 
															+	{
														
 
															+		return _mm_rcp_ps(_a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sqrt(float4_t _a)
														
 
															+	{
														
 
															+		return _mm_sqrt_ps(_a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_rsqrt_est(float4_t _a)
														
 
															+	{
														
 
															+		return _mm_rsqrt_ps(_a);
														
 
															+	}
														
 
															+
														
 
															+#if defined(__SSE4_1__)
														
 
															+	BX_FLOAT4_INLINE float4_t float4_dot3(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_dp_ps(_a, _b, 0x77);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_dot(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_dp_ps(_a, _b, 0xFF);
														
 
															+	}
														
 
															+#endif // defined(__SSE4__)
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_cmpeq_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_cmplt_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_cmple_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_cmpgt_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_cmpge_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_min(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_min_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_max(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_max_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_and(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_and_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_andnot_ps(_b, _a);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_or(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_or_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_xor(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		return _mm_xor_ps(_a, _b);
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sll(float4_t _a, int _count)
														
 
															+	{
														
 
															+		const __m128i a       = _mm_castps_si128(_a);
														
 
															+		const __m128i shift   = _mm_slli_epi32(a, _count);
														
 
															+		const float4_t result = _mm_castsi128_ps(shift);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_srl(float4_t _a, int _count)
														
 
															+	{
														
 
															+		const __m128i a       = _mm_castps_si128(_a);
														
 
															+		const __m128i shift   = _mm_srli_epi32(a, _count);
														
 
															+		const float4_t result = _mm_castsi128_ps(shift);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_sra(float4_t _a, int _count)
														
 
															+	{
														
 
															+		const __m128i a       = _mm_castps_si128(_a);
														
 
															+		const __m128i shift   = _mm_srai_epi32(a, _count);
														
 
															+		const float4_t result = _mm_castsi128_ps(shift);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const __m128i a       = _mm_castps_si128(_a);
														
 
															+		const __m128i b       = _mm_castps_si128(_b);
														
 
															+		const __m128i add     = _mm_add_epi32(a, b);
														
 
															+		const float4_t result = _mm_castsi128_ps(add);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	BX_FLOAT4_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
														
 
															+	{
														
 
															+		const __m128i a       = _mm_castps_si128(_a);
														
 
															+		const __m128i b       = _mm_castps_si128(_b);
														
 
															+		const __m128i sub     = _mm_sub_epi32(a, b);
														
 
															+		const float4_t result = _mm_castsi128_ps(sub);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#define float4_shuf_xAzC float4_shuf_xAzC_ni
														
 
															+#define float4_shuf_yBwD float4_shuf_yBwD_ni
														
 
															+#define float4_rcp float4_rcp_ni
														
 
															+#define float4_orx float4_orx_ni
														
 
															+#define float4_orc float4_orc_ni
														
 
															+#define float4_neg float4_neg_ni
														
 
															+#define float4_madd float4_madd_ni
														
 
															+#define float4_nmsub float4_nmsub_ni
														
 
															+#define float4_div_nr float4_div_nr_ni
														
 
															+#define float4_selb float4_selb_ni
														
 
															+#define float4_sels float4_sels_ni
														
 
															+#define float4_not float4_not_ni
														
 
															+#define float4_abs float4_abs_ni
														
 
															+#define float4_clamp float4_clamp_ni
														
 
															+#define float4_lerp float4_lerp_ni
														
 
															+#define float4_rsqrt float4_rsqrt_ni
														
 
															+#define float4_rsqrt_nr float4_rsqrt_nr_ni
														
 
															+#define float4_rsqrt_carmack float4_rsqrt_carmack_ni
														
 
															+#define float4_sqrt_nr float4_sqrt_nr_ni
														
 
															+#define float4_log2 float4_log2_ni
														
 
															+#define float4_exp2 float4_exp2_ni
														
 
															+#define float4_pow float4_pow_ni
														
 
															+#define float4_cross3 float4_cross3_ni
														
 
															+#define float4_normalize3 float4_normalize3_ni
														
 
															+#if !defined(__SSE4_1__)
														
 
															+#define float4_dot3 float4_dot3_ni
														
 
															+#define float4_dot float4_dot_ni
														
 
															+#endif // defined(__SSE4_1__)
														
 
															+#define float4_ceil float4_ceil_ni
														
 
															+#define float4_floor float4_floor_ni
														
 
															+#include "float4_ni.h"
														
 
															+
														
 
															+#endif // __FLOAT4_SSE_H__
														
--- a/include/bx/float4_t.h
+++ b/include/bx/float4_t.h
@@ -1,21 +1,21 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_FLOAT4_T_H__

														
 
															-#define __BX_FLOAT4_T_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-#define BX_FLOAT4_INLINE BX_FORCE_INLINE

														
 
															-

														
 
															-#if defined(__SSE2__) || (BX_COMPILER_MSVC && (BX_ARCH_64BIT || _M_IX86_FP >= 2) )

														
 
															-#	include "float4_sse.h"

														
 
															-#elif 0 // __ARM_NEON__

														
 
															-#	include "float4_neon.h"

														
 
															-#else

														
 
															-#	include "float4_ref.h"

														
 
															-#endif //

														
 
															-

														
 
															-#endif // __BX_FLOAT4_T_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_FLOAT4_T_H__
														
 
															+#define __BX_FLOAT4_T_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+#define BX_FLOAT4_INLINE BX_FORCE_INLINE
														
 
															+
														
 
															+#if defined(__SSE2__) || (BX_COMPILER_MSVC && (BX_ARCH_64BIT || _M_IX86_FP >= 2) )
														
 
															+#	include "float4_sse.h"
														
 
															+#elif 0 // __ARM_NEON__
														
 
															+#	include "float4_neon.h"
														
 
															+#else
														
 
															+#	include "float4_ref.h"
														
 
															+#endif //
														
 
															+
														
 
															+#endif // __BX_FLOAT4_T_H__
														
--- a/include/bx/float4x4_t.h
+++ b/include/bx/float4x4_t.h
@@ -1,168 +1,168 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_FLOAT4X4_H__

														
 
															-#define __BX_FLOAT4x4_H__

														
 
															-

														
 
															-#include "float4_t.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	typedef BX_ALIGN_STRUCT_16(struct)

														
 
															-	{

														
 
															-		float4_t col[4];

														
 
															-

														
 
															-	} float4x4_t;

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_mul_xyz1(float4_t _a, const float4x4_t& _b)

														
 
															-	{

														
 
															-		const float4_t xxxx   = float4_swiz_xxxx(_a);

														
 
															-		const float4_t yyyy   = float4_swiz_yyyy(_a);

														
 
															-		const float4_t zzzz   = float4_swiz_zzzz(_a);

														
 
															-		const float4_t col0   = float4_mul(_b.col[0], xxxx);

														
 
															-		const float4_t col1   = float4_mul(_b.col[1], yyyy);

														
 
															-		const float4_t col2   = float4_madd(_b.col[2], zzzz, col0);

														
 
															-		const float4_t col3   = float4_add(_b.col[3], col1);

														
 
															-		const float4_t result = float4_add(col2, col3);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, const float4x4_t& _b)

														
 
															-	{

														
 
															-		const float4_t xxxx   = float4_swiz_xxxx(_a);

														
 
															-		const float4_t yyyy   = float4_swiz_yyyy(_a);

														
 
															-		const float4_t zzzz   = float4_swiz_zzzz(_a);

														
 
															-		const float4_t wwww   = float4_swiz_wwww(_a);

														
 
															-		const float4_t col0   = float4_mul(_b.col[0], xxxx);

														
 
															-		const float4_t col1   = float4_mul(_b.col[1], yyyy);

														
 
															-		const float4_t col2   = float4_madd(_b.col[2], zzzz, col0);

														
 
															-		const float4_t col3   = float4_madd(_b.col[3], wwww, col1);

														
 
															-		const float4_t result = float4_add(col2, col3);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4x4_t float4x4_mul(const float4x4_t& _a, const float4x4_t& _b)

														
 
															-	{

														
 
															-		float4x4_t result;

														
 
															-		result.col[0] = float4_mul(_a.col[0], _b);

														
 
															-		result.col[1] = float4_mul(_a.col[1], _b);

														
 
															-		result.col[2] = float4_mul(_a.col[2], _b);

														
 
															-		result.col[3] = float4_mul(_a.col[3], _b);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4x4_t float4x4_transpose(const float4x4_t& _mtx)

														
 
															-	{

														
 
															-		const float4_t aibj = float4_shuf_xAyB(_mtx.col[0], _mtx.col[2]); // aibj

														
 
															-		const float4_t emfn = float4_shuf_xAyB(_mtx.col[1], _mtx.col[3]); // emfn

														
 
															-		const float4_t ckdl = float4_shuf_zCwD(_mtx.col[0], _mtx.col[2]); // ckdl

														
 
															-		const float4_t gohp = float4_shuf_zCwD(_mtx.col[1], _mtx.col[3]); // gohp

														
 
															-		float4x4_t result;

														
 
															-		result.col[0] = float4_shuf_xAyB(aibj, emfn); // aeim

														
 
															-		result.col[1] = float4_shuf_zCwD(aibj, emfn); // bfjn

														
 
															-		result.col[2] = float4_shuf_xAyB(ckdl, gohp); // cgko

														
 
															-		result.col[3] = float4_shuf_zCwD(ckdl, gohp); // dhlp

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	BX_FLOAT4_INLINE float4x4_t float4x4_inverse(const float4x4_t& _a)

														
 
															-	{

														
 
															-		const float4_t tmp0 = float4_shuf_xAzC(_a.col[0], _a.col[1]);

														
 
															-		const float4_t tmp1 = float4_shuf_xAzC(_a.col[2], _a.col[3]);

														
 
															-		const float4_t tmp2 = float4_shuf_yBwD(_a.col[0], _a.col[1]);

														
 
															-		const float4_t tmp3 = float4_shuf_yBwD(_a.col[2], _a.col[3]);

														
 
															-		const float4_t t0   = float4_shuf_xyAB(tmp0, tmp1);

														
 
															-		const float4_t t1   = float4_shuf_xyAB(tmp3, tmp2);

														
 
															-		const float4_t t2   = float4_shuf_zwCD(tmp0, tmp1);

														
 
															-		const float4_t t3   = float4_shuf_zwCD(tmp3, tmp2);

														
 
															-

														
 
															-		const float4_t t23 = float4_mul(t2, t3);

														
 
															-		const float4_t t23_yxwz = float4_swiz_yxwz(t23);

														
 
															-		const float4_t t23_wzyx = float4_swiz_wzyx(t23);

														
 
															-

														
 
															-		float4_t cof0, cof1, cof2, cof3;

														
 
															-

														
 
															-		const float4_t zero = float4_zero();

														
 
															-		cof0 = float4_nmsub(t1, t23_yxwz, zero);

														
 
															-		cof0 = float4_madd(t1, t23_wzyx, cof0);

														
 
															-

														
 
															-		cof1 = float4_nmsub(t0, t23_yxwz, zero);

														
 
															-		cof1 = float4_madd(t0, t23_wzyx, cof1);

														
 
															-		cof1 = float4_swiz_zwxy(cof1);

														
 
															-		

														
 
															-		const float4_t t12 = float4_mul(t1, t2);

														
 
															-		const float4_t t12_yxwz = float4_swiz_yxwz(t12);

														
 
															-		const float4_t t12_wzyx = float4_swiz_wzyx(t12);

														
 
															-		

														
 
															-		cof0 = float4_madd(t3, t12_yxwz, cof0);

														
 
															-		cof0 = float4_nmsub(t3, t12_wzyx, cof0);

														
 
															-

														
 
															-		cof3 = float4_mul(t0, t12_yxwz);

														
 
															-		cof3 = float4_nmsub(t0, t12_wzyx, cof3);

														
 
															-		cof3 = float4_swiz_zwxy(cof3);

														
 
															-

														
 
															-		const float4_t t1_zwxy = float4_swiz_zwxy(t1);

														
 
															-		const float4_t t2_zwxy = float4_swiz_zwxy(t2);

														
 
															-

														
 
															-		const float4_t t13 = float4_mul(t1_zwxy, t3);

														
 
															-		const float4_t t13_yxwz = float4_swiz_yxwz(t13);

														
 
															-		const float4_t t13_wzyx = float4_swiz_wzyx(t13);

														
 
															-

														
 
															-		cof0 = float4_madd(t2_zwxy, t13_yxwz, cof0);

														
 
															-		cof0 = float4_nmsub(t2_zwxy, t13_wzyx, cof0);

														
 
															-

														
 
															-		cof2 = float4_mul(t0, t13_yxwz);

														
 
															-		cof2 = float4_nmsub(t0, t13_wzyx, cof2);

														
 
															-		cof2 = float4_swiz_zwxy(cof2);

														
 
															-

														
 
															-		const float4_t t01 = float4_mul(t0, t1);

														
 
															-		const float4_t t01_yxwz = float4_swiz_yxwz(t01);

														
 
															-		const float4_t t01_wzyx = float4_swiz_wzyx(t01);

														
 
															-

														
 
															-		cof2 = float4_nmsub(t3, t01_yxwz, cof2);

														
 
															-		cof2 = float4_madd(t3, t01_wzyx, cof2);

														
 
															-

														
 
															-		cof3 = float4_madd(t2_zwxy, t01_yxwz, cof3);

														
 
															-		cof3 = float4_nmsub(t2_zwxy, t01_wzyx, cof3);

														
 
															-

														
 
															-		const float4_t t03 = float4_mul(t0, t3);

														
 
															-		const float4_t t03_yxwz = float4_swiz_yxwz(t03);

														
 
															-		const float4_t t03_wzyx = float4_swiz_wzyx(t03);

														
 
															-

														
 
															-		cof1 = float4_nmsub(t2_zwxy, t03_yxwz, cof1);

														
 
															-		cof1 = float4_madd(t2_zwxy, t03_wzyx, cof1);

														
 
															-

														
 
															-		cof2 = float4_madd(t1, t03_yxwz, cof2);

														
 
															-		cof2 = float4_nmsub(t1, t03_wzyx, cof2);

														
 
															-

														
 
															-		const float4_t t02 = float4_mul(t0, t2_zwxy);

														
 
															-		const float4_t t02_yxwz = float4_swiz_yxwz(t02);

														
 
															-		const float4_t t02_wzyx = float4_swiz_wzyx(t02);

														
 
															-

														
 
															-		cof1 = float4_madd(t3, t02_yxwz, cof1);

														
 
															-		cof1 = float4_nmsub(t3, t02_wzyx, cof1);

														
 
															-

														
 
															-		cof3 = float4_nmsub(t1, t02_yxwz, cof3);

														
 
															-		cof3 = float4_madd(t1, t02_wzyx, cof3);

														
 
															-

														
 
															-		const float4_t det    = float4_dot(t0, cof0);

														
 
															-		const float4_t invdet = float4_rcp(det);

														
 
															-

														
 
															-		float4x4_t result;

														
 
															-		result.col[0] = float4_mul(cof0, invdet);

														
 
															-		result.col[1] = float4_mul(cof1, invdet);

														
 
															-		result.col[2] = float4_mul(cof2, invdet);

														
 
															-		result.col[3] = float4_mul(cof3, invdet);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_FLOAT4X4_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+// Include guard. The macro defined here must be spelled exactly like the one tested by
															+// #ifndef (and named on the closing #endif); a lowercase 'x' in the #define left the
															+// guard ineffective, so a second inclusion redefined float4x4_t and the inline functions.
															+#ifndef __BX_FLOAT4X4_H__
															+#define __BX_FLOAT4X4_H__
														
 
															+
														
 
															+#include "float4_t.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	// Matrix type used by the functions in this header: four float4_t values addressed as col[0]..col[3].
															+	// Declared through BX_ALIGN_STRUCT_16 (presumably 16-byte alignment, judging by the macro name;
															+	// the macro is defined outside this file - TODO confirm).
															+	typedef BX_ALIGN_STRUCT_16(struct)
															+	{
															+		float4_t col[4];
															+
															+	} float4x4_t;
														
 
															+
														
 
															+	// Combines the x, y and z swizzles of _a with col[0], col[1] and col[2] of _b and adds
															+	// col[3] unscaled; the w component of _a is never read. Assuming float4_madd(a, b, c)
															+	// is a*b + c (defined outside this file), the result is grouped as
															+	// (col[2]*z + col[0]*x) + (col[3] + col[1]*y).
															+	BX_FLOAT4_INLINE float4_t float4_mul_xyz1(float4_t _a, const float4x4_t& _b)
															+	{
															+		const float4_t termX = float4_mul(_b.col[0], float4_swiz_xxxx(_a) );
															+		const float4_t termY = float4_mul(_b.col[1], float4_swiz_yyyy(_a) );
															+
															+		// Two independent partial sums, added once at the end.
															+		const float4_t sumXZ = float4_madd(_b.col[2], float4_swiz_zzzz(_a), termX);
															+		const float4_t sumY1 = float4_add(_b.col[3], termY);
															+
															+		return float4_add(sumXZ, sumY1);
															+	}
														
 
															+
														
 
															+	// Vector * matrix overload: combines the x, y, z and w swizzles of _a with col[0]..col[3]
															+	// of _b. Assuming float4_madd(a, b, c) is a*b + c (defined outside this file), the result
															+	// is grouped as (col[2]*z + col[0]*x) + (col[3]*w + col[1]*y).
															+	BX_FLOAT4_INLINE float4_t float4_mul(float4_t _a, const float4x4_t& _b)
															+	{
															+		const float4_t termX = float4_mul(_b.col[0], float4_swiz_xxxx(_a) );
															+		const float4_t termY = float4_mul(_b.col[1], float4_swiz_yyyy(_a) );
															+
															+		// Two independent partial sums, added once at the end.
															+		const float4_t sumXZ = float4_madd(_b.col[2], float4_swiz_zzzz(_a), termX);
															+		const float4_t sumYW = float4_madd(_b.col[3], float4_swiz_wwww(_a), termY);
															+
															+		return float4_add(sumXZ, sumYW);
															+	}
														
 
															+
														
 
															+	// Matrix * matrix: column ii of the result is float4_mul(_a.col[ii], _b), i.e. each
															+	// column of _a is passed through the vector * matrix overload with _b.
															+	BX_FLOAT4_INLINE float4x4_t float4x4_mul(const float4x4_t& _a, const float4x4_t& _b)
															+	{
															+		float4x4_t product;
															+
															+		for (int ii = 0; ii < 4; ++ii)
															+		{
															+			product.col[ii] = float4_mul(_a.col[ii], _b);
															+		}
															+
															+		return product;
															+	}
														
 
															+
														
 
															+	// Builds the transpose of _mtx with two rounds of float4_shuf_xAyB / float4_shuf_zCwD.
															+	// The letter tags keep the original notation: col[0] = abcd, col[1] = efgh,
															+	// col[2] = ijkl, col[3] = mnop.
															+	BX_FLOAT4_INLINE float4x4_t float4x4_transpose(const float4x4_t& _mtx)
															+	{
															+		// Round 1: pair col[0] with col[2] and col[1] with col[3].
															+		const float4_t c02lo = float4_shuf_xAyB(_mtx.col[0], _mtx.col[2]); // aibj
															+		const float4_t c13lo = float4_shuf_xAyB(_mtx.col[1], _mtx.col[3]); // emfn
															+		const float4_t c02hi = float4_shuf_zCwD(_mtx.col[0], _mtx.col[2]); // ckdl
															+		const float4_t c13hi = float4_shuf_zCwD(_mtx.col[1], _mtx.col[3]); // gohp
															+
															+		// Round 2: the same two shuffles applied to the round-1 pairs.
															+		float4x4_t transposed;
															+		transposed.col[0] = float4_shuf_xAyB(c02lo, c13lo); // aeim
															+		transposed.col[1] = float4_shuf_zCwD(c02lo, c13lo); // bfjn
															+		transposed.col[2] = float4_shuf_xAyB(c02hi, c13hi); // cgko
															+		transposed.col[3] = float4_shuf_zCwD(c02hi, c13hi); // dhlp
															+
															+		return transposed;
															+	}
														
 
															+
														
 
															+	// Returns the inverse of _a. Four vectors cof0..cof3 are accumulated from swizzled pairwise
															+	// products of the working vectors t0..t3; the determinant is taken as float4_dot(t0, cof0)
															+	// and every cof vector is multiplied by float4_rcp(det). There is no check for a zero
															+	// determinant. NOTE(review): the result's accuracy depends on float4_rcp, which is defined
															+	// outside this file - confirm whether it is an exact or an approximate reciprocal.
															+	BX_FLOAT4_INLINE float4x4_t float4x4_inverse(const float4x4_t& _a)
															+	{
															+		// Regroup the components of the four columns into the working vectors t0..t3.
															+		// Note that t1 and t3 take (tmp3, tmp2) while t0 and t2 take (tmp0, tmp1).
															+		const float4_t tmp0 = float4_shuf_xAzC(_a.col[0], _a.col[1]);
															+		const float4_t tmp1 = float4_shuf_xAzC(_a.col[2], _a.col[3]);
															+		const float4_t tmp2 = float4_shuf_yBwD(_a.col[0], _a.col[1]);
															+		const float4_t tmp3 = float4_shuf_yBwD(_a.col[2], _a.col[3]);
															+		const float4_t t0   = float4_shuf_xyAB(tmp0, tmp1);
															+		const float4_t t1   = float4_shuf_xyAB(tmp3, tmp2);
															+		const float4_t t2   = float4_shuf_zwCD(tmp0, tmp1);
															+		const float4_t t3   = float4_shuf_zwCD(tmp3, tmp2);
															+
															+		const float4_t t23 = float4_mul(t2, t3); // t2*t3; its two swizzles feed cof0 and cof1
															+		const float4_t t23_yxwz = float4_swiz_yxwz(t23);
															+		const float4_t t23_wzyx = float4_swiz_wzyx(t23);
															+
															+		float4_t cof0, cof1, cof2, cof3;
															+
															+		const float4_t zero = float4_zero(); // initial accumulator for the first nmsub of cof0 and cof1
															+		cof0 = float4_nmsub(t1, t23_yxwz, zero);
															+		cof0 = float4_madd(t1, t23_wzyx, cof0);
															+
															+		cof1 = float4_nmsub(t0, t23_yxwz, zero);
															+		cof1 = float4_madd(t0, t23_wzyx, cof1);
															+		cof1 = float4_swiz_zwxy(cof1);
															+		
															+		const float4_t t12 = float4_mul(t1, t2); // t1*t2; feeds cof0 and starts cof3
															+		const float4_t t12_yxwz = float4_swiz_yxwz(t12);
															+		const float4_t t12_wzyx = float4_swiz_wzyx(t12);
															+		
															+		cof0 = float4_madd(t3, t12_yxwz, cof0);
															+		cof0 = float4_nmsub(t3, t12_wzyx, cof0);
															+
															+		cof3 = float4_mul(t0, t12_yxwz);
															+		cof3 = float4_nmsub(t0, t12_wzyx, cof3);
															+		cof3 = float4_swiz_zwxy(cof3);
															+
															+		// zwxy-swizzled copies of t1 and t2, reused by the remaining products.
															+		const float4_t t1_zwxy = float4_swiz_zwxy(t1);
															+		const float4_t t2_zwxy = float4_swiz_zwxy(t2);
															+
															+		const float4_t t13 = float4_mul(t1_zwxy, t3); // swizzled t1 * t3; completes cof0 and starts cof2
															+		const float4_t t13_yxwz = float4_swiz_yxwz(t13);
															+		const float4_t t13_wzyx = float4_swiz_wzyx(t13);
															+
															+		cof0 = float4_madd(t2_zwxy, t13_yxwz, cof0);
															+		cof0 = float4_nmsub(t2_zwxy, t13_wzyx, cof0);
															+
															+		cof2 = float4_mul(t0, t13_yxwz);
															+		cof2 = float4_nmsub(t0, t13_wzyx, cof2);
															+		cof2 = float4_swiz_zwxy(cof2);
															+
															+		const float4_t t01 = float4_mul(t0, t1); // t0*t1; feeds cof2 and cof3
															+		const float4_t t01_yxwz = float4_swiz_yxwz(t01);
															+		const float4_t t01_wzyx = float4_swiz_wzyx(t01);
															+
															+		cof2 = float4_nmsub(t3, t01_yxwz, cof2);
															+		cof2 = float4_madd(t3, t01_wzyx, cof2);
															+
															+		cof3 = float4_madd(t2_zwxy, t01_yxwz, cof3);
															+		cof3 = float4_nmsub(t2_zwxy, t01_wzyx, cof3);
															+
															+		const float4_t t03 = float4_mul(t0, t3); // t0*t3; feeds cof1 and cof2
															+		const float4_t t03_yxwz = float4_swiz_yxwz(t03);
															+		const float4_t t03_wzyx = float4_swiz_wzyx(t03);
															+
															+		cof1 = float4_nmsub(t2_zwxy, t03_yxwz, cof1);
															+		cof1 = float4_madd(t2_zwxy, t03_wzyx, cof1);
															+
															+		cof2 = float4_madd(t1, t03_yxwz, cof2);
															+		cof2 = float4_nmsub(t1, t03_wzyx, cof2);
															+
															+		const float4_t t02 = float4_mul(t0, t2_zwxy); // t0 * swizzled t2; feeds cof1 and cof3
															+		const float4_t t02_yxwz = float4_swiz_yxwz(t02);
															+		const float4_t t02_wzyx = float4_swiz_wzyx(t02);
															+
															+		cof1 = float4_madd(t3, t02_yxwz, cof1);
															+		cof1 = float4_nmsub(t3, t02_wzyx, cof1);
															+
															+		cof3 = float4_nmsub(t1, t02_yxwz, cof3);
															+		cof3 = float4_madd(t1, t02_wzyx, cof3);
															+
															+		const float4_t det    = float4_dot(t0, cof0); // determinant, not tested against zero
															+		const float4_t invdet = float4_rcp(det);
															+
															+		// Every output column is its cof vector multiplied by the same invdet.
															+		float4x4_t result;
															+		result.col[0] = float4_mul(cof0, invdet);
															+		result.col[1] = float4_mul(cof1, invdet);
															+		result.col[2] = float4_mul(cof2, invdet);
															+		result.col[3] = float4_mul(cof3, invdet);
															+
															+		return result;
															+	}
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_FLOAT4X4_H__
														
--- a/include/bx/foreach.h
+++ b/include/bx/foreach.h
@@ -1,71 +1,71 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_FOREACH_H__

														
 
															-#define __BX_FOREACH_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	namespace foreach_ns

														
 
															-	{

														
 
															-		struct ContainerBase

														
 
															-		{

														
 
															-		};

														
 
															-

														
 
															-		template <typename Ty>

														
 
															-		class Container : public ContainerBase

														
 
															-		{

														
 
															-		public:

														
 
															-			inline Container(const Ty& _container)

														
 
															-				: m_container(_container)

														
 
															-				, m_break(0)

														
 
															-				, m_it( _container.begin() )

														
 
															-				, m_itEnd( _container.end() )

														
 
															-			{

														
 
															-			}

														
 
															-

														
 
															-			inline bool condition() const

														
 
															-			{

														
 
															-				return (!m_break++ && m_it != m_itEnd);

														
 
															-			}

														
 
															-

														
 
															-			const Ty& m_container;

														
 
															-			mutable int m_break;

														
 
															-			mutable typename Ty::const_iterator m_it;

														
 
															-			mutable typename Ty::const_iterator m_itEnd;

														
 
															-		};

														
 
															-

														
 
															-		template <typename Ty>

														
 
															-		inline Ty* pointer(const Ty&)

														
 
															-		{

														
 
															-			return 0;

														
 
															-		}

														
 
															-

														
 
															-		template <typename Ty>

														
 
															-		inline Container<Ty> containerNew(const Ty& _container)

														
 
															-		{

														
 
															-			return Container<Ty>(_container);

														
 
															-		}

														
 
															-

														
 
															-		template <typename Ty>

														
 
															-		inline const Container<Ty>* container(const ContainerBase* _base, const Ty*)

														
 
															-		{

														
 
															-			return static_cast<const Container<Ty>*>(_base);

														
 
															-		}

														
 
															-	} // namespace foreach_ns

														
 
															-

														
 
															-#define foreach(_variable, _container) \

														
 
															-	for (const bx::foreach_ns::ContainerBase &__temp_container__ = bx::foreach_ns::containerNew(_container); \

														
 
															-			bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->condition(); \

														
 
															-			++bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_it) \

														
 
															-	for (_variable = *container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_it; \

														
 
															-			bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_break; \

														
 
															-			--bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_break)

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_FOREACH_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_FOREACH_H__
														
 
															+#define __BX_FOREACH_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	// Support types and helpers used by the foreach() macro.
															+	namespace foreach_ns
															+	{
															+		// Non-template base: lets a Container<Ty> temporary be bound to a
															+		// const ContainerBase& without spelling out Ty.
															+		struct ContainerBase
															+		{
															+		};
															+
															+		// Iteration state for one loop over a container of type Ty:
															+		// the current/end const_iterator pair plus the m_break counter.
															+		template <typename Ty>
															+		class Container : public ContainerBase
															+		{
															+		public:
															+			// Captures _container by reference and snapshots its begin()/end().
															+			inline Container(const Ty& _container)
															+				: m_container(_container)
															+				, m_break(0)
															+				, m_it( _container.begin() )
															+				, m_itEnd( _container.end() )
															+			{
															+			}
															+
															+			// Always increments m_break. Returns true only when m_break was 0
															+			// before the increment and m_it has not reached m_itEnd.
															+			inline bool condition() const
															+			{
															+				const bool wasClear = (0 == m_break);
															+				++m_break;
															+				return wasClear && m_it != m_itEnd;
															+			}
															+
															+			// Members are mutable because the object is only reached through
															+			// const references/pointers.
															+			const Ty& m_container;
															+			mutable int m_break;
															+			mutable typename Ty::const_iterator m_it;
															+			mutable typename Ty::const_iterator m_itEnd;
															+		};
															+
															+		// Returns a null Ty*; the argument is ignored and only its type matters.
															+		template <typename Ty>
															+		inline Ty* pointer(const Ty&)
															+		{
															+			return static_cast<Ty*>(0);
															+		}
															+
															+		// Wraps _container in a Container<Ty> with Ty deduced from the argument.
															+		template <typename Ty>
															+		inline Container<Ty> containerNew(const Ty& _container)
															+		{
															+			return Container<Ty>(_container);
															+		}
															+
															+		// Downcasts _base to Container<Ty>; the unnamed Ty* parameter only
															+		// carries the type used for the cast.
															+		template <typename Ty>
															+		inline const Container<Ty>* container(const ContainerBase* _base, const Ty*)
															+		{
															+			const Container<Ty>* typed = static_cast<const Container<Ty>*>(_base);
															+			return typed;
															+		}
															+	} // namespace foreach_ns
														
 
															+
														
 
															+// foreach(_variable, _container): runs the statement that follows once per element of
															+// _container, with the element assigned to _variable, e.g.
															+//     foreach (int value, values) { ... }
															+// The outer for advances m_it until condition() fails. The inner for runs its body once
															+// per element: it is entered with m_break != 0 (set by condition()) and its increment
															+// step resets m_break to 0. A `break` in the body skips that reset, so the next
															+// condition() call returns false and the outer loop stops as well.
															+// `true ? 0 : pointer(_container)` never evaluates pointer(); it only supplies the
															+// Ty* type that container() needs for its cast.
															+// Every helper is spelled bx::foreach_ns::... - an unqualified call would be hidden by
															+// any variable named `container` at the point of use.
															+#define foreach(_variable, _container) \
															+	for (const bx::foreach_ns::ContainerBase &__temp_container__ = bx::foreach_ns::containerNew(_container); \
															+			bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->condition(); \
															+			++bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_it) \
															+	for (_variable = *bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_it; \
															+			bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_break; \
															+			--bx::foreach_ns::container(&__temp_container__, true ? 0 : bx::foreach_ns::pointer(_container) )->m_break)
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_FOREACH_H__
														
--- a/include/bx/handlealloc.h
+++ b/include/bx/handlealloc.h
@@ -1,88 +1,88 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_HANDLE_ALLOC_H__

														
 
															-#define __BX_HANDLE_ALLOC_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	class HandleAlloc

														
 
															-	{

														
 
															-	public:

														
 
															-		static const uint16_t invalid = 0xffff;

														
 
															-

														
 
															-		HandleAlloc(uint16_t _maxHandles)

														
 
															-			: m_dense(new uint16_t[_maxHandles*2])

														
 
															-			, m_sparse(&m_dense[_maxHandles])

														
 
															-			, m_numHandles(0)

														
 
															-			, m_maxHandles(_maxHandles)

														
 
															-		{

														
 
															-			for (uint16_t ii = 0; ii < _maxHandles; ++ii)

														
 
															-			{

														
 
															-				m_dense[ii] = ii;

														
 
															-			}

														
 
															-		}

														
 
															-

														
 
															-		~HandleAlloc()

														
 
															-		{

														
 
															-			delete [] m_dense;

														
 
															-		}

														
 
															-

														
 
															-		const uint16_t* getHandles() const

														
 
															-		{

														
 
															-			return m_dense;

														
 
															-		}

														
 
															-

														
 
															-		uint16_t getHandleAt(uint16_t _at) const

														
 
															-		{

														
 
															-			return m_dense[_at];

														
 
															-		}

														
 
															-

														
 
															-		uint16_t getNumHandles() const

														
 
															-		{

														
 
															-			return m_numHandles;

														
 
															-		}

														
 
															-

														
 
															-		uint16_t getMaxHandles() const

														
 
															-		{

														
 
															-			return m_maxHandles;

														
 
															-		}

														
 
															-

														
 
															-		uint16_t alloc()

														
 
															-		{

														
 
															-			if (m_numHandles < m_maxHandles)

														
 
															-			{

														
 
															-				uint16_t index = m_numHandles;

														
 
															-				++m_numHandles;

														
 
															-

														
 
															-				uint16_t handle = m_dense[index];

														
 
															-				m_sparse[handle] = index;

														
 
															-				return handle;

														
 
															-			}

														
 
															-

														
 
															-			return invalid;

														
 
															-		}

														
 
															-

														
 
															-		void free(uint16_t _handle)

														
 
															-		{

														
 
															-			uint16_t index = m_sparse[_handle];

														
 
															-			--m_numHandles;

														
 
															-			uint16_t temp = m_dense[m_numHandles];

														
 
															-			m_dense[m_numHandles] = _handle;

														
 
															-			m_sparse[temp] = index;

														
 
															-			m_dense[index] = temp;

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		uint16_t* m_dense;

														
 
															-		uint16_t* m_sparse;

														
 
															-		uint16_t m_numHandles;

														
 
															-		uint16_t m_maxHandles;

														
 
															-	};

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __HANDLE_ALLOC_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_HANDLE_ALLOC_H__
														
 
															+#define __BX_HANDLE_ALLOC_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	class HandleAlloc
														
 
															+	{
														
 
															+	public:
														
 
															+		static const uint16_t invalid = 0xffff;
														
 
															+
														
 
															+		HandleAlloc(uint16_t _maxHandles)
														
 
															+			: m_dense(new uint16_t[_maxHandles*2])
														
 
															+			, m_sparse(&m_dense[_maxHandles])
														
 
															+			, m_numHandles(0)
														
 
															+			, m_maxHandles(_maxHandles)
														
 
															+		{
														
 
															+			for (uint16_t ii = 0; ii < _maxHandles; ++ii)
														
 
															+			{
														
 
															+				m_dense[ii] = ii;
														
 
															+			}
														
 
															+		}
														
 
															+
														
 
															+		~HandleAlloc()
														
 
															+		{
														
 
															+			delete [] m_dense;
														
 
															+		}
														
 
															+
														
 
															+		const uint16_t* getHandles() const
														
 
															+		{
														
 
															+			return m_dense;
														
 
															+		}
														
 
															+
														
 
															+		uint16_t getHandleAt(uint16_t _at) const
														
 
															+		{
														
 
															+			return m_dense[_at];
														
 
															+		}
														
 
															+
														
 
															+		uint16_t getNumHandles() const
														
 
															+		{
														
 
															+			return m_numHandles;
														
 
															+		}
														
 
															+
														
 
															+		uint16_t getMaxHandles() const
														
 
															+		{
														
 
															+			return m_maxHandles;
														
 
															+		}
														
 
															+
														
 
															+		uint16_t alloc()
														
 
															+		{
														
 
															+			if (m_numHandles < m_maxHandles)
														
 
															+			{
														
 
															+				uint16_t index = m_numHandles;
														
 
															+				++m_numHandles;
														
 
															+
														
 
															+				uint16_t handle = m_dense[index];
														
 
															+				m_sparse[handle] = index;
														
 
															+				return handle;
														
 
															+			}
														
 
															+
														
 
															+			return invalid;
														
 
															+		}
														
 
															+
														
 
															+		void free(uint16_t _handle)
														
 
															+		{
														
 
															+			uint16_t index = m_sparse[_handle];
														
 
															+			--m_numHandles;
														
 
															+			uint16_t temp = m_dense[m_numHandles];
														
 
															+			m_dense[m_numHandles] = _handle;
														
 
															+			m_sparse[temp] = index;
														
 
															+			m_dense[index] = temp;
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		uint16_t* m_dense;
														
 
															+		uint16_t* m_sparse;
														
 
															+		uint16_t m_numHandles;
														
 
															+		uint16_t m_maxHandles;
														
 
															+	};
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __HANDLE_ALLOC_H__
														
--- a/include/bx/maputil.h
+++ b/include/bx/maputil.h
@@ -1,29 +1,29 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_MAPUTIL_H__

														
 
															-#define __BX_MAPUTIL_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	template<typename MapType>

														
 
															-	typename MapType::iterator mapInsertOrUpdate(MapType& _map, const typename MapType::key_type& _key, const typename MapType::mapped_type& _value)

														
 
															-	{

														
 
															-		typename MapType::iterator it = _map.lower_bound(_key);

														
 
															-		if (it != _map.end()

														
 
															-		&&  !_map.key_comp()(_key, it->first) )

														
 
															-		{

														
 
															-			it->second = _value;

														
 
															-			return it;

														
 
															-		}

														
 
															-

														
 
															-		typename MapType::value_type pair(_key, _value);

														
 
															-		return _map.insert(it, pair);

														
 
															-	}

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_MAPUTIL_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_MAPUTIL_H__
														
 
															+#define __BX_MAPUTIL_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	template<typename MapType>
														
 
															+	typename MapType::iterator mapInsertOrUpdate(MapType& _map, const typename MapType::key_type& _key, const typename MapType::mapped_type& _value)
														
 
															+	{
														
 
															+		typename MapType::iterator it = _map.lower_bound(_key);
														
 
															+		if (it != _map.end()
														
 
															+		&&  !_map.key_comp()(_key, it->first) )
														
 
															+		{
														
 
															+			it->second = _value;
														
 
															+			return it;
														
 
															+		}
														
 
															+
														
 
															+		typename MapType::value_type pair(_key, _value);
														
 
															+		return _map.insert(it, pair);
														
 
															+	}
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_MAPUTIL_H__
														
--- a/include/bx/mutex.h
+++ b/include/bx/mutex.h
@@ -1,171 +1,171 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_MUTEX_H__

														
 
															-#define __BX_MUTEX_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-#include "cpu.h"

														
 
															-#include "sem.h"

														
 
															-

														
 
															-#if BX_PLATFORM_NACL || BX_PLATFORM_LINUX || BX_PLATFORM_ANDROID || BX_PLATFORM_OSX

														
 
															-#	include <pthread.h>

														
 
															-#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360

														
 
															-#	include <errno.h>

														
 
															-#endif // BX_PLATFORM_

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360

														
 
															-	typedef CRITICAL_SECTION pthread_mutex_t;

														
 
															-	typedef unsigned pthread_mutexattr_t;

														
 
															-

														
 
															-	inline int pthread_mutex_lock(pthread_mutex_t* _mutex)

														
 
															-	{

														
 
															-		EnterCriticalSection(_mutex);

														
 
															-		return 0;

														
 
															-	}

														
 
															-

														
 
															-	inline int pthread_mutex_unlock(pthread_mutex_t* _mutex)

														
 
															-	{

														
 
															-		LeaveCriticalSection(_mutex);

														
 
															-		return 0;

														
 
															-	}

														
 
															-

														
 
															-	inline int pthread_mutex_trylock(pthread_mutex_t* _mutex)

														
 
															-	{

														
 
															-		return TryEnterCriticalSection(_mutex) ? 0 : EBUSY;

														
 
															-	}

														
 
															-

														
 
															-	inline int pthread_mutex_init(pthread_mutex_t* _mutex, pthread_mutexattr_t* /*_attr*/)

														
 
															-	{

														
 
															-		InitializeCriticalSection(_mutex);

														
 
															-		return 0;

														
 
															-	}

														
 
															-

														
 
															-	inline int pthread_mutex_destroy(pthread_mutex_t* _mutex)

														
 
															-	{

														
 
															-		DeleteCriticalSection(_mutex);

														
 
															-		return 0;

														
 
															-	}

														
 
															-#endif // BX_PLATFORM_

														
 
															-

														
 
															-	class Mutex

														
 
															-	{

														
 
															-	public:

														
 
															-		Mutex()

														
 
															-		{

														
 
															-			pthread_mutex_init(&m_handle, NULL);

														
 
															-		}

														
 
															-

														
 
															-		~Mutex()

														
 
															-		{

														
 
															-			pthread_mutex_destroy(&m_handle);

														
 
															-		}

														
 
															-

														
 
															-		void lock()

														
 
															-		{

														
 
															-			pthread_mutex_lock(&m_handle);

														
 
															-		}

														
 
															-

														
 
															-		void unlock()

														
 
															-		{

														
 
															-			pthread_mutex_unlock(&m_handle);

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		Mutex(const Mutex& _rhs); // no copy constructor

														
 
															-		Mutex& operator=(const Mutex& _rhs); // no assignment operator

														
 
															-

														
 
															-		pthread_mutex_t m_handle;

														
 
															-	};

														
 
															-

														
 
															-	class MutexScope

														
 
															-	{

														
 
															-	public:

														
 
															-		MutexScope(Mutex& _mutex)

														
 
															-			: m_mutex(_mutex)

														
 
															-		{

														
 
															-			m_mutex.lock();

														
 
															-		}

														
 
															-

														
 
															-		~MutexScope()

														
 
															-		{

														
 
															-			m_mutex.unlock();

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		MutexScope(); // no default constructor

														
 
															-		MutexScope(const MutexScope& _rhs); // no copy constructor

														
 
															-		MutexScope& operator=(const MutexScope& _rhs); // no assignment operator

														
 
															-

														
 
															-		Mutex& m_mutex;

														
 
															-	};

														
 
															-

														
 
															-#if 1

														
 
															-	typedef Mutex LwMutex;

														
 
															-#else

														
 
															-	class LwMutex

														
 
															-	{

														
 
															-	public:

														
 
															-		LwMutex()

														
 
															-			: m_count(0)

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		~LwMutex()

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		void lock()

														
 
															-		{

														
 
															-			if (atomicIncr(&m_count) > 1)

														
 
															-			{

														
 
															-				m_sem.wait();

														
 
															-			}

														
 
															-		}

														
 
															-

														
 
															-		void unlock()

														
 
															-		{

														
 
															-			if (atomicDecr(&m_count) > 0)

														
 
															-			{

														
 
															-				m_sem.post();

														
 
															-			}

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		LwMutex(const LwMutex& _rhs); // no copy constructor

														
 
															-		LwMutex& operator=(const LwMutex& _rhs); // no assignment operator

														
 
															-

														
 
															-		Semaphore m_sem;

														
 
															-		volatile int32_t m_count;

														
 
															-	};

														
 
															-#endif // 0

														
 
															-

														
 
															-	class LwMutexScope

														
 
															-	{

														
 
															-	public:

														
 
															-		LwMutexScope(LwMutex& _mutex)

														
 
															-			: m_mutex(_mutex)

														
 
															-		{

														
 
															-			m_mutex.lock();

														
 
															-		}

														
 
															-

														
 
															-		~LwMutexScope()

														
 
															-		{

														
 
															-			m_mutex.unlock();

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		LwMutexScope(); // no default constructor

														
 
															-		LwMutexScope(const LwMutexScope& _rhs); // no copy constructor

														
 
															-		LwMutexScope& operator=(const LwMutexScope& _rhs); // no assignment operator

														
 
															-

														
 
															-		LwMutex& m_mutex;

														
 
															-	};

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_MUTEX_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_MUTEX_H__
														
 
															+#define __BX_MUTEX_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+#include "cpu.h"
														
 
															+#include "sem.h"
														
 
															+
														
 
															+#if BX_PLATFORM_NACL || BX_PLATFORM_LINUX || BX_PLATFORM_ANDROID || BX_PLATFORM_OSX
														
 
															+#	include <pthread.h>
														
 
															+#elif BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
														
 
															+#	include <errno.h>
														
 
															+#endif // BX_PLATFORM_
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
														
 
															+	typedef CRITICAL_SECTION pthread_mutex_t;
														
 
															+	typedef unsigned pthread_mutexattr_t;
														
 
															+
														
 
															+	inline int pthread_mutex_lock(pthread_mutex_t* _mutex)
														
 
															+	{
														
 
															+		EnterCriticalSection(_mutex);
														
 
															+		return 0;
														
 
															+	}
														
 
															+
														
 
															+	inline int pthread_mutex_unlock(pthread_mutex_t* _mutex)
														
 
															+	{
														
 
															+		LeaveCriticalSection(_mutex);
														
 
															+		return 0;
														
 
															+	}
														
 
															+
														
 
															+	inline int pthread_mutex_trylock(pthread_mutex_t* _mutex)
														
 
															+	{
														
 
															+		return TryEnterCriticalSection(_mutex) ? 0 : EBUSY;
														
 
															+	}
														
 
															+
														
 
															+	inline int pthread_mutex_init(pthread_mutex_t* _mutex, pthread_mutexattr_t* /*_attr*/)
														
 
															+	{
														
 
															+		InitializeCriticalSection(_mutex);
														
 
															+		return 0;
														
 
															+	}
														
 
															+
														
 
															+	inline int pthread_mutex_destroy(pthread_mutex_t* _mutex)
														
 
															+	{
														
 
															+		DeleteCriticalSection(_mutex);
														
 
															+		return 0;
														
 
															+	}
														
 
															+#endif // BX_PLATFORM_
														
 
															+
														
 
															+	class Mutex
														
 
															+	{
														
 
															+	public:
														
 
															+		Mutex()
														
 
															+		{
														
 
															+			pthread_mutex_init(&m_handle, NULL);
														
 
															+		}
														
 
															+
														
 
															+		~Mutex()
														
 
															+		{
														
 
															+			pthread_mutex_destroy(&m_handle);
														
 
															+		}
														
 
															+
														
 
															+		void lock()
														
 
															+		{
														
 
															+			pthread_mutex_lock(&m_handle);
														
 
															+		}
														
 
															+
														
 
															+		void unlock()
														
 
															+		{
														
 
															+			pthread_mutex_unlock(&m_handle);
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		Mutex(const Mutex& _rhs); // no copy constructor
														
 
															+		Mutex& operator=(const Mutex& _rhs); // no assignment operator
														
 
															+
														
 
															+		pthread_mutex_t m_handle;
														
 
															+	};
														
 
															+
														
 
															+	class MutexScope
														
 
															+	{
														
 
															+	public:
														
 
															+		MutexScope(Mutex& _mutex)
														
 
															+			: m_mutex(_mutex)
														
 
															+		{
														
 
															+			m_mutex.lock();
														
 
															+		}
														
 
															+
														
 
															+		~MutexScope()
														
 
															+		{
														
 
															+			m_mutex.unlock();
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		MutexScope(); // no default constructor
														
 
															+		MutexScope(const MutexScope& _rhs); // no copy constructor
														
 
															+		MutexScope& operator=(const MutexScope& _rhs); // no assignment operator
														
 
															+
														
 
															+		Mutex& m_mutex;
														
 
															+	};
														
 
															+
														
 
															+#if 1
														
 
															+	typedef Mutex LwMutex;
														
 
															+#else
														
 
															+	class LwMutex
														
 
															+	{
														
 
															+	public:
														
 
															+		LwMutex()
														
 
															+			: m_count(0)
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		~LwMutex()
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		void lock()
														
 
															+		{
														
 
															+			if (atomicIncr(&m_count) > 1)
														
 
															+			{
														
 
															+				m_sem.wait();
														
 
															+			}
														
 
															+		}
														
 
															+
														
 
															+		void unlock()
														
 
															+		{
														
 
															+			if (atomicDecr(&m_count) > 0)
														
 
															+			{
														
 
															+				m_sem.post();
														
 
															+			}
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		LwMutex(const LwMutex& _rhs); // no copy constructor
														
 
															+		LwMutex& operator=(const LwMutex& _rhs); // no assignment operator
														
 
															+
														
 
															+		Semaphore m_sem;
														
 
															+		volatile int32_t m_count;
														
 
															+	};
														
 
															+#endif // 0
														
 
															+
														
 
															+	class LwMutexScope
														
 
															+	{
														
 
															+	public:
														
 
															+		LwMutexScope(LwMutex& _mutex)
														
 
															+			: m_mutex(_mutex)
														
 
															+		{
														
 
															+			m_mutex.lock();
														
 
															+		}
														
 
															+
														
 
															+		~LwMutexScope()
														
 
															+		{
														
 
															+			m_mutex.unlock();
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		LwMutexScope(); // no default constructor
														
 
															+		LwMutexScope(const LwMutexScope& _rhs); // no copy constructor
														
 
															+		LwMutexScope& operator=(const LwMutexScope& _rhs); // no assignment operator
														
 
															+
														
 
															+		LwMutex& m_mutex;
														
 
															+	};
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_MUTEX_H__
														
--- a/include/bx/os.h
+++ b/include/bx/os.h
@@ -1,46 +1,46 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_OS_H__

														
 
															-#define __BX_OS_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-#if BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX || BX_PLATFORM_OSX

														
 
															-#	include <sched.h> // sched_yield

														
 
															-#	if BX_PLATFORM_NACL

														
 
															-#		include <sys/nacl_syscalls.h> // nanosleep

														
 
															-#	else

														
 
															-#		include <time.h> // nanosleep

														
 
															-#	endif // BX_PLATFORM_NACL

														
 
															-#endif // BX_PLATFORM_

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	inline void sleep(uint32_t _ms)

														
 
															-	{

														
 
															-#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360

														
 
															-		Sleep(_ms);

														
 
															-#else

														
 
															-		timespec req = {(time_t)_ms/1000, (long)((_ms%1000)*1000000)};

														
 
															-		timespec rem = {0, 0};

														
 
															-		nanosleep(&req, &rem);

														
 
															-#endif // BX_PLATFORM_

														
 
															-	}

														
 
															-

														
 
															-	inline void yield()

														
 
															-	{

														
 
															-#if BX_PLATFORM_WINDOWS

														
 
															-		SwitchToThread();

														
 
															-#elif BX_PLATFORM_XBOX360

														
 
															-		Sleep(0);

														
 
															-#else

														
 
															-		sched_yield();

														
 
															-#endif // BX_PLATFORM_

														
 
															-	}

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_OS_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_OS_H__
														
 
															+#define __BX_OS_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+#if BX_PLATFORM_NACL || BX_PLATFORM_ANDROID || BX_PLATFORM_LINUX || BX_PLATFORM_OSX
														
 
															+#	include <sched.h> // sched_yield
														
 
															+#	if BX_PLATFORM_NACL
														
 
															+#		include <sys/nacl_syscalls.h> // nanosleep
														
 
															+#	else
														
 
															+#		include <time.h> // nanosleep
														
 
															+#	endif // BX_PLATFORM_NACL
														
 
															+#endif // BX_PLATFORM_
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	inline void sleep(uint32_t _ms)
														
 
															+	{
														
 
															+#if BX_PLATFORM_WINDOWS || BX_PLATFORM_XBOX360
														
 
															+		Sleep(_ms);
														
 
															+#else
														
 
															+		timespec req = {(time_t)_ms/1000, (long)((_ms%1000)*1000000)};
														
 
															+		timespec rem = {0, 0};
														
 
															+		nanosleep(&req, &rem);
														
 
															+#endif // BX_PLATFORM_
														
 
															+	}
														
 
															+
														
 
															+	inline void yield()
														
 
															+	{
														
 
															+#if BX_PLATFORM_WINDOWS
														
 
															+		SwitchToThread();
														
 
															+#elif BX_PLATFORM_XBOX360
														
 
															+		Sleep(0);
														
 
															+#else
														
 
															+		sched_yield();
														
 
															+#endif // BX_PLATFORM_
														
 
															+	}
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_OS_H__
														
--- a/include/bx/platform.h
+++ b/include/bx/platform.h
@@ -119,17 +119,17 @@
 
															 #if BX_CONFIG_ENABLE_MSVC_LEVEL4_WARNINGS && BX_COMPILER_MSVC
														
 
															 #	pragma warning(error:4062) // ENABLE warning C4062: enumerator'...' in switch of enum '...' is not handled
														
 
															-#	pragma warning(error:4121) // ENABLE warning C4121: 'symbol' : alignment of a member was sensitive to packing

														
 
															-#	pragma warning(error:4130) // ENABLE warning C4130: 'operator' : logical operation on address of string constant

														
 
															-#	pragma warning(error:4239) // ENABLE warning C4239: nonstandard extension used : 'argument' : conversion from '*' to '* &' A non-const reference may only be bound to an lvalue

														
 
															-//#	pragma warning(error:4244) // ENABLE warning C4244: 'conversion' conversion from 'type1' to 'type2', possible loss of data

														
 
															-#	pragma warning(error:4263) // ENABLE warning C4263: 'function' : member function does not override any base class virtual member function

														
 
															-#	pragma warning(error:4265) // ENABLE warning C4265: class has virtual functions, but destructor is not virtual

														
 
															-#	pragma warning(error:4431) // ENABLE warning C4431: missing type specifier - int assumed. Note: C no longer supports default-int

														
 
															-#	pragma warning(error:4545) // ENABLE warning C4545: expression before comma evaluates to a function which is missing an argument list

														
 
															-#	pragma warning(error:4549) // ENABLE warning C4549: 'operator' : operator before comma has no effect; did you intend 'operator'?

														
 
															-#	pragma warning(error:4701) // ENABLE warning C4701: potentially uninitialized local variable 'name' used

														
 
															-#	pragma warning(error:4706) // ENABLE warning C4706: assignment within conditional expression

														
 
															+#	pragma warning(error:4121) // ENABLE warning C4121: 'symbol' : alignment of a member was sensitive to packing
														
 
															+#	pragma warning(error:4130) // ENABLE warning C4130: 'operator' : logical operation on address of string constant
														
 
															+#	pragma warning(error:4239) // ENABLE warning C4239: nonstandard extension used : 'argument' : conversion from '*' to '* &' A non-const reference may only be bound to an lvalue
														
 
															+//#	pragma warning(error:4244) // ENABLE warning C4244: 'conversion' conversion from 'type1' to 'type2', possible loss of data
														
 
															+#	pragma warning(error:4263) // ENABLE warning C4263: 'function' : member function does not override any base class virtual member function
														
 
															+#	pragma warning(error:4265) // ENABLE warning C4265: class has virtual functions, but destructor is not virtual
														
 
															+#	pragma warning(error:4431) // ENABLE warning C4431: missing type specifier - int assumed. Note: C no longer supports default-int
														
 
															+#	pragma warning(error:4545) // ENABLE warning C4545: expression before comma evaluates to a function which is missing an argument list
														
 
															+#	pragma warning(error:4549) // ENABLE warning C4549: 'operator' : operator before comma has no effect; did you intend 'operator'?
														
 
															+#	pragma warning(error:4701) // ENABLE warning C4701: potentially uninitialized local variable 'name' used
														
 
															+#	pragma warning(error:4706) // ENABLE warning C4706: assignment within conditional expression
														
 
															 #endif // BX_CONFIG_ENABLE_MSVC_LEVEL4_WARNINGS && BX_COMPILER_MSVC
														
 
															 #endif // __BX_PLATFORM_H__
														
--- a/include/bx/radixsort.h
+++ b/include/bx/radixsort.h
@@ -1,111 +1,111 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-#ifndef __BX_RADIXSORT_H__

														
 
															-#define __BX_RADIXSORT_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-#define BX_RADIXSORT_BITS 11

														
 
															-#define BX_RADIXSORT_HISTOGRAM_SIZE (1<<BX_RADIXSORT_BITS)

														
 
															-#define BX_RADIXSORT_BIT_MASK (BX_RADIXSORT_HISTOGRAM_SIZE-1)

														
 
															-

														
 
															-	template <typename Ty>

														
 
															-	void radixSort32(uint32_t* _keys, uint32_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size)

														
 
															-	{

														
 
															-		uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];

														
 
															-		uint16_t shift = 0;

														
 
															-		for (uint32_t pass = 0; pass < 3; ++pass)

														
 
															-		{

														
 
															-			memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE);

														
 
															-			for (uint32_t ii = 0; ii < _size; ++ii)

														
 
															-			{

														
 
															-				uint32_t key = _keys[ii];

														
 
															-				uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;

														
 
															-				++histogram[index];

														
 
															-			}

														
 
															-

														
 
															-			uint16_t offset = 0;

														
 
															-			for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)

														
 
															-			{

														
 
															-				uint16_t count = histogram[ii];

														
 
															-				histogram[ii] = offset;

														
 
															-				offset += count;

														
 
															-			}

														
 
															-

														
 
															-			for (uint32_t ii = 0; ii < _size; ++ii)

														
 
															-			{

														
 
															-				uint32_t key = _keys[ii];

														
 
															-				uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;

														
 
															-				uint16_t dest = histogram[index]++;

														
 
															-				_tempKeys[dest] = key;

														
 
															-				_tempValues[dest] = _values[ii];

														
 
															-			}

														
 
															-

														
 
															-			uint32_t* swapKeys = _tempKeys;

														
 
															-			_tempKeys = _keys;

														
 
															-			_keys = swapKeys;

														
 
															-

														
 
															-			Ty* swapValues = _tempValues;

														
 
															-			_tempValues = _values;

														
 
															-			_values = swapValues;

														
 
															-

														
 
															-			shift += BX_RADIXSORT_BITS;

														
 
															-		}

														
 
															-	}

														
 
															-

														
 
															-	template <typename Ty>

														
 
															-	void radixSort64(uint64_t* _keys, uint64_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size)

														
 
															-	{

														
 
															-		uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];

														
 
															-		uint16_t shift = 0;

														
 
															-		for (uint32_t pass = 0; pass < 6; ++pass)

														
 
															-		{

														
 
															-			memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE);

														
 
															-			for (uint32_t ii = 0; ii < _size; ++ii)

														
 
															-			{

														
 
															-				uint64_t key = _keys[ii];

														
 
															-				uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;

														
 
															-				++histogram[index];

														
 
															-			}

														
 
															-

														
 
															-			uint16_t offset = 0;

														
 
															-			for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)

														
 
															-			{

														
 
															-				uint16_t count = histogram[ii];

														
 
															-				histogram[ii] = offset;

														
 
															-				offset += count;

														
 
															-			}

														
 
															-

														
 
															-			for (uint32_t ii = 0; ii < _size; ++ii)

														
 
															-			{

														
 
															-				uint64_t key = _keys[ii];

														
 
															-				uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;

														
 
															-				uint16_t dest = histogram[index]++;

														
 
															-				_tempKeys[dest] = key;

														
 
															-				_tempValues[dest] = _values[ii];

														
 
															-			}

														
 
															-

														
 
															-			uint64_t* swapKeys = _tempKeys;

														
 
															-			_tempKeys = _keys;

														
 
															-			_keys = swapKeys;

														
 
															-

														
 
															-			Ty* swapValues = _tempValues;

														
 
															-			_tempValues = _values;

														
 
															-			_values = swapValues;

														
 
															-

														
 
															-			shift += BX_RADIXSORT_BITS;

														
 
															-		}

														
 
															-	}

														
 
															-

														
 
															-#undef BX_RADIXSORT_BITS

														
 
															-#undef BX_RADIXSORT_HISTOGRAM_SIZE

														
 
															-#undef BX_RADIXSORT_BIT_MASK

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_RADIXSORT_H__

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#ifndef __BX_RADIXSORT_H__
														
 
															+#define __BX_RADIXSORT_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+#define BX_RADIXSORT_BITS 11
														
 
															+#define BX_RADIXSORT_HISTOGRAM_SIZE (1<<BX_RADIXSORT_BITS)
														
 
															+#define BX_RADIXSORT_BIT_MASK (BX_RADIXSORT_HISTOGRAM_SIZE-1)
														
 
															+
														
 
															+	template <typename Ty>
														
 
															+	void radixSort32(uint32_t* _keys, uint32_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size)
														
 
															+	{
														
 
															+		uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
														
 
															+		uint16_t shift = 0;
														
 
															+		for (uint32_t pass = 0; pass < 3; ++pass)
														
 
															+		{
														
 
															+			memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
														
 
															+			for (uint32_t ii = 0; ii < _size; ++ii)
														
 
															+			{
														
 
															+				uint32_t key = _keys[ii];
														
 
															+				uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
														
 
															+				++histogram[index];
														
 
															+			}
														
 
															+
														
 
															+			uint16_t offset = 0;
														
 
															+			for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
														
 
															+			{
														
 
															+				uint16_t count = histogram[ii];
														
 
															+				histogram[ii] = offset;
														
 
															+				offset += count;
														
 
															+			}
														
 
															+
														
 
															+			for (uint32_t ii = 0; ii < _size; ++ii)
														
 
															+			{
														
 
															+				uint32_t key = _keys[ii];
														
 
															+				uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
														
 
															+				uint16_t dest = histogram[index]++;
														
 
															+				_tempKeys[dest] = key;
														
 
															+				_tempValues[dest] = _values[ii];
														
 
															+			}
														
 
															+
														
 
															+			uint32_t* swapKeys = _tempKeys;
														
 
															+			_tempKeys = _keys;
														
 
															+			_keys = swapKeys;
														
 
															+
														
 
															+			Ty* swapValues = _tempValues;
														
 
															+			_tempValues = _values;
														
 
															+			_values = swapValues;
														
 
															+
														
 
															+			shift += BX_RADIXSORT_BITS;
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	template <typename Ty>
														
 
															+	void radixSort64(uint64_t* _keys, uint64_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size)
														
 
															+	{
														
 
															+		uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
														
 
															+		uint16_t shift = 0;
														
 
															+		for (uint32_t pass = 0; pass < 6; ++pass)
														
 
															+		{
														
 
															+			memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
														
 
															+			for (uint32_t ii = 0; ii < _size; ++ii)
														
 
															+			{
														
 
															+				uint64_t key = _keys[ii];
														
 
															+				uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
														
 
															+				++histogram[index];
														
 
															+			}
														
 
															+
														
 
															+			uint16_t offset = 0;
														
 
															+			for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii)
														
 
															+			{
														
 
															+				uint16_t count = histogram[ii];
														
 
															+				histogram[ii] = offset;
														
 
															+				offset += count;
														
 
															+			}
														
 
															+
														
 
															+			for (uint32_t ii = 0; ii < _size; ++ii)
														
 
															+			{
														
 
															+				uint64_t key = _keys[ii];
														
 
															+				uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
														
 
															+				uint16_t dest = histogram[index]++;
														
 
															+				_tempKeys[dest] = key;
														
 
															+				_tempValues[dest] = _values[ii];
														
 
															+			}
														
 
															+
														
 
															+			uint64_t* swapKeys = _tempKeys;
														
 
															+			_tempKeys = _keys;
														
 
															+			_keys = swapKeys;
														
 
															+
														
 
															+			Ty* swapValues = _tempValues;
														
 
															+			_tempValues = _values;
														
 
															+			_values = swapValues;
														
 
															+
														
 
															+			shift += BX_RADIXSORT_BITS;
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+#undef BX_RADIXSORT_BITS
														
 
															+#undef BX_RADIXSORT_HISTOGRAM_SIZE
														
 
															+#undef BX_RADIXSORT_BIT_MASK
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_RADIXSORT_H__
														
--- a/include/bx/readerwriter.h
+++ b/include/bx/readerwriter.h
@@ -1,270 +1,270 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															- 

														
 
															-#ifndef __BX_READERWRITER_H__

														
 
															-#define __BX_READERWRITER_H__

														
 
															-

														
 
															-#include <stdio.h>

														
 
															-#include <string.h>

														
 
															-

														
 
															-#include "bx.h"

														
 
															-#include "uint32_t.h"

														
 
															-

														
 
															-#if BX_COMPILER_MSVC

														
 
															-#	define fseeko64 _fseeki64

														
 
															-#	define ftello64 _ftelli64

														
 
															-#elif BX_PLATFORM_OSX

														
 
															-#	define fseeko64 fseeko

														
 
															-#	define ftello64 ftello

														
 
															-#endif // BX_

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	struct Whence

														
 
															-	{

														
 
															-		enum Enum

														
 
															-		{

														
 
															-			Begin,

														
 
															-			Current,

														
 
															-			End,

														
 
															-		};

														
 
															-	};

														
 
															-

														
 
															-	struct BX_NO_VTABLE ReaderI

														
 
															-	{

														
 
															-		virtual ~ReaderI() = 0;

														
 
															-		virtual int32_t read(void* _data, int32_t _size) = 0;

														
 
															-	};

														
 
															-

														
 
															-	inline ReaderI::~ReaderI()

														
 
															-	{

														
 
															-	}

														
 
															-

														
 
															-	struct BX_NO_VTABLE WriterI

														
 
															-	{

														
 
															-		virtual ~WriterI() = 0;

														
 
															-		virtual int32_t write(const void* _data, int32_t _size) = 0;

														
 
															-	};

														
 
															-

														
 
															-	inline WriterI::~WriterI()

														
 
															-	{

														
 
															-	}

														
 
															-

														
 
															-	struct BX_NO_VTABLE SeekerI

														
 
															-	{

														
 
															-		virtual ~SeekerI() = 0;

														
 
															-		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) = 0;

														
 
															-	};

														
 
															-

														
 
															-	inline SeekerI::~SeekerI()

														
 
															-	{

														
 
															-	}

														
 
															-

														
 
															-	inline int32_t read(ReaderI* _reader, void* _data, int32_t _size)

														
 
															-	{

														
 
															-		return _reader->read(_data, _size);

														
 
															-	}

														
 
															-

														
 
															-	template<typename Ty>

														
 
															-	inline int32_t read(ReaderI* _reader, Ty& _value)

														
 
															-	{

														
 
															-		return _reader->read(&_value, sizeof(Ty) );

														
 
															-	}

														
 
															-

														
 
															-	inline int32_t write(WriterI* _writer, const void* _data, int32_t _size)

														
 
															-	{

														
 
															-		return _writer->write(_data, _size);

														
 
															-	}

														
 
															-

														
 
															-	template<typename Ty>

														
 
															-	inline int32_t write(WriterI* _writer, const Ty& _value)

														
 
															-	{

														
 
															-		return _writer->write(&_value, sizeof(Ty) );

														
 
															-	}

														
 
															-

														
 
															-	inline int64_t skip(SeekerI* _seeker, int64_t _offset)

														
 
															-	{

														
 
															-		return _seeker->seek(_offset, Whence::Current);

														
 
															-	}

														
 
															-

														
 
															-	inline int64_t getSize(SeekerI* _seeker)

														
 
															-	{

														
 
															-		int64_t offset = _seeker->seek();

														
 
															-		int64_t size = _seeker->seek(0, Whence::End);

														
 
															-		_seeker->seek(offset, Whence::Begin);

														
 
															-		return size;

														
 
															-	}

														
 
															-

														
 
															-	struct BX_NO_VTABLE ReaderSeekerI : public ReaderI, public SeekerI

														
 
															-	{

														
 
															-	};

														
 
															-

														
 
															-	struct BX_NO_VTABLE WriterSeekerI : public WriterI, public SeekerI

														
 
															-	{

														
 
															-	};

														
 
															-

														
 
															-	struct BX_NO_VTABLE FileReaderI : public ReaderSeekerI

														
 
															-	{

														
 
															-		virtual int32_t open(const char* _filePath) = 0;

														
 
															-		virtual int32_t close() = 0;

														
 
															-	};

														
 
															-

														
 
															-	struct BX_NO_VTABLE FileWriterI : public WriterSeekerI

														
 
															-	{

														
 
															-		virtual int32_t open(const char* _filePath, bool _append = false) = 0;

														
 
															-		virtual int32_t close() = 0;

														
 
															-	};

														
 
															-

														
 
															-	struct BX_NO_VTABLE MemoryBlockI

														
 
															-	{

														
 
															-		virtual void* more(uint32_t _size = 0) = 0;

														
 
															-		virtual uint32_t getSize() = 0;

														
 
															-	};

														
 
															-

														
 
															-	class StaticMemoryBlock : public MemoryBlockI

														
 
															-	{

														
 
															-	public:

														
 
															-		StaticMemoryBlock(void* _data, uint32_t _size)

														
 
															-			: m_data(_data)

														
 
															-			, m_size(_size)

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual ~StaticMemoryBlock()

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual void* more(uint32_t /*_size*/ = 0) BX_OVERRIDE

														
 
															-		{

														
 
															-			return m_data;

														
 
															-		}

														
 
															-

														
 
															-		virtual uint32_t getSize() BX_OVERRIDE

														
 
															-		{

														
 
															-			return m_size;

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		void* m_data;

														
 
															-		uint32_t m_size;

														
 
															-	};

														
 
															-

														
 
															-	inline int64_t int64_min(int64_t _a, int64_t _b)

														
 
															-	{

														
 
															-		return _a < _b ? _a : _b;

														
 
															-	}

														
 
															-

														
 
															-	inline int64_t int64_max(int64_t _a, int64_t _b)

														
 
															-	{

														
 
															-		return _a > _b ? _a : _b;

														
 
															-	}

														
 
															-

														
 
															-	inline int64_t int64_clamp(int64_t _a, int64_t _min, int64_t _max)

														
 
															-	{

														
 
															-		const int64_t min    = int64_min(_a, _max);

														
 
															-		const int64_t result = int64_max(_min, min);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	class SizerWriter : public WriterSeekerI

														
 
															-	{

														
 
															-	public:

														
 
															-		SizerWriter()

														
 
															-			: m_pos(0)

														
 
															-			, m_top(0)

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual ~SizerWriter()

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE

														
 
															-		{

														
 
															-			switch (_whence)

														
 
															-			{

														
 
															-			case Whence::Begin:

														
 
															-				m_pos = _offset;

														
 
															-				break;

														
 
															-

														
 
															-			case Whence::Current:

														
 
															-				m_pos = int64_clamp(m_pos + _offset, 0, m_top);

														
 
															-				break;

														
 
															-

														
 
															-			case Whence::End:

														
 
															-				m_pos = int64_clamp(m_top - _offset, 0, m_top);

														
 
															-				break;

														
 
															-			}

														
 
															-

														
 
															-			return m_pos;

														
 
															-		}

														
 
															-

														
 
															-		virtual int32_t write(const void* /*_data*/, int32_t _size) BX_OVERRIDE

														
 
															-		{

														
 
															-			int32_t morecore = int32_t(m_pos - m_top) + _size;

														
 
															-

														
 
															-			if (0 < morecore)

														
 
															-			{

														
 
															-				m_top += morecore;

														
 
															-			}

														
 
															-

														
 
															-			int64_t reminder = m_top-m_pos;

														
 
															-			int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? INT32_MAX : reminder) );

														
 
															-			m_pos += size;

														
 
															-			return size;

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		int64_t m_pos;

														
 
															-		int64_t m_top;

														
 
															-	};

														
 
															-

														
 
															-	class MemoryReader : public ReaderSeekerI

														
 
															-	{

														
 
															-	public:

														
 
															-		MemoryReader(const void* _data, uint32_t _size)

														
 
															-			: m_data( (const uint8_t*)_data)

														
 
															-			, m_pos(0)

														
 
															-			, m_top(_size)

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual ~MemoryReader()

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual int64_t seek(int64_t _offset, Whence::Enum _whence) BX_OVERRIDE

														
 
															-		{

														
 
															-			switch (_whence)

														
 
															-			{

														
 
															-				case Whence::Begin:

														
 
															-					m_pos = _offset;

														
 
															-					break;

														
 
															-

														
 
															-				case Whence::Current:

														
 
															-					m_pos = int64_clamp(m_pos + _offset, 0, m_top);

														
 
															-					break;

														
 
															-

														
 
															-				case Whence::End:

														
 
															-					m_pos = int64_clamp(m_top - _offset, 0, m_top);

														
 
															-					break;

														
 
															-			}

														
 
															-

														
 
															-			return m_pos;

														
 
															-		}

														
 
															-

														
 
															-		virtual int32_t read(void* _data, int32_t _size) BX_OVERRIDE

														
 
															-		{

														
 
															-			int64_t reminder = m_top-m_pos;

														
 
															-			int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? INT32_MAX : reminder) );

														
 
															-			memcpy(_data, &m_data[m_pos], size);

														
 
															-			m_pos += size;

														
 
															-			return size;

														
 
															-		}

														
 
															-

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+ 
														
 
															+#ifndef __BX_READERWRITER_H__
														
 
															+#define __BX_READERWRITER_H__
														
 
															+
														
 
															+#include <stdio.h>
														
 
															+#include <string.h>
														
 
															+
														
 
															+#include "bx.h"
														
 
															+#include "uint32_t.h"
														
 
															+
														
 
															+#if BX_COMPILER_MSVC
														
 
															+#	define fseeko64 _fseeki64
														
 
															+#	define ftello64 _ftelli64
														
 
															+#elif BX_PLATFORM_OSX
														
 
															+#	define fseeko64 fseeko
														
 
															+#	define ftello64 ftello
														
 
															+#endif // BX_
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	struct Whence
														
 
															+	{
														
 
															+		enum Enum
														
 
															+		{
														
 
															+			Begin,
														
 
															+			Current,
														
 
															+			End,
														
 
															+		};
														
 
															+	};
														
 
															+
														
 
															+	struct BX_NO_VTABLE ReaderI
														
 
															+	{
														
 
															+		virtual ~ReaderI() = 0;
														
 
															+		virtual int32_t read(void* _data, int32_t _size) = 0;
														
 
															+	};
														
 
															+
														
 
															+	inline ReaderI::~ReaderI()
														
 
															+	{
														
 
															+	}
														
 
															+
														
 
															+	struct BX_NO_VTABLE WriterI
														
 
															+	{
														
 
															+		virtual ~WriterI() = 0;
														
 
															+		virtual int32_t write(const void* _data, int32_t _size) = 0;
														
 
															+	};
														
 
															+
														
 
															+	inline WriterI::~WriterI()
														
 
															+	{
														
 
															+	}
														
 
															+
														
 
															+	struct BX_NO_VTABLE SeekerI
														
 
															+	{
														
 
															+		virtual ~SeekerI() = 0;
														
 
															+		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) = 0;
														
 
															+	};
														
 
															+
														
 
															+	inline SeekerI::~SeekerI()
														
 
															+	{
														
 
															+	}
														
 
															+
														
 
															+	inline int32_t read(ReaderI* _reader, void* _data, int32_t _size)
														
 
															+	{
														
 
															+		return _reader->read(_data, _size);
														
 
															+	}
														
 
															+
														
 
															+	template<typename Ty>
														
 
															+	inline int32_t read(ReaderI* _reader, Ty& _value)
														
 
															+	{
														
 
															+		return _reader->read(&_value, sizeof(Ty) );
														
 
															+	}
														
 
															+
														
 
															+	inline int32_t write(WriterI* _writer, const void* _data, int32_t _size)
														
 
															+	{
														
 
															+		return _writer->write(_data, _size);
														
 
															+	}
														
 
															+
														
 
															+	template<typename Ty>
														
 
															+	inline int32_t write(WriterI* _writer, const Ty& _value)
														
 
															+	{
														
 
															+		return _writer->write(&_value, sizeof(Ty) );
														
 
															+	}
														
 
															+
														
 
															+	inline int64_t skip(SeekerI* _seeker, int64_t _offset)
														
 
															+	{
														
 
															+		return _seeker->seek(_offset, Whence::Current);
														
 
															+	}
														
 
															+
														
 
															+	inline int64_t getSize(SeekerI* _seeker)
														
 
															+	{
														
 
															+		int64_t offset = _seeker->seek();
														
 
															+		int64_t size = _seeker->seek(0, Whence::End);
														
 
															+		_seeker->seek(offset, Whence::Begin);
														
 
															+		return size;
														
 
															+	}
														
 
															+
														
 
															+	struct BX_NO_VTABLE ReaderSeekerI : public ReaderI, public SeekerI
														
 
															+	{
														
 
															+	};
														
 
															+
														
 
															+	struct BX_NO_VTABLE WriterSeekerI : public WriterI, public SeekerI
														
 
															+	{
														
 
															+	};
														
 
															+
														
 
															+	struct BX_NO_VTABLE FileReaderI : public ReaderSeekerI
														
 
															+	{
														
 
															+		virtual int32_t open(const char* _filePath) = 0;
														
 
															+		virtual int32_t close() = 0;
														
 
															+	};
														
 
															+
														
 
															+	struct BX_NO_VTABLE FileWriterI : public WriterSeekerI
														
 
															+	{
														
 
															+		virtual int32_t open(const char* _filePath, bool _append = false) = 0;
														
 
															+		virtual int32_t close() = 0;
														
 
															+	};
														
 
															+
														
 
															+	struct BX_NO_VTABLE MemoryBlockI
														
 
															+	{
														
 
															+		virtual void* more(uint32_t _size = 0) = 0;
														
 
															+		virtual uint32_t getSize() = 0;
														
 
															+	};
														
 
															+
														
 
															+	class StaticMemoryBlock : public MemoryBlockI
														
 
															+	{
														
 
															+	public:
														
 
															+		StaticMemoryBlock(void* _data, uint32_t _size)
														
 
															+			: m_data(_data)
														
 
															+			, m_size(_size)
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual ~StaticMemoryBlock()
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual void* more(uint32_t /*_size*/ = 0) BX_OVERRIDE
														
 
															+		{
														
 
															+			return m_data;
														
 
															+		}
														
 
															+
														
 
															+		virtual uint32_t getSize() BX_OVERRIDE
														
 
															+		{
														
 
															+			return m_size;
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		void* m_data;
														
 
															+		uint32_t m_size;
														
 
															+	};
														
 
															+
														
 
															+	inline int64_t int64_min(int64_t _a, int64_t _b)
														
 
															+	{
														
 
															+		return _a < _b ? _a : _b;
														
 
															+	}
														
 
															+
														
 
															+	inline int64_t int64_max(int64_t _a, int64_t _b)
														
 
															+	{
														
 
															+		return _a > _b ? _a : _b;
														
 
															+	}
														
 
															+
														
 
															+	inline int64_t int64_clamp(int64_t _a, int64_t _min, int64_t _max)
														
 
															+	{
														
 
															+		const int64_t min    = int64_min(_a, _max);
														
 
															+		const int64_t result = int64_max(_min, min);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	class SizerWriter : public WriterSeekerI
														
 
															+	{
														
 
															+	public:
														
 
															+		SizerWriter()
														
 
															+			: m_pos(0)
														
 
															+			, m_top(0)
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual ~SizerWriter()
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE
														
 
															+		{
														
 
															+			switch (_whence)
														
 
															+			{
														
 
															+			case Whence::Begin:
														
 
															+				m_pos = _offset;
														
 
															+				break;
														
 
															+
														
 
															+			case Whence::Current:
														
 
															+				m_pos = int64_clamp(m_pos + _offset, 0, m_top);
														
 
															+				break;
														
 
															+
														
 
															+			case Whence::End:
														
 
															+				m_pos = int64_clamp(m_top - _offset, 0, m_top);
														
 
															+				break;
														
 
															+			}
														
 
															+
														
 
															+			return m_pos;
														
 
															+		}
														
 
															+
														
 
															+		virtual int32_t write(const void* /*_data*/, int32_t _size) BX_OVERRIDE
														
 
															+		{
														
 
															+			int32_t morecore = int32_t(m_pos - m_top) + _size;
														
 
															+
														
 
															+			if (0 < morecore)
														
 
															+			{
														
 
															+				m_top += morecore;
														
 
															+			}
														
 
															+
														
 
															+			int64_t reminder = m_top-m_pos;
														
 
															+			int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? INT32_MAX : reminder) );
														
 
															+			m_pos += size;
														
 
															+			return size;
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		int64_t m_pos;
														
 
															+		int64_t m_top;
														
 
															+	};
														
 
															+
														
 
															+	class MemoryReader : public ReaderSeekerI
														
 
															+	{
														
 
															+	public:
														
 
															+		MemoryReader(const void* _data, uint32_t _size)
														
 
															+			: m_data( (const uint8_t*)_data)
														
 
															+			, m_pos(0)
														
 
															+			, m_top(_size)
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual ~MemoryReader()
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual int64_t seek(int64_t _offset, Whence::Enum _whence) BX_OVERRIDE
														
 
															+		{
														
 
															+			switch (_whence)
														
 
															+			{
														
 
															+				case Whence::Begin:
														
 
															+					m_pos = _offset;
														
 
															+					break;
														
 
															+
														
 
															+				case Whence::Current:
														
 
															+					m_pos = int64_clamp(m_pos + _offset, 0, m_top);
														
 
															+					break;
														
 
															+
														
 
															+				case Whence::End:
														
 
															+					m_pos = int64_clamp(m_top - _offset, 0, m_top);
														
 
															+					break;
														
 
															+			}
														
 
															+
														
 
															+			return m_pos;
														
 
															+		}
														
 
															+
														
 
															+		virtual int32_t read(void* _data, int32_t _size) BX_OVERRIDE
														
 
															+		{
														
 
															+			int64_t reminder = m_top-m_pos;
														
 
															+			int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? INT32_MAX : reminder) );
														
 
															+			memcpy(_data, &m_data[m_pos], size);
														
 
															+			m_pos += size;
														
 
															+			return size;
														
 
															+		}
														
 
															+
														
 
															 		const uint8_t* getDataPtr() const
														
 
															 		{
														
 
															 			return &m_data[m_pos];
														
@@ -280,180 +280,180 @@ namespace bx
 
															 			return m_top-m_pos;
														
 
															 		}
														
 
															-	private:

														
 
															-		const uint8_t* m_data;

														
 
															-		int64_t m_pos;

														
 
															-		int64_t m_top;

														
 
															-	};

														
 
															-

														
 
															-	class MemoryWriter : public WriterSeekerI

														
 
															-	{

														
 
															-	public:

														
 
															-		MemoryWriter(MemoryBlockI* _memBlock)

														
 
															-			: m_memBlock(_memBlock)

														
 
															-			, m_data(NULL)

														
 
															-			, m_pos(0)

														
 
															-			, m_top(0)

														
 
															-			, m_size(0)

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual ~MemoryWriter()

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE

														
 
															-		{

														
 
															-			switch (_whence)

														
 
															-			{

														
 
															-				case Whence::Begin:

														
 
															-					m_pos = _offset;

														
 
															-					break;

														
 
															-

														
 
															-				case Whence::Current:

														
 
															-					m_pos = int64_clamp(m_pos + _offset, 0, m_top);

														
 
															-					break;

														
 
															-

														
 
															-				case Whence::End:

														
 
															-					m_pos = int64_clamp(m_top - _offset, 0, m_top);

														
 
															-					break;

														
 
															-			}

														
 
															-

														
 
															-			return m_pos;

														
 
															-		}

														
 
															-

														
 
															-		virtual int32_t write(const void* _data, int32_t _size) BX_OVERRIDE

														
 
															-		{

														
 
															-			int32_t morecore = int32_t(m_pos - m_size) + _size;

														
 
															-

														
 
															-			if (0 < morecore)

														
 
															-			{

														
 
															-				morecore = BX_ALIGN_MASK(morecore, 0xfff);

														
 
															-				m_data = (uint8_t*)m_memBlock->more(morecore);

														
 
															-				m_size = m_memBlock->getSize();

														
 
															-			}

														
 
															-

														
 
															-			int64_t reminder = m_size-m_pos;

														
 
															-			int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? INT32_MAX : reminder) );

														
 
															-			memcpy(&m_data[m_pos], _data, size);

														
 
															-			m_pos += size;

														
 
															-			m_top = int64_max(m_top, m_pos);

														
 
															-			return size;

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		MemoryBlockI* m_memBlock;

														
 
															-		uint8_t* m_data;

														
 
															-		int64_t m_pos;

														
 
															-		int64_t m_top;

														
 
															-		int64_t m_size;

														
 
															-	};

														
 
															-

														
 
															-	class StaticMemoryBlockWriter : public MemoryWriter

														
 
															-	{

														
 
															-	public:

														
 
															-		StaticMemoryBlockWriter(void* _data, uint32_t _size)

														
 
															-			: MemoryWriter(&m_smb)

														
 
															-			, m_smb(_data, _size)

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		~StaticMemoryBlockWriter()

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		StaticMemoryBlock m_smb;

														
 
															-	};

														
 
															-

														
 
															-#if BX_CONFIG_CRT_FILE_READER_WRITER

														
 
															-	class CrtFileReader : public FileReaderI

														
 
															-	{

														
 
															-	public:

														
 
															-		CrtFileReader()

														
 
															-			: m_file(NULL)

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual ~CrtFileReader()

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual int32_t open(const char* _filePath) BX_OVERRIDE

														
 
															-		{

														
 
															-			m_file = fopen(_filePath, "rb");

														
 
															-			return NULL == m_file;

														
 
															-		}

														
 
															-

														
 
															-		virtual int32_t close() BX_OVERRIDE

														
 
															-		{

														
 
															-			fclose(m_file);

														
 
															-			return 0;

														
 
															-		}

														
 
															-

														
 
															-		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE

														
 
															-		{

														
 
															-			fseeko64(m_file, _offset, _whence);

														
 
															-			return ftello64(m_file);

														
 
															-		}

														
 
															-

														
 
															-		virtual int32_t read(void* _data, int32_t _size) BX_OVERRIDE

														
 
															-		{

														
 
															-			return (int32_t)fread(_data, 1, _size, m_file);

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		FILE* m_file;

														
 
															-	};

														
 
															-

														
 
															-	class CrtFileWriter : public FileWriterI

														
 
															-	{

														
 
															-	public:

														
 
															-		CrtFileWriter()

														
 
															-			: m_file(NULL)

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual ~CrtFileWriter()

														
 
															-		{

														
 
															-		}

														
 
															-

														
 
															-		virtual int32_t open(const char* _filePath, bool _append = false) BX_OVERRIDE

														
 
															-		{

														
 
															-			if (_append)

														
 
															-			{

														
 
															-				m_file = fopen(_filePath, "ab");

														
 
															-			}

														
 
															-			else

														
 
															-			{

														
 
															-				m_file = fopen(_filePath, "wb");

														
 
															-			}

														
 
															-

														
 
															-			return NULL == m_file;

														
 
															-		}

														
 
															-

														
 
															-		virtual int32_t close() BX_OVERRIDE

														
 
															-		{

														
 
															-			fclose(m_file);

														
 
															-			return 0;

														
 
															-		}

														
 
															-

														
 
															-		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE

														
 
															-		{

														
 
															-			fseeko64(m_file, _offset, _whence);

														
 
															-			return ftello64(m_file);

														
 
															-		}

														
 
															-

														
 
															-		virtual int32_t write(const void* _data, int32_t _size) BX_OVERRIDE

														
 
															-		{

														
 
															-			return (int32_t)fwrite(_data, 1, _size, m_file);

														
 
															-		}

														
 
															-

														
 
															-	private:

														
 
															-		FILE* m_file;

														
 
															-	};

														
 
															-#endif // BX_CONFIG_CRT_FILE_READER_WRITER

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_READERWRITER_H__

														
 
															+	private:
														
 
															+		const uint8_t* m_data;
														
 
															+		int64_t m_pos;
														
 
															+		int64_t m_top;
														
 
															+	};
														
 
															+
														
 
															+	class MemoryWriter : public WriterSeekerI
														
 
															+	{
														
 
															+	public:
														
 
															+		MemoryWriter(MemoryBlockI* _memBlock)
														
 
															+			: m_memBlock(_memBlock)
														
 
															+			, m_data(NULL)
														
 
															+			, m_pos(0)
														
 
															+			, m_top(0)
														
 
															+			, m_size(0)
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual ~MemoryWriter()
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE
														
 
															+		{
														
 
															+			switch (_whence)
														
 
															+			{
														
 
															+				case Whence::Begin:
														
 
															+					m_pos = _offset;
														
 
															+					break;
														
 
															+
														
 
															+				case Whence::Current:
														
 
															+					m_pos = int64_clamp(m_pos + _offset, 0, m_top);
														
 
															+					break;
														
 
															+
														
 
															+				case Whence::End:
														
 
															+					m_pos = int64_clamp(m_top - _offset, 0, m_top);
														
 
															+					break;
														
 
															+			}
														
 
															+
														
 
															+			return m_pos;
														
 
															+		}
														
 
															+
														
 
															+		virtual int32_t write(const void* _data, int32_t _size) BX_OVERRIDE
														
 
															+		{
														
 
															+			int32_t morecore = int32_t(m_pos - m_size) + _size;
														
 
															+
														
 
															+			if (0 < morecore)
														
 
															+			{
														
 
															+				morecore = BX_ALIGN_MASK(morecore, 0xfff);
														
 
															+				m_data = (uint8_t*)m_memBlock->more(morecore);
														
 
															+				m_size = m_memBlock->getSize();
														
 
															+			}
														
 
															+
														
 
															+			int64_t reminder = m_size-m_pos;
														
 
															+			int32_t size = uint32_min(_size, int32_t(reminder > INT32_MAX ? INT32_MAX : reminder) );
														
 
															+			memcpy(&m_data[m_pos], _data, size);
														
 
															+			m_pos += size;
														
 
															+			m_top = int64_max(m_top, m_pos);
														
 
															+			return size;
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		MemoryBlockI* m_memBlock;
														
 
															+		uint8_t* m_data;
														
 
															+		int64_t m_pos;
														
 
															+		int64_t m_top;
														
 
															+		int64_t m_size;
														
 
															+	};
														
 
															+
														
 
															+	class StaticMemoryBlockWriter : public MemoryWriter
														
 
															+	{
														
 
															+	public:
														
 
															+		StaticMemoryBlockWriter(void* _data, uint32_t _size)
														
 
															+			: MemoryWriter(&m_smb)
														
 
															+			, m_smb(_data, _size)
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		~StaticMemoryBlockWriter()
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		StaticMemoryBlock m_smb;
														
 
															+	};
														
 
															+
														
 
															+#if BX_CONFIG_CRT_FILE_READER_WRITER
														
 
															+	class CrtFileReader : public FileReaderI
														
 
															+	{
														
 
															+	public:
														
 
															+		CrtFileReader()
														
 
															+			: m_file(NULL)
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual ~CrtFileReader()
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual int32_t open(const char* _filePath) BX_OVERRIDE
														
 
															+		{
														
 
															+			m_file = fopen(_filePath, "rb");
														
 
															+			return NULL == m_file;
														
 
															+		}
														
 
															+
														
 
															+		virtual int32_t close() BX_OVERRIDE
														
 
															+		{
														
 
															+			fclose(m_file);
														
 
															+			return 0;
														
 
															+		}
														
 
															+
														
 
															+		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE
														
 
															+		{
														
 
															+			fseeko64(m_file, _offset, _whence);
														
 
															+			return ftello64(m_file);
														
 
															+		}
														
 
															+
														
 
															+		virtual int32_t read(void* _data, int32_t _size) BX_OVERRIDE
														
 
															+		{
														
 
															+			return (int32_t)fread(_data, 1, _size, m_file);
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		FILE* m_file;
														
 
															+	};
														
 
															+
														
 
															+	class CrtFileWriter : public FileWriterI
														
 
															+	{
														
 
															+	public:
														
 
															+		CrtFileWriter()
														
 
															+			: m_file(NULL)
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual ~CrtFileWriter()
														
 
															+		{
														
 
															+		}
														
 
															+
														
 
															+		virtual int32_t open(const char* _filePath, bool _append = false) BX_OVERRIDE
														
 
															+		{
														
 
															+			if (_append)
														
 
															+			{
														
 
															+				m_file = fopen(_filePath, "ab");
														
 
															+			}
														
 
															+			else
														
 
															+			{
														
 
															+				m_file = fopen(_filePath, "wb");
														
 
															+			}
														
 
															+
														
 
															+			return NULL == m_file;
														
 
															+		}
														
 
															+
														
 
															+		virtual int32_t close() BX_OVERRIDE
														
 
															+		{
														
 
															+			fclose(m_file);
														
 
															+			return 0;
														
 
															+		}
														
 
															+
														
 
															+		virtual int64_t seek(int64_t _offset = 0, Whence::Enum _whence = Whence::Current) BX_OVERRIDE
														
 
															+		{
														
 
															+			fseeko64(m_file, _offset, _whence);
														
 
															+			return ftello64(m_file);
														
 
															+		}
														
 
															+
														
 
															+		virtual int32_t write(const void* _data, int32_t _size) BX_OVERRIDE
														
 
															+		{
														
 
															+			return (int32_t)fwrite(_data, 1, _size, m_file);
														
 
															+		}
														
 
															+
														
 
															+	private:
														
 
															+		FILE* m_file;
														
 
															+	};
														
 
															+#endif // BX_CONFIG_CRT_FILE_READER_WRITER
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_READERWRITER_H__
														
--- a/include/bx/uint32_t.h
+++ b/include/bx/uint32_t.h
@@ -1,455 +1,455 @@
 
															-/*

														
 
															- * Copyright 2010-2012 Branimir Karadzic. All rights reserved.

														
 
															- * License: http://www.opensource.org/licenses/BSD-2-Clause

														
 
															- */

														
 
															-

														
 
															-// Copyright 2006 Mike Acton <[email protected]>

														
 
															-//

														
 
															-// Permission is hereby granted, free of charge, to any person obtaining a

														
 
															-// copy of this software and associated documentation files (the "Software"),

														
 
															-// to deal in the Software without restriction, including without limitation

														
 
															-// the rights to use, copy, modify, merge, publish, distribute, sublicense,

														
 
															-// and/or sell copies of the Software, and to permit persons to whom the

														
 
															-// Software is furnished to do so, subject to the following conditions:

														
 
															-//

														
 
															-// The above copyright notice and this permission notice shall be included

														
 
															-// in all copies or substantial portions of the Software.

														
 
															-//

														
 
															-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

														
 
															-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

														
 
															-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

														
 
															-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

														
 
															-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

														
 
															-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

														
 
															-// THE SOFTWARE

														
 
															-

														
 
															-#ifndef __BX_UINT32_T_H__

														
 
															-#define __BX_UINT32_T_H__

														
 
															-

														
 
															-#include "bx.h"

														
 
															-

														
 
															-#if BX_COMPILER_MSVC

														
 
															-#	if BX_PLATFORM_WINDOWS

														
 
															-#		include <math.h> // math.h is included because VS bitches:

														
 
															-						 // warning C4985: 'ceil': attributes not present on previous declaration.

														
 
															-						 // must be included before intrin.h.

														
 
															-#		include <intrin.h>

														
 
															-#		pragma intrinsic(_BitScanForward)

														
 
															-#		pragma intrinsic(_BitScanReverse)

														
 
															-#	endif // BX_PLATFORM_WINDOWS

														
 
															-#endif // BX_COMPILER_MSVC

														
 
															-

														
 
															-namespace bx

														
 
															-{

														
 
															-	inline uint32_t uint32_li(uint32_t _a)

														
 
															-	{

														
 
															-		return _a;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_dec(uint32_t _a)

														
 
															-	{

														
 
															-		return _a - 1;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_inc(uint32_t _a)

														
 
															-	{

														
 
															-		return _a + 1;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_not(uint32_t _a)

														
 
															-	{

														
 
															-		return ~_a;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_neg(uint32_t _a)

														
 
															-	{

														
 
															-		return -(int32_t)_a;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_ext(uint32_t _a)

														
 
															-	{

														
 
															-		return ( (int32_t)_a)>>31;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_and(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return _a & _b;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_xor(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return _a ^ _b;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_xorl(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return !_a != !_b;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_andc(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return _a & ~_b;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_or(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return _a | _b;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_sll(uint32_t _a, int _sa)

														
 
															-	{

														
 
															-		return _a << _sa;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_srl(uint32_t _a, int _sa)

														
 
															-	{

														
 
															-		return _a >> _sa;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_sra(uint32_t _a, int _sa)

														
 
															-	{

														
 
															-		return ( (int32_t)_a) >> _sa;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_rol(uint32_t _a, int _sa)

														
 
															-	{

														
 
															-		return ( _a << _sa) | (_a >> (32-_sa) );

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_ror(uint32_t _a, int _sa)

														
 
															-	{

														
 
															-		return ( _a >> _sa) | (_a << (32-_sa) );

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_add(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return _a + _b;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_sub(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return _a - _b;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_mul(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return _a * _b;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_div(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return (_a / _b);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_mod(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return (_a % _b);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return -(_a == _b);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return -(_a != _b);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cmplt(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return -(_a < _b);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cmple(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return -(_a <= _b);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return -(_a > _b);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cmpge(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return -(_a >= _b);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_setnz(uint32_t _a)

														
 
															-	{

														
 
															-		return -!!_a;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_satadd(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		const uint32_t add    = uint32_add(_a, _b);

														
 
															-		const uint32_t lt     = uint32_cmplt(add, _a);

														
 
															-		const uint32_t result = uint32_or(add, lt);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_satsub(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		const uint32_t sub    = uint32_sub(_a, _b);

														
 
															-		const uint32_t le     = uint32_cmple(sub, _a);

														
 
															-		const uint32_t result = uint32_and(sub, le);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_satmul(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		const uint64_t mul    = (uint64_t)_a * (uint64_t)_b;

														
 
															-		const uint32_t hi     = mul >> 32;

														
 
															-		const uint32_t nz     = uint32_setnz(hi);

														
 
															-		const uint32_t result = uint32_or(uint32_t(mul), nz);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		const uint32_t mask   = uint32_ext(test);

														
 
															-		const uint32_t sel_a  = uint32_and(_a, mask);

														
 
															-		const uint32_t sel_b  = uint32_andc(_b, mask);

														
 
															-		const uint32_t result = uint32_or(sel_a, sel_b);

														
 
															-

														
 
															-		return (result);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		const uint32_t sel_a  = uint32_and(_a, _mask);

														
 
															-		const uint32_t sel_b  = uint32_andc(_b, _mask);

														
 
															-		const uint32_t result = uint32_or(sel_a, sel_b);

														
 
															-

														
 
															-		return (result);

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_imin(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		const uint32_t a_sub_b = uint32_sub(_a, _b);

														
 
															-		const uint32_t result  = uint32_sels(a_sub_b, _a, _b);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_imax(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		const uint32_t b_sub_a = uint32_sub(_b, _a);

														
 
															-		const uint32_t result  = uint32_sels(b_sub_a, _a, _b);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_min(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return _a > _b ? _b : _a;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_max(uint32_t _a, uint32_t _b)

														
 
															-	{

														
 
															-		return _a > _b ? _a : _b;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max)

														
 
															-	{

														
 
															-		const uint32_t inc          = uint32_inc(_val);

														
 
															-		const uint32_t max_diff     = uint32_sub(_max, _val);

														
 
															-		const uint32_t neg_max_diff = uint32_neg(max_diff);

														
 
															-		const uint32_t max_or       = uint32_or(max_diff, neg_max_diff);

														
 
															-		const uint32_t max_diff_nz  = uint32_ext(max_or);

														
 
															-		const uint32_t result       = uint32_selb(max_diff_nz, inc, _min);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max)

														
 
															-	{

														
 
															-		const uint32_t dec          = uint32_dec(_val);

														
 
															-		const uint32_t min_diff     = uint32_sub(_min, _val);

														
 
															-		const uint32_t neg_min_diff = uint32_neg(min_diff);

														
 
															-		const uint32_t min_or       = uint32_or(min_diff, neg_min_diff);

														
 
															-		const uint32_t min_diff_nz  = uint32_ext(min_or);

														
 
															-		const uint32_t result       = uint32_selb(min_diff_nz, dec, _max);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cntbits_ref(uint32_t _val)

														
 
															-	{

														
 
															-		const uint32_t tmp0   = uint32_srl(_val, 1);

														
 
															-		const uint32_t tmp1   = uint32_and(tmp0, 0x55555555);

														
 
															-		const uint32_t tmp2   = uint32_sub(_val, tmp1);

														
 
															-		const uint32_t tmp3   = uint32_and(tmp2, 0xc30c30c3);

														
 
															-		const uint32_t tmp4   = uint32_srl(tmp2, 2);

														
 
															-		const uint32_t tmp5   = uint32_and(tmp4, 0xc30c30c3);

														
 
															-		const uint32_t tmp6   = uint32_srl(tmp2, 4);

														
 
															-		const uint32_t tmp7   = uint32_and(tmp6, 0xc30c30c3);

														
 
															-		const uint32_t tmp8   = uint32_add(tmp3, tmp5);

														
 
															-		const uint32_t tmp9   = uint32_add(tmp7, tmp8);

														
 
															-		const uint32_t tmpA   = uint32_srl(tmp9, 6);

														
 
															-		const uint32_t tmpB   = uint32_add(tmp9, tmpA);

														
 
															-		const uint32_t tmpC   = uint32_srl(tmpB, 12);

														
 
															-		const uint32_t tmpD   = uint32_srl(tmpB, 24);

														
 
															-		const uint32_t tmpE   = uint32_add(tmpB, tmpC);

														
 
															-		const uint32_t tmpF   = uint32_add(tmpD, tmpE);

														
 
															-		const uint32_t result = uint32_and(tmpF, 0x3f);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	/// Count number of bits set.

														
 
															-	inline uint32_t uint32_cntbits(uint32_t _val)

														
 
															-	{

														
 
															-#if BX_COMPILER_GCC

														
 
															-		return __builtin_popcount(_val);

														
 
															-#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS

														
 
															-		return __popcnt(_val);

														
 
															-#else

														
 
															-		return uint32_cntbits_ref(_val);

														
 
															-#endif // BX_COMPILER_GCC

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cntlz_ref(uint32_t _val)

														
 
															-	{

														
 
															-		const uint32_t tmp0   = uint32_srl(_val, 1);

														
 
															-		const uint32_t tmp1   = uint32_or(tmp0, _val);

														
 
															-		const uint32_t tmp2   = uint32_srl(tmp1, 2);

														
 
															-		const uint32_t tmp3   = uint32_or(tmp2, tmp1);

														
 
															-		const uint32_t tmp4   = uint32_srl(tmp3, 4);

														
 
															-		const uint32_t tmp5   = uint32_or(tmp4, tmp3);

														
 
															-		const uint32_t tmp6   = uint32_srl(tmp5, 8);

														
 
															-		const uint32_t tmp7   = uint32_or(tmp6, tmp5);

														
 
															-		const uint32_t tmp8   = uint32_srl(tmp7, 16);

														
 
															-		const uint32_t tmp9   = uint32_or(tmp8, tmp7);

														
 
															-		const uint32_t tmpA   = uint32_not(tmp9);

														
 
															-		const uint32_t result = uint32_cntbits(tmpA);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	/// Count number of leading zeros.

														
 
															-	inline uint32_t uint32_cntlz(uint32_t _val)

														
 
															-	{

														
 
															-#if BX_COMPILER_GCC

														
 
															-		return __builtin_clz(_val);

														
 
															-#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS

														
 
															-		unsigned long index;

														
 
															-		_BitScanReverse(&index, _val);

														
 
															-		return 31 - index;

														
 
															-#else

														
 
															-		return uint32_cntlz_ref(_val);

														
 
															-#endif // BX_COMPILER_

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cnttz_ref(uint32_t _val)

														
 
															-	{

														
 
															-		const uint32_t tmp0   = uint32_not(_val);

														
 
															-		const uint32_t tmp1   = uint32_dec(_val);

														
 
															-		const uint32_t tmp2   = uint32_and(tmp0, tmp1);

														
 
															-		const uint32_t result = uint32_cntbits(tmp2);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_cnttz(uint32_t _val)

														
 
															-	{

														
 
															-#if BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS

														
 
															-		unsigned long index;

														
 
															-		_BitScanForward(&index, _val);

														
 
															-		return index;

														
 
															-#else

														
 
															-		return uint32_cnttz_ref(_val);

														
 
															-#endif // BX_COMPILER_

														
 
															-	}

														
 
															-

														
 
															-	// shuffle:

														
 
															-	// ---- ---- ---- ---- fedc ba98 7654 3210

														
 
															-	// to:

														
 
															-	// -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0

														
 
															-	inline uint32_t uint32_part1by1(uint32_t _a)

														
 
															-	{

														
 
															-		const uint32_t val    = uint32_and(_a, 0xffff);

														
 
															-

														
 
															-		const uint32_t tmp0   = uint32_sll(val, 8);

														
 
															-		const uint32_t tmp1   = uint32_xor(val, tmp0);

														
 
															-		const uint32_t tmp2   = uint32_and(tmp1, 0x00ff00ff);

														
 
															-

														
 
															-		const uint32_t tmp3   = uint32_sll(tmp2, 4);

														
 
															-		const uint32_t tmp4   = uint32_xor(tmp2, tmp3);

														
 
															-		const uint32_t tmp5   = uint32_and(tmp4, 0x0f0f0f0f);

														
 
															-

														
 
															-		const uint32_t tmp6   = uint32_sll(tmp5, 2);

														
 
															-		const uint32_t tmp7   = uint32_xor(tmp5, tmp6);

														
 
															-		const uint32_t tmp8   = uint32_and(tmp7, 0x33333333);

														
 
															-

														
 
															-		const uint32_t tmp9   = uint32_sll(tmp8, 1);

														
 
															-		const uint32_t tmpA   = uint32_xor(tmp8, tmp9);

														
 
															-		const uint32_t result = uint32_and(tmpA, 0x55555555);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	// shuffle:

														
 
															-	// ---- ---- ---- ---- ---- --98 7654 3210

														
 
															-	// to:

														
 
															-	// ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0

														
 
															-	inline uint32_t uint32_part1by2(uint32_t _a)

														
 
															-	{

														
 
															-		const uint32_t val    = uint32_and(_a, 0x3ff);

														
 
															-

														
 
															-		const uint32_t tmp0   = uint32_sll(val, 16);

														
 
															-		const uint32_t tmp1   = uint32_xor(val, tmp0);

														
 
															-		const uint32_t tmp2   = uint32_and(tmp1, 0xff0000ff);

														
 
															-

														
 
															-		const uint32_t tmp3   = uint32_sll(tmp2, 8);

														
 
															-		const uint32_t tmp4   = uint32_xor(tmp2, tmp3);

														
 
															-		const uint32_t tmp5   = uint32_and(tmp4, 0x0300f00f);

														
 
															-

														
 
															-		const uint32_t tmp6   = uint32_sll(tmp5, 4);

														
 
															-		const uint32_t tmp7   = uint32_xor(tmp5, tmp6);

														
 
															-		const uint32_t tmp8   = uint32_and(tmp7, 0x030c30c3);

														
 
															-

														
 
															-		const uint32_t tmp9   = uint32_sll(tmp8, 2);

														
 
															-		const uint32_t tmpA   = uint32_xor(tmp8, tmp9);

														
 
															-		const uint32_t result = uint32_and(tmpA, 0x09249249);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_testpow2(uint32_t _a)

														
 
															-	{

														
 
															-		const uint32_t tmp0   = uint32_not(_a);

														
 
															-		const uint32_t tmp1   = uint32_inc(tmp0);

														
 
															-		const uint32_t tmp2   = uint32_and(_a, tmp1);

														
 
															-		const uint32_t tmp3   = uint32_cmpeq(tmp2, _a);

														
 
															-		const uint32_t tmp4   = uint32_cmpneq(_a, 0);

														
 
															-		const uint32_t result = uint32_and(tmp3, tmp4);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															-	inline uint32_t uint32_nextpow2(uint32_t _a)

														
 
															-	{

														
 
															-		const uint32_t tmp0   = uint32_dec(_a);

														
 
															-		const uint32_t tmp1   = uint32_srl(tmp0, 1);

														
 
															-		const uint32_t tmp2   = uint32_or(tmp0, tmp1);

														
 
															-		const uint32_t tmp3   = uint32_srl(tmp2, 2);

														
 
															-		const uint32_t tmp4   = uint32_or(tmp2, tmp3);

														
 
															-		const uint32_t tmp5   = uint32_srl(tmp4, 4);

														
 
															-		const uint32_t tmp6   = uint32_or(tmp4, tmp5);

														
 
															-		const uint32_t tmp7   = uint32_srl(tmp6, 8);

														
 
															-		const uint32_t tmp8   = uint32_or(tmp6, tmp7);

														
 
															-		const uint32_t tmp9   = uint32_srl(tmp8, 16);

														
 
															-		const uint32_t tmpA   = uint32_or(tmp8, tmp9);

														
 
															-		const uint32_t result = uint32_inc(tmpA);

														
 
															-

														
 
															-		return result;

														
 
															-	}

														
 
															-

														
 
															+/*
														
 
															+ * Copyright 2010-2012 Branimir Karadzic. All rights reserved.
														
 
															+ * License: http://www.opensource.org/licenses/BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+// Copyright 2006 Mike Acton <[email protected]>
														
 
															+//
														
 
															+// Permission is hereby granted, free of charge, to any person obtaining a
														
 
															+// copy of this software and associated documentation files (the "Software"),
														
 
															+// to deal in the Software without restriction, including without limitation
														
 
															+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
														
 
															+// and/or sell copies of the Software, and to permit persons to whom the
														
 
															+// Software is furnished to do so, subject to the following conditions:
														
 
															+//
														
 
															+// The above copyright notice and this permission notice shall be included
														
 
															+// in all copies or substantial portions of the Software.
														
 
															+//
														
 
															+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
														
 
															+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
														
 
															+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
														
 
															+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
														
 
															+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
														
 
															+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
														
 
															+// THE SOFTWARE
														
 
															+
														
 
															+#ifndef __BX_UINT32_T_H__
														
 
															+#define __BX_UINT32_T_H__
														
 
															+
														
 
															+#include "bx.h"
														
 
															+
														
 
															+#if BX_COMPILER_MSVC
														
 
															+#	if BX_PLATFORM_WINDOWS
														
 
															+#		include <math.h> // math.h is included because VS bitches:
														
 
															+						 // warning C4985: 'ceil': attributes not present on previous declaration.
														
 
															+						 // must be included before intrin.h.
														
 
															+#		include <intrin.h>
														
 
															+#		pragma intrinsic(_BitScanForward)
														
 
															+#		pragma intrinsic(_BitScanReverse)
														
 
															+#	endif // BX_PLATFORM_WINDOWS
														
 
															+#endif // BX_COMPILER_MSVC
														
 
															+
														
 
															+namespace bx
														
 
															+{
														
 
															+	/// Returns _a unchanged.
															+	inline uint32_t uint32_li(uint32_t _a)
															+	{
															+		const uint32_t result = _a;
															+
															+		return result;
															+	}
														
 
															+
														
 
															+	/// Returns _a minus one.
															+	inline uint32_t uint32_dec(uint32_t _a)
															+	{
															+		const uint32_t result = _a - 1;
															+
															+		return result;
															+	}
														
 
															+
														
 
															+	/// Returns _a plus one.
															+	inline uint32_t uint32_inc(uint32_t _a)
															+	{
															+		const uint32_t result = _a + 1;
															+
															+		return result;
															+	}
														
 
															+
														
 
															+	/// Returns the bitwise complement of _a.
															+	inline uint32_t uint32_not(uint32_t _a)
															+	{
															+		const uint32_t result = ~_a;
															+
															+		return result;
															+	}
														
 
															+
														
 
															+	/// Returns the two's complement negation of _a, i.e. (0 - _a) modulo 2^32.
															+	inline uint32_t uint32_neg(uint32_t _a)
															+	{
															+		// Negate in unsigned arithmetic. Negating (int32_t)_a overflows a
															+		// signed integer (undefined behavior) when _a is 0x80000000, while
															+		// unsigned subtraction wraps and yields the same bits for every input.
															+		return 0u - _a;
															+	}
														
 
															+
														
 
															+	/// Broadcasts bit 31 of _a to all 32 bits: returns 0xffffffff when
															+	/// bit 31 is set and 0 otherwise.
															+	inline uint32_t uint32_ext(uint32_t _a)
															+	{
															+		// Isolate the sign bit with a logical shift, then negate it in
															+		// unsigned arithmetic (0 - 1 wraps to 0xffffffff). This avoids
															+		// right-shifting a negative signed value, whose result is
															+		// implementation-defined before C++20.
															+		const uint32_t sign = _a >> 31;
															+
															+		return 0u - sign;
															+	}
														
 
															+
														
 
															+	/// Returns the bitwise AND of _a and _b.
															+	inline uint32_t uint32_and(uint32_t _a, uint32_t _b)
															+	{
															+		const uint32_t result = _a & _b;
															+
															+		return result;
															+	}
														
 
															+
														
 
															/// Returns the bitwise exclusive OR of _a and _b.
															inline uint32_t uint32_xor(uint32_t _a, uint32_t _b)
															{
																const uint32_t result = _a ^ _b;

																return result;
															}
														
 
															+
														
 
															/// Logical exclusive OR: returns 1 when exactly one of _a and _b is
															/// non-zero, otherwise 0.
															inline uint32_t uint32_xorl(uint32_t _a, uint32_t _b)
															{
																const uint32_t a_is_zero = (0 == _a);
																const uint32_t b_is_zero = (0 == _b);

																return a_is_zero ^ b_is_zero;
															}
														
 
															+
														
 
															/// AND with complement: returns the bits of _a that are not set in _b.
															inline uint32_t uint32_andc(uint32_t _a, uint32_t _b)
															{
																const uint32_t not_b  = ~_b;
																const uint32_t result = _a & not_b;

																return result;
															}
														
 
															+
														
 
															/// Returns the bitwise OR of _a and _b.
															inline uint32_t uint32_or(uint32_t _a, uint32_t _b)
															{
																const uint32_t result = _a | _b;

																return result;
															}
														
 
															+
														
 
															/// Shift left logical: returns _a shifted left by _sa bits.
															/// _sa must be in [0, 31]; other shift counts are undefined in C/C++.
															inline uint32_t uint32_sll(uint32_t _a, int _sa)
															{
																const uint32_t result = _a << _sa;

																return result;
															}
														
 
															+
														
 
															/// Shift right logical: returns _a shifted right by _sa bits, filling
															/// the vacated high bits with zeros.
															/// _sa must be in [0, 31]; other shift counts are undefined in C/C++.
															inline uint32_t uint32_srl(uint32_t _a, int _sa)
															{
																const uint32_t result = _a >> _sa;

																return result;
															}
														
 
															+
														
 
															/// Shift right arithmetic: reinterprets _a as int32_t, shifts it right
															/// by _sa bits and converts the result back to uint32_t.
															/// _sa must be in [0, 31].
															/// NOTE(review): sign propagation relies on the compiler implementing
															/// signed right shift arithmetically (implementation-defined before C++20).
															inline uint32_t uint32_sra(uint32_t _a, int _sa)
															{
																const int32_t  value  = (int32_t)_a;
																const uint32_t result = (uint32_t)(value >> _sa);

																return result;
															}
														
 
															+
														
 
															/// Rotates _a left by _sa bits.
															///
															/// The rotate count is reduced modulo 32, so _sa == 0 (and any multiple
															/// of 32) returns _a unchanged. Both shift counts are masked to [0, 31]
															/// because shifting a 32-bit value by 32 is undefined in C/C++.
															inline uint32_t uint32_rol(uint32_t _a, int _sa)
															{
																const uint32_t left   = (uint32_t)_sa & 31;
																const uint32_t right  = (32 - left) & 31; // 0 when left == 0, never 32.
																const uint32_t result = (_a << left) | (_a >> right);

																return result;
															}
														
 
															+
														
 
															/// Rotates _a right by _sa bits.
															///
															/// The rotate count is reduced modulo 32, so _sa == 0 (and any multiple
															/// of 32) returns _a unchanged. Both shift counts are masked to [0, 31]
															/// because shifting a 32-bit value by 32 is undefined in C/C++.
															inline uint32_t uint32_ror(uint32_t _a, int _sa)
															{
																const uint32_t right  = (uint32_t)_sa & 31;
																const uint32_t left   = (32 - right) & 31; // 0 when right == 0, never 32.
																const uint32_t result = (_a >> right) | (_a << left);

																return result;
															}
														
 
															+
														
 
															/// Returns _a + _b, wrapping modulo 2^32 on overflow.
															inline uint32_t uint32_add(uint32_t _a, uint32_t _b)
															{
																const uint32_t result = _a + _b;

																return result;
															}
														
 
															+
														
 
															/// Returns _a - _b, wrapping modulo 2^32 on underflow.
															inline uint32_t uint32_sub(uint32_t _a, uint32_t _b)
															{
																const uint32_t result = _a - _b;

																return result;
															}
														
 
															+
														
 
															/// Returns the low 32 bits of _a * _b.
															inline uint32_t uint32_mul(uint32_t _a, uint32_t _b)
															{
																const uint32_t result = _a * _b;

																return result;
															}
														
 
															+
														
 
															/// Returns the unsigned quotient _a / _b.
															/// _b must be non-zero; division by zero is undefined in C/C++.
															inline uint32_t uint32_div(uint32_t _a, uint32_t _b)
															{
																const uint32_t result = _a / _b;

																return result;
															}
														
 
															+
														
 
															/// Returns the unsigned remainder _a % _b.
															/// _b must be non-zero; modulo by zero is undefined in C/C++.
															inline uint32_t uint32_mod(uint32_t _a, uint32_t _b)
															{
																const uint32_t result = _a % _b;

																return result;
															}
														
 
															+
														
 
															/// Compare equal: returns the mask 0xffffffff when _a == _b, otherwise 0.
															inline uint32_t uint32_cmpeq(uint32_t _a, uint32_t _b)
															{
																const uint32_t mask = (_a == _b) ? 0xffffffff : 0;

																return mask;
															}
														
 
															+
														
 
															/// Compare not equal: returns the mask 0xffffffff when _a != _b,
															/// otherwise 0.
															inline uint32_t uint32_cmpneq(uint32_t _a, uint32_t _b)
															{
																const uint32_t mask = (_a != _b) ? 0xffffffff : 0;

																return mask;
															}
														
 
															+
														
 
															/// Unsigned compare less-than: returns the mask 0xffffffff when _a < _b,
															/// otherwise 0.
															inline uint32_t uint32_cmplt(uint32_t _a, uint32_t _b)
															{
																const uint32_t mask = (_a < _b) ? 0xffffffff : 0;

																return mask;
															}
														
 
															+
														
 
															/// Unsigned compare less-or-equal: returns the mask 0xffffffff when
															/// _a <= _b, otherwise 0.
															inline uint32_t uint32_cmple(uint32_t _a, uint32_t _b)
															{
																const uint32_t mask = (_a <= _b) ? 0xffffffff : 0;

																return mask;
															}
														
 
															+
														
 
															/// Unsigned compare greater-than: returns the mask 0xffffffff when
															/// _a > _b, otherwise 0.
															inline uint32_t uint32_cmpgt(uint32_t _a, uint32_t _b)
															{
																const uint32_t mask = (_a > _b) ? 0xffffffff : 0;

																return mask;
															}
														
 
															+
														
 
															/// Unsigned compare greater-or-equal: returns the mask 0xffffffff when
															/// _a >= _b, otherwise 0.
															inline uint32_t uint32_cmpge(uint32_t _a, uint32_t _b)
															{
																const uint32_t mask = (_a >= _b) ? 0xffffffff : 0;

																return mask;
															}
														
 
															+
														
 
															/// Set if non-zero: returns the mask 0xffffffff when _a != 0,
															/// otherwise 0.
															inline uint32_t uint32_setnz(uint32_t _a)
															{
																const uint32_t mask = (0 != _a) ? 0xffffffff : 0;

																return mask;
															}
														
 
															+
														
 
															+	inline uint32_t uint32_satadd(uint32_t _a, uint32_t _b)
														
 
															+	{
														
 
															+		const uint32_t add    = uint32_add(_a, _b);
														
 
															+		const uint32_t lt     = uint32_cmplt(add, _a);
														
 
															+		const uint32_t result = uint32_or(add, lt);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_satsub(uint32_t _a, uint32_t _b)
														
 
															+	{
														
 
															+		const uint32_t sub    = uint32_sub(_a, _b);
														
 
															+		const uint32_t le     = uint32_cmple(sub, _a);
														
 
															+		const uint32_t result = uint32_and(sub, le);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_satmul(uint32_t _a, uint32_t _b)
														
 
															+	{
														
 
															+		const uint64_t mul    = (uint64_t)_a * (uint64_t)_b;
														
 
															+		const uint32_t hi     = mul >> 32;
														
 
															+		const uint32_t nz     = uint32_setnz(hi);
														
 
															+		const uint32_t result = uint32_or(uint32_t(mul), nz);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_sels(uint32_t test, uint32_t _a, uint32_t _b)
														
 
															+	{
														
 
															+		const uint32_t mask   = uint32_ext(test);
														
 
															+		const uint32_t sel_a  = uint32_and(_a, mask);
														
 
															+		const uint32_t sel_b  = uint32_andc(_b, mask);
														
 
															+		const uint32_t result = uint32_or(sel_a, sel_b);
														
 
															+
														
 
															+		return (result);
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_selb(uint32_t _mask, uint32_t _a, uint32_t _b)
														
 
															+	{
														
 
															+		const uint32_t sel_a  = uint32_and(_a, _mask);
														
 
															+		const uint32_t sel_b  = uint32_andc(_b, _mask);
														
 
															+		const uint32_t result = uint32_or(sel_a, sel_b);
														
 
															+
														
 
															+		return (result);
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_imin(uint32_t _a, uint32_t _b)
														
 
															+	{
														
 
															+		const uint32_t a_sub_b = uint32_sub(_a, _b);
														
 
															+		const uint32_t result  = uint32_sels(a_sub_b, _a, _b);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_imax(uint32_t _a, uint32_t _b)
														
 
															+	{
														
 
															+		const uint32_t b_sub_a = uint32_sub(_b, _a);
														
 
															+		const uint32_t result  = uint32_sels(b_sub_a, _a, _b);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															/// Returns the smaller of _a and _b, compared as unsigned values.
															inline uint32_t uint32_min(uint32_t _a, uint32_t _b)
															{
																if (_a > _b)
																{
																	return _b;
																}

																return _a;
															}
														
 
															+
														
 
															/// Returns the larger of _a and _b, compared as unsigned values.
															inline uint32_t uint32_max(uint32_t _a, uint32_t _b)
															{
																if (_a > _b)
																{
																	return _a;
																}

																return _b;
															}
														
 
															+
														
 
															+	inline uint32_t uint32_incwrap(uint32_t _val, uint32_t _min, uint32_t _max)
														
 
															+	{
														
 
															+		const uint32_t inc          = uint32_inc(_val);
														
 
															+		const uint32_t max_diff     = uint32_sub(_max, _val);
														
 
															+		const uint32_t neg_max_diff = uint32_neg(max_diff);
														
 
															+		const uint32_t max_or       = uint32_or(max_diff, neg_max_diff);
														
 
															+		const uint32_t max_diff_nz  = uint32_ext(max_or);
														
 
															+		const uint32_t result       = uint32_selb(max_diff_nz, inc, _min);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_decwrap(uint32_t _val, uint32_t _min, uint32_t _max)
														
 
															+	{
														
 
															+		const uint32_t dec          = uint32_dec(_val);
														
 
															+		const uint32_t min_diff     = uint32_sub(_min, _val);
														
 
															+		const uint32_t neg_min_diff = uint32_neg(min_diff);
														
 
															+		const uint32_t min_or       = uint32_or(min_diff, neg_min_diff);
														
 
															+		const uint32_t min_diff_nz  = uint32_ext(min_or);
														
 
															+		const uint32_t result       = uint32_selb(min_diff_nz, dec, _max);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_cntbits_ref(uint32_t _val)
														
 
															+	{
														
 
															+		const uint32_t tmp0   = uint32_srl(_val, 1);
														
 
															+		const uint32_t tmp1   = uint32_and(tmp0, 0x55555555);
														
 
															+		const uint32_t tmp2   = uint32_sub(_val, tmp1);
														
 
															+		const uint32_t tmp3   = uint32_and(tmp2, 0xc30c30c3);
														
 
															+		const uint32_t tmp4   = uint32_srl(tmp2, 2);
														
 
															+		const uint32_t tmp5   = uint32_and(tmp4, 0xc30c30c3);
														
 
															+		const uint32_t tmp6   = uint32_srl(tmp2, 4);
														
 
															+		const uint32_t tmp7   = uint32_and(tmp6, 0xc30c30c3);
														
 
															+		const uint32_t tmp8   = uint32_add(tmp3, tmp5);
														
 
															+		const uint32_t tmp9   = uint32_add(tmp7, tmp8);
														
 
															+		const uint32_t tmpA   = uint32_srl(tmp9, 6);
														
 
															+		const uint32_t tmpB   = uint32_add(tmp9, tmpA);
														
 
															+		const uint32_t tmpC   = uint32_srl(tmpB, 12);
														
 
															+		const uint32_t tmpD   = uint32_srl(tmpB, 24);
														
 
															+		const uint32_t tmpE   = uint32_add(tmpB, tmpC);
														
 
															+		const uint32_t tmpF   = uint32_add(tmpD, tmpE);
														
 
															+		const uint32_t result = uint32_and(tmpF, 0x3f);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	/// Count number of bits set.
														
 
															+	inline uint32_t uint32_cntbits(uint32_t _val)
														
 
															+	{
														
 
															+#if BX_COMPILER_GCC
														
 
															+		return __builtin_popcount(_val);
														
 
															+#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
														
 
															+		return __popcnt(_val);
														
 
															+#else
														
 
															+		return uint32_cntbits_ref(_val);
														
 
															+#endif // BX_COMPILER_GCC
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_cntlz_ref(uint32_t _val)
														
 
															+	{
														
 
															+		const uint32_t tmp0   = uint32_srl(_val, 1);
														
 
															+		const uint32_t tmp1   = uint32_or(tmp0, _val);
														
 
															+		const uint32_t tmp2   = uint32_srl(tmp1, 2);
														
 
															+		const uint32_t tmp3   = uint32_or(tmp2, tmp1);
														
 
															+		const uint32_t tmp4   = uint32_srl(tmp3, 4);
														
 
															+		const uint32_t tmp5   = uint32_or(tmp4, tmp3);
														
 
															+		const uint32_t tmp6   = uint32_srl(tmp5, 8);
														
 
															+		const uint32_t tmp7   = uint32_or(tmp6, tmp5);
														
 
															+		const uint32_t tmp8   = uint32_srl(tmp7, 16);
														
 
															+		const uint32_t tmp9   = uint32_or(tmp8, tmp7);
														
 
															+		const uint32_t tmpA   = uint32_not(tmp9);
														
 
															+		const uint32_t result = uint32_cntbits(tmpA);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	/// Count number of leading zeros.
														
 
															+	inline uint32_t uint32_cntlz(uint32_t _val)
														
 
															+	{
														
 
															+#if BX_COMPILER_GCC
														
 
															+		return __builtin_clz(_val);
														
 
															+#elif BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
														
 
															+		unsigned long index;
														
 
															+		_BitScanReverse(&index, _val);
														
 
															+		return 31 - index;
														
 
															+#else
														
 
															+		return uint32_cntlz_ref(_val);
														
 
															+#endif // BX_COMPILER_
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_cnttz_ref(uint32_t _val)
														
 
															+	{
														
 
															+		const uint32_t tmp0   = uint32_not(_val);
														
 
															+		const uint32_t tmp1   = uint32_dec(_val);
														
 
															+		const uint32_t tmp2   = uint32_and(tmp0, tmp1);
														
 
															+		const uint32_t result = uint32_cntbits(tmp2);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_cnttz(uint32_t _val)
														
 
															+	{
														
 
															+#if BX_COMPILER_MSVC && BX_PLATFORM_WINDOWS
														
 
															+		unsigned long index;
														
 
															+		_BitScanForward(&index, _val);
														
 
															+		return index;
														
 
															+#else
														
 
															+		return uint32_cnttz_ref(_val);
														
 
															+#endif // BX_COMPILER_
														
 
															+	}
														
 
															+
														
 
															+	// shuffle:
														
 
															+	// ---- ---- ---- ---- fedc ba98 7654 3210
														
 
															+	// to:
														
 
															+	// -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
														
 
															+	inline uint32_t uint32_part1by1(uint32_t _a)
														
 
															+	{
														
 
															+		const uint32_t val    = uint32_and(_a, 0xffff);
														
 
															+
														
 
															+		const uint32_t tmp0   = uint32_sll(val, 8);
														
 
															+		const uint32_t tmp1   = uint32_xor(val, tmp0);
														
 
															+		const uint32_t tmp2   = uint32_and(tmp1, 0x00ff00ff);
														
 
															+
														
 
															+		const uint32_t tmp3   = uint32_sll(tmp2, 4);
														
 
															+		const uint32_t tmp4   = uint32_xor(tmp2, tmp3);
														
 
															+		const uint32_t tmp5   = uint32_and(tmp4, 0x0f0f0f0f);
														
 
															+
														
 
															+		const uint32_t tmp6   = uint32_sll(tmp5, 2);
														
 
															+		const uint32_t tmp7   = uint32_xor(tmp5, tmp6);
														
 
															+		const uint32_t tmp8   = uint32_and(tmp7, 0x33333333);
														
 
															+
														
 
															+		const uint32_t tmp9   = uint32_sll(tmp8, 1);
														
 
															+		const uint32_t tmpA   = uint32_xor(tmp8, tmp9);
														
 
															+		const uint32_t result = uint32_and(tmpA, 0x55555555);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	// shuffle:
														
 
															+	// ---- ---- ---- ---- ---- --98 7654 3210
														
 
															+	// to:
														
 
															+	// ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
														
 
															+	inline uint32_t uint32_part1by2(uint32_t _a)
														
 
															+	{
														
 
															+		const uint32_t val    = uint32_and(_a, 0x3ff);
														
 
															+
														
 
															+		const uint32_t tmp0   = uint32_sll(val, 16);
														
 
															+		const uint32_t tmp1   = uint32_xor(val, tmp0);
														
 
															+		const uint32_t tmp2   = uint32_and(tmp1, 0xff0000ff);
														
 
															+
														
 
															+		const uint32_t tmp3   = uint32_sll(tmp2, 8);
														
 
															+		const uint32_t tmp4   = uint32_xor(tmp2, tmp3);
														
 
															+		const uint32_t tmp5   = uint32_and(tmp4, 0x0300f00f);
														
 
															+
														
 
															+		const uint32_t tmp6   = uint32_sll(tmp5, 4);
														
 
															+		const uint32_t tmp7   = uint32_xor(tmp5, tmp6);
														
 
															+		const uint32_t tmp8   = uint32_and(tmp7, 0x030c30c3);
														
 
															+
														
 
															+		const uint32_t tmp9   = uint32_sll(tmp8, 2);
														
 
															+		const uint32_t tmpA   = uint32_xor(tmp8, tmp9);
														
 
															+		const uint32_t result = uint32_and(tmpA, 0x09249249);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_testpow2(uint32_t _a)
														
 
															+	{
														
 
															+		const uint32_t tmp0   = uint32_not(_a);
														
 
															+		const uint32_t tmp1   = uint32_inc(tmp0);
														
 
															+		const uint32_t tmp2   = uint32_and(_a, tmp1);
														
 
															+		const uint32_t tmp3   = uint32_cmpeq(tmp2, _a);
														
 
															+		const uint32_t tmp4   = uint32_cmpneq(_a, 0);
														
 
															+		const uint32_t result = uint32_and(tmp3, tmp4);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															+	inline uint32_t uint32_nextpow2(uint32_t _a)
														
 
															+	{
														
 
															+		const uint32_t tmp0   = uint32_dec(_a);
														
 
															+		const uint32_t tmp1   = uint32_srl(tmp0, 1);
														
 
															+		const uint32_t tmp2   = uint32_or(tmp0, tmp1);
														
 
															+		const uint32_t tmp3   = uint32_srl(tmp2, 2);
														
 
															+		const uint32_t tmp4   = uint32_or(tmp2, tmp3);
														
 
															+		const uint32_t tmp5   = uint32_srl(tmp4, 4);
														
 
															+		const uint32_t tmp6   = uint32_or(tmp4, tmp5);
														
 
															+		const uint32_t tmp7   = uint32_srl(tmp6, 8);
														
 
															+		const uint32_t tmp8   = uint32_or(tmp6, tmp7);
														
 
															+		const uint32_t tmp9   = uint32_srl(tmp8, 16);
														
 
															+		const uint32_t tmpA   = uint32_or(tmp8, tmp9);
														
 
															+		const uint32_t result = uint32_inc(tmpA);
														
 
															+
														
 
															+		return result;
														
 
															+	}
														
 
															+
														
 
															 	inline uint16_t halfFromFloat(float _a)
														
 
															 	{
														
 
															 		union { uint32_t ui; float flt;	} ftou;
														
@@ -564,8 +564,8 @@ namespace bx
 
															 		union { uint32_t ui; float flt;	} utof;
														
 
															 		utof.ui = f_result;
														
 
															 		return utof.flt;
														
 
															-	} 

														
 
															-

														
 
															-} // namespace bx

														
 
															-

														
 
															-#endif // __BX_UINT32_T_H__

														
 
															+	} 
														
 
															+
														
 
															+} // namespace bx
														
 
															+
														
 
															+#endif // __BX_UINT32_T_H__
														
--- a/include/compat/mingw/alloca.h
+++ b/include/compat/mingw/alloca.h
@@ -1,6 +1,6 @@
 
															-#ifndef __MINGW32__ALLOCA_H__

														
 
															-#define __MINGW32__ALLOCA_H__

														
 
															-

														
 
															-#include <malloc.h>

														
 
															-

														
 
															-#endif // __MINGW32__ALLOCA_H__

														
 
															+#ifndef __MINGW32__ALLOCA_H__
														
 
															+#define __MINGW32__ALLOCA_H__
														
 
															+
														
 
															+#include <malloc.h>
														
 
															+
														
 
															+#endif // __MINGW32__ALLOCA_H__
														
--- a/include/compat/mingw/sal.h
+++ b/include/compat/mingw/sal.h
@@ -1,253 +1,253 @@
 
															-#pragma once

														
 
															-

														
 
															-#if __GNUC__ >=3

														
 
															-#pragma GCC system_header

														
 
															-#endif

														
 
															-

														
 
															-//#define __null // << Conflicts with GCC internal type __null

														
 
															-#define __notnull

														
 
															-#define __maybenull

														
 
															-#define __readonly

														
 
															-#define __notreadonly

														
 
															-#define __maybereadonly

														
 
															-#define __valid

														
 
															-#define __notvalid

														
 
															-#define __maybevalid

														
 
															-#define __readableTo(extent)

														
 
															-#define __elem_readableTo(size)

														
 
															-#define __byte_readableTo(size)

														
 
															-#define __writableTo(size)

														
 
															-#define __elem_writableTo(size)

														
 
															-#define __byte_writableTo(size)

														
 
															-#define __deref

														
 
															-#define __pre

														
 
															-#define __post

														
 
															-#define __precond(expr)

														
 
															-#define __postcond(expr)

														
 
															-#define __exceptthat

														
 
															-#define __execeptthat

														
 
															-#define __inner_success(expr)

														
 
															-#define __inner_checkReturn

														
 
															-#define __inner_typefix(ctype)

														
 
															-#define __inner_override

														
 
															-#define __inner_callback

														
 
															-#define __inner_blocksOn(resource)

														
 
															-#define __inner_fallthrough_dec

														
 
															-#define __inner_fallthrough

														
 
															-#define __refparam

														
 
															-#define __inner_control_entrypoint(category)

														
 
															-#define __inner_data_entrypoint(category)

														
 
															-

														
 
															-#define __ecount(size)

														
 
															-#define __bcount(size)

														
 
															-#define __in

														
 
															-#define __in_ecount(size)

														
 
															-#define __in_bcount(size)

														
 
															-#define __in_z

														
 
															-#define __in_ecount_z(size)

														
 
															-#define __in_bcount_z(size)

														
 
															-#define __in_nz

														
 
															-#define __in_ecount_nz(size)

														
 
															-#define __in_bcount_nz(size)

														
 
															-#define __in_xcount_opt(size)

														
 
															-#define __out

														
 
															-#define __out_ecount(size)

														
 
															-#define __out_bcount(size)

														
 
															-#define __out_ecount_part(size,length)

														
 
															-#define __out_bcount_part(size,length)

														
 
															-#define __out_ecount_full(size)

														
 
															-#define __out_bcount_full(size)

														
 
															-#define __out_z

														
 
															-#define __out_z_opt

														
 
															-#define __out_ecount_z(size)

														
 
															-#define __out_bcount_z(size)

														
 
															-#define __out_ecount_part_z(size,length)

														
 
															-#define __out_bcount_part_z(size,length)

														
 
															-#define __out_ecount_full_z(size)

														
 
															-#define __out_bcount_full_z(size)

														
 
															-#define __out_nz

														
 
															-#define __out_nz_opt

														
 
															-#define __out_ecount_nz(size)

														
 
															-#define __out_bcount_nz(size)

														
 
															-#define __inout

														
 
															-#define __inout_ecount(size)

														
 
															-#define __inout_bcount(size)

														
 
															-#define __inout_ecount_part(size,length)

														
 
															-#define __inout_bcount_part(size,length)

														
 
															-#define __inout_ecount_full(size)

														
 
															-#define __inout_bcount_full(size)

														
 
															-#define __inout_z

														
 
															-#define __inout_ecount_z(size)

														
 
															-#define __inout_bcount_z(size)

														
 
															-#define __inout_nz

														
 
															-#define __inout_ecount_nz(size)

														
 
															-#define __inout_bcount_nz(size)

														
 
															-#define __ecount_opt(size)

														
 
															-#define __bcount_opt(size)

														
 
															-#define __in_opt

														
 
															-#define __in_ecount_opt(size)

														
 
															-#define __in_bcount_opt(size)

														
 
															-#define __in_z_opt

														
 
															-#define __in_ecount_z_opt(size)

														
 
															-#define __in_bcount_z_opt(size)

														
 
															-#define __in_nz_opt

														
 
															-#define __in_ecount_nz_opt(size)

														
 
															-#define __in_bcount_nz_opt(size)

														
 
															-#define __out_opt

														
 
															-#define __out_ecount_opt(size)

														
 
															-#define __out_bcount_opt(size)

														
 
															-#define __out_ecount_part_opt(size,length)

														
 
															-#define __out_bcount_part_opt(size,length)

														
 
															-#define __out_ecount_full_opt(size)

														
 
															-#define __out_bcount_full_opt(size)

														
 
															-#define __out_ecount_z_opt(size)

														
 
															-#define __out_bcount_z_opt(size)

														
 
															-#define __out_ecount_part_z_opt(size,length)

														
 
															-#define __out_bcount_part_z_opt(size,length)

														
 
															-#define __out_ecount_full_z_opt(size)

														
 
															-#define __out_bcount_full_z_opt(size)

														
 
															-#define __out_ecount_nz_opt(size)

														
 
															-#define __out_bcount_nz_opt(size)

														
 
															-#define __inout_opt

														
 
															-#define __inout_ecount_opt(size)

														
 
															-#define __inout_bcount_opt(size)

														
 
															-#define __inout_ecount_part_opt(size,length)

														
 
															-#define __inout_bcount_part_opt(size,length)

														
 
															-#define __inout_ecount_full_opt(size)

														
 
															-#define __inout_bcount_full_opt(size)

														
 
															-#define __inout_z_opt

														
 
															-#define __inout_ecount_z_opt(size)

														
 
															-#define __inout_ecount_z_opt(size)

														
 
															-#define __inout_bcount_z_opt(size)

														
 
															-#define __inout_nz_opt

														
 
															-#define __inout_ecount_nz_opt(size)

														
 
															-#define __inout_bcount_nz_opt(size)

														
 
															-#define __deref_ecount(size)

														
 
															-#define __deref_bcount(size)

														
 
															-#define __deref_out

														
 
															-#define __deref_out_ecount(size)

														
 
															-#define __deref_out_bcount(size)

														
 
															-#define __deref_out_ecount_part(size,length)

														
 
															-#define __deref_out_bcount_part(size,length)

														
 
															-#define __deref_out_ecount_full(size)

														
 
															-#define __deref_out_bcount_full(size)

														
 
															-#define __deref_out_z

														
 
															-#define __deref_out_ecount_z(size)

														
 
															-#define __deref_out_bcount_z(size)

														
 
															-#define __deref_out_nz

														
 
															-#define __deref_out_ecount_nz(size)

														
 
															-#define __deref_out_bcount_nz(size)

														
 
															-#define __deref_inout

														
 
															-#define __deref_inout_z

														
 
															-#define __deref_inout_ecount(size)

														
 
															-#define __deref_inout_bcount(size)

														
 
															-#define __deref_inout_ecount_part(size,length)

														
 
															-#define __deref_inout_bcount_part(size,length)

														
 
															-#define __deref_inout_ecount_full(size)

														
 
															-#define __deref_inout_bcount_full(size)

														
 
															-#define __deref_inout_z

														
 
															-#define __deref_inout_ecount_z(size)

														
 
															-#define __deref_inout_bcount_z(size)

														
 
															-#define __deref_inout_nz

														
 
															-#define __deref_inout_ecount_nz(size)

														
 
															-#define __deref_inout_bcount_nz(size)

														
 
															-#define __deref_ecount_opt(size)

														
 
															-#define __deref_bcount_opt(size)

														
 
															-#define __deref_out_opt

														
 
															-#define __deref_out_ecount_opt(size)

														
 
															-#define __deref_out_bcount_opt(size)

														
 
															-#define __deref_out_ecount_part_opt(size,length)

														
 
															-#define __deref_out_bcount_part_opt(size,length)

														
 
															-#define __deref_out_ecount_full_opt(size)

														
 
															-#define __deref_out_bcount_full_opt(size)

														
 
															-#define __deref_out_z_opt

														
 
															-#define __deref_out_ecount_z_opt(size)

														
 
															-#define __deref_out_bcount_z_opt(size)

														
 
															-#define __deref_out_nz_opt

														
 
															-#define __deref_out_ecount_nz_opt(size)

														
 
															-#define __deref_out_bcount_nz_opt(size)

														
 
															-#define __deref_inout_opt

														
 
															-#define __deref_inout_ecount_opt(size)

														
 
															-#define __deref_inout_bcount_opt(size)

														
 
															-#define __deref_inout_ecount_part_opt(size,length)

														
 
															-#define __deref_inout_bcount_part_opt(size,length)

														
 
															-#define __deref_inout_ecount_full_opt(size)

														
 
															-#define __deref_inout_bcount_full_opt(size)

														
 
															-#define __deref_inout_z_opt

														
 
															-#define __deref_inout_ecount_z_opt(size)

														
 
															-#define __deref_inout_bcount_z_opt(size)

														
 
															-#define __deref_inout_nz_opt

														
 
															-#define __deref_inout_ecount_nz_opt(size)

														
 
															-#define __deref_inout_bcount_nz_opt(size)

														
 
															-#define __deref_opt_ecount(size)

														
 
															-#define __deref_opt_bcount(size)

														
 
															-#define __deref_opt_out

														
 
															-#define __deref_opt_out_z

														
 
															-#define __deref_opt_out_ecount(size)

														
 
															-#define __deref_opt_out_bcount(size)

														
 
															-#define __deref_opt_out_ecount_part(size,length)

														
 
															-#define __deref_opt_out_bcount_part(size,length)

														
 
															-#define __deref_opt_out_ecount_full(size)

														
 
															-#define __deref_opt_out_bcount_full(size)

														
 
															-#define __deref_opt_inout

														
 
															-#define __deref_opt_inout_ecount(size)

														
 
															-#define __deref_opt_inout_bcount(size)

														
 
															-#define __deref_opt_inout_ecount_part(size,length)

														
 
															-#define __deref_opt_inout_bcount_part(size,length)

														
 
															-#define __deref_opt_inout_ecount_full(size)

														
 
															-#define __deref_opt_inout_bcount_full(size)

														
 
															-#define __deref_opt_inout_z

														
 
															-#define __deref_opt_inout_ecount_z(size)

														
 
															-#define __deref_opt_inout_bcount_z(size)

														
 
															-#define __deref_opt_inout_nz

														
 
															-#define __deref_opt_inout_ecount_nz(size)

														
 
															-#define __deref_opt_inout_bcount_nz(size)

														
 
															-#define __deref_opt_ecount_opt(size)

														
 
															-#define __deref_opt_bcount_opt(size)

														
 
															-#define __deref_opt_out_opt

														
 
															-#define __deref_opt_out_ecount_opt(size)

														
 
															-#define __deref_opt_out_bcount_opt(size)

														
 
															-#define __deref_opt_out_ecount_part_opt(size,length)

														
 
															-#define __deref_opt_out_bcount_part_opt(size,length)

														
 
															-#define __deref_opt_out_ecount_full_opt(size)

														
 
															-#define __deref_opt_out_bcount_full_opt(size)

														
 
															-#define __deref_opt_out_z_opt

														
 
															-#define __deref_opt_out_ecount_z_opt(size)

														
 
															-#define __deref_opt_out_bcount_z_opt(size)

														
 
															-#define __deref_opt_out_nz_opt

														
 
															-#define __deref_opt_out_ecount_nz_opt(size)

														
 
															-#define __deref_opt_out_bcount_nz_opt(size)

														
 
															-#define __deref_opt_inout_opt

														
 
															-#define __deref_opt_inout_ecount_opt(size)

														
 
															-#define __deref_opt_inout_bcount_opt(size)

														
 
															-#define __deref_opt_inout_ecount_part_opt(size,length)

														
 
															-#define __deref_opt_inout_bcount_part_opt(size,length)

														
 
															-#define __deref_opt_inout_ecount_full_opt(size)

														
 
															-#define __deref_opt_inout_bcount_full_opt(size)

														
 
															-#define __deref_opt_inout_z_opt

														
 
															-#define __deref_opt_inout_ecount_z_opt(size)

														
 
															-#define __deref_opt_inout_bcount_z_opt(size)

														
 
															-#define __deref_opt_inout_nz_opt

														
 
															-#define __deref_opt_inout_ecount_nz_opt(size)

														
 
															-#define __deref_opt_inout_bcount_nz_opt(size)

														
 
															-

														
 
															-#define __success(expr)

														
 
															-#define __nullterminated

														
 
															-#define __nullnullterminated

														
 
															-#define __reserved

														
 
															-#define __checkReturn

														
 
															-#define __typefix(ctype)

														
 
															-#define __override

														
 
															-#define __callback

														
 
															-#define __format_string

														
 
															-#define __blocksOn(resource)

														
 
															-#define __control_entrypoint(category)

														
 
															-#define __data_entrypoint(category)

														
 
															-

														
 
															-#ifndef __fallthrough

														
 
															-    #define __fallthrough __inner_fallthrough

														
 
															-#endif

														
 
															-

														
 
															-#ifndef __analysis_assume

														
 
															-    #define __analysis_assume(expr)

														
 
															-#endif

														
 
															+#pragma once
														
 
															+
														
 
															+#if __GNUC__ >=3
														
 
															+#pragma GCC system_header
														
 
															+#endif
														
 
															+
														
 
															+//#define __null // << Conflicts with GCC internal type __null
														
 
															+#define __notnull
														
 
															+#define __maybenull
														
 
															+#define __readonly
														
 
															+#define __notreadonly
														
 
															+#define __maybereadonly
														
 
															+#define __valid
														
 
															+#define __notvalid
														
 
															+#define __maybevalid
														
 
															+#define __readableTo(extent)
														
 
															+#define __elem_readableTo(size)
														
 
															+#define __byte_readableTo(size)
														
 
															+#define __writableTo(size)
														
 
															+#define __elem_writableTo(size)
														
 
															+#define __byte_writableTo(size)
														
 
															+#define __deref
														
 
															+#define __pre
														
 
															+#define __post
														
 
															+#define __precond(expr)
														
 
															+#define __postcond(expr)
														
 
															+#define __exceptthat
														
 
															+#define __execeptthat
														
 
															+#define __inner_success(expr)
														
 
															+#define __inner_checkReturn
														
 
															+#define __inner_typefix(ctype)
														
 
															+#define __inner_override
														
 
															+#define __inner_callback
														
 
															+#define __inner_blocksOn(resource)
														
 
															+#define __inner_fallthrough_dec
														
 
															+#define __inner_fallthrough
														
 
															+#define __refparam
														
 
															+#define __inner_control_entrypoint(category)
														
 
															+#define __inner_data_entrypoint(category)
														
 
															+
														
 
															+#define __ecount(size)
														
 
															+#define __bcount(size)
														
 
															+#define __in
														
 
															+#define __in_ecount(size)
														
 
															+#define __in_bcount(size)
														
 
															+#define __in_z
														
 
															+#define __in_ecount_z(size)
														
 
															+#define __in_bcount_z(size)
														
 
															+#define __in_nz
														
 
															+#define __in_ecount_nz(size)
														
 
															+#define __in_bcount_nz(size)
														
 
															+#define __in_xcount_opt(size)
														
 
															+#define __out
														
 
															+#define __out_ecount(size)
														
 
															+#define __out_bcount(size)
														
 
															+#define __out_ecount_part(size,length)
														
 
															+#define __out_bcount_part(size,length)
														
 
															+#define __out_ecount_full(size)
														
 
															+#define __out_bcount_full(size)
														
 
															+#define __out_z
														
 
															+#define __out_z_opt
														
 
															+#define __out_ecount_z(size)
														
 
															+#define __out_bcount_z(size)
														
 
															+#define __out_ecount_part_z(size,length)
														
 
															+#define __out_bcount_part_z(size,length)
														
 
															+#define __out_ecount_full_z(size)
														
 
															+#define __out_bcount_full_z(size)
														
 
															+#define __out_nz
														
 
															+#define __out_nz_opt
														
 
															+#define __out_ecount_nz(size)
														
 
															+#define __out_bcount_nz(size)
														
 
															+#define __inout
														
 
															+#define __inout_ecount(size)
														
 
															+#define __inout_bcount(size)
														
 
															+#define __inout_ecount_part(size,length)
														
 
															+#define __inout_bcount_part(size,length)
														
 
															+#define __inout_ecount_full(size)
														
 
															+#define __inout_bcount_full(size)
														
 
															+#define __inout_z
														
 
															+#define __inout_ecount_z(size)
														
 
															+#define __inout_bcount_z(size)
														
 
															+#define __inout_nz
														
 
															+#define __inout_ecount_nz(size)
														
 
															+#define __inout_bcount_nz(size)
														
 
															+#define __ecount_opt(size)
														
 
															+#define __bcount_opt(size)
														
 
															+#define __in_opt
														
 
															+#define __in_ecount_opt(size)
														
 
															+#define __in_bcount_opt(size)
														
 
															+#define __in_z_opt
														
 
															+#define __in_ecount_z_opt(size)
														
 
															+#define __in_bcount_z_opt(size)
														
 
															+#define __in_nz_opt
														
 
															+#define __in_ecount_nz_opt(size)
														
 
															+#define __in_bcount_nz_opt(size)
														
 
															+#define __out_opt
														
 
															+#define __out_ecount_opt(size)
														
 
															+#define __out_bcount_opt(size)
														
 
															+#define __out_ecount_part_opt(size,length)
														
 
															+#define __out_bcount_part_opt(size,length)
														
 
															+#define __out_ecount_full_opt(size)
														
 
															+#define __out_bcount_full_opt(size)
														
 
															+#define __out_ecount_z_opt(size)
														
 
															+#define __out_bcount_z_opt(size)
														
 
															+#define __out_ecount_part_z_opt(size,length)
														
 
															+#define __out_bcount_part_z_opt(size,length)
														
 
															+#define __out_ecount_full_z_opt(size)
														
 
															+#define __out_bcount_full_z_opt(size)
														
 
															+#define __out_ecount_nz_opt(size)
														
 
															+#define __out_bcount_nz_opt(size)
														
 
															+#define __inout_opt
														
 
															+#define __inout_ecount_opt(size)
														
 
															+#define __inout_bcount_opt(size)
														
 
															+#define __inout_ecount_part_opt(size,length)
														
 
															+#define __inout_bcount_part_opt(size,length)
														
 
															+#define __inout_ecount_full_opt(size)
														
 
															+#define __inout_bcount_full_opt(size)
														
 
															+#define __inout_z_opt
														
 
															+#define __inout_ecount_z_opt(size)
														
 
															+#define __inout_ecount_z_opt(size)
														
 
															+#define __inout_bcount_z_opt(size)
														
 
															+#define __inout_nz_opt
														
 
															+#define __inout_ecount_nz_opt(size)
														
 
															+#define __inout_bcount_nz_opt(size)
														
 
															+#define __deref_ecount(size)
														
 
															+#define __deref_bcount(size)
														
 
															+#define __deref_out
														
 
															+#define __deref_out_ecount(size)
														
 
															+#define __deref_out_bcount(size)
														
 
															+#define __deref_out_ecount_part(size,length)
														
 
															+#define __deref_out_bcount_part(size,length)
														
 
															+#define __deref_out_ecount_full(size)
														
 
															+#define __deref_out_bcount_full(size)
														
 
															+#define __deref_out_z
														
 
															+#define __deref_out_ecount_z(size)
														
 
															+#define __deref_out_bcount_z(size)
														
 
															+#define __deref_out_nz
														
 
															+#define __deref_out_ecount_nz(size)
														
 
															+#define __deref_out_bcount_nz(size)
														
 
															+#define __deref_inout
														
 
															+#define __deref_inout_z
														
 
															+#define __deref_inout_ecount(size)
														
 
															+#define __deref_inout_bcount(size)
														
 
															+#define __deref_inout_ecount_part(size,length)
														
 
															+#define __deref_inout_bcount_part(size,length)
														
 
															+#define __deref_inout_ecount_full(size)
														
 
															+#define __deref_inout_bcount_full(size)
														
 
															+#define __deref_inout_z
														
 
															+#define __deref_inout_ecount_z(size)
														
 
															+#define __deref_inout_bcount_z(size)
														
 
															+#define __deref_inout_nz
														
 
															+#define __deref_inout_ecount_nz(size)
														
 
															+#define __deref_inout_bcount_nz(size)
														
 
															+#define __deref_ecount_opt(size)
														
 
															+#define __deref_bcount_opt(size)
														
 
															+#define __deref_out_opt
														
 
															+#define __deref_out_ecount_opt(size)
														
 
															+#define __deref_out_bcount_opt(size)
														
 
															+#define __deref_out_ecount_part_opt(size,length)
														
 
															+#define __deref_out_bcount_part_opt(size,length)
														
 
															+#define __deref_out_ecount_full_opt(size)
														
 
															+#define __deref_out_bcount_full_opt(size)
														
 
															+#define __deref_out_z_opt
														
 
															+#define __deref_out_ecount_z_opt(size)
														
 
															+#define __deref_out_bcount_z_opt(size)
														
 
															+#define __deref_out_nz_opt
														
 
															+#define __deref_out_ecount_nz_opt(size)
														
 
															+#define __deref_out_bcount_nz_opt(size)
														
 
															+#define __deref_inout_opt
														
 
															+#define __deref_inout_ecount_opt(size)
														
 
															+#define __deref_inout_bcount_opt(size)
														
 
															+#define __deref_inout_ecount_part_opt(size,length)
														
 
															+#define __deref_inout_bcount_part_opt(size,length)
														
 
															+#define __deref_inout_ecount_full_opt(size)
														
 
															+#define __deref_inout_bcount_full_opt(size)
														
 
															+#define __deref_inout_z_opt
														
 
															+#define __deref_inout_ecount_z_opt(size)
														
 
															+#define __deref_inout_bcount_z_opt(size)
														
 
															+#define __deref_inout_nz_opt
														
 
															+#define __deref_inout_ecount_nz_opt(size)
														
 
															+#define __deref_inout_bcount_nz_opt(size)
														
 
															+#define __deref_opt_ecount(size)
														
 
															+#define __deref_opt_bcount(size)
														
 
															+#define __deref_opt_out
														
 
															+#define __deref_opt_out_z
														
 
															+#define __deref_opt_out_ecount(size)
														
 
															+#define __deref_opt_out_bcount(size)
														
 
															+#define __deref_opt_out_ecount_part(size,length)
														
 
															+#define __deref_opt_out_bcount_part(size,length)
														
 
															+#define __deref_opt_out_ecount_full(size)
														
 
															+#define __deref_opt_out_bcount_full(size)
														
 
															+#define __deref_opt_inout
														
 
															+#define __deref_opt_inout_ecount(size)
														
 
															+#define __deref_opt_inout_bcount(size)
														
 
															+#define __deref_opt_inout_ecount_part(size,length)
														
 
															+#define __deref_opt_inout_bcount_part(size,length)
														
 
															+#define __deref_opt_inout_ecount_full(size)
														
 
															+#define __deref_opt_inout_bcount_full(size)
														
 
															+#define __deref_opt_inout_z
														
 
															+#define __deref_opt_inout_ecount_z(size)
														
 
															+#define __deref_opt_inout_bcount_z(size)
														
 
															+#define __deref_opt_inout_nz
														
 
															+#define __deref_opt_inout_ecount_nz(size)
														
 
															+#define __deref_opt_inout_bcount_nz(size)
														
 
															+#define __deref_opt_ecount_opt(size)
														
 
															+#define __deref_opt_bcount_opt(size)
														
 
															+#define __deref_opt_out_opt
														
 
															+#define __deref_opt_out_ecount_opt(size)
														
 
															+#define __deref_opt_out_bcount_opt(size)
														
 
															+#define __deref_opt_out_ecount_part_opt(size,length)
														
 
															+#define __deref_opt_out_bcount_part_opt(size,length)
														
 
															+#define __deref_opt_out_ecount_full_opt(size)
														
 
															+#define __deref_opt_out_bcount_full_opt(size)
														
 
															+#define __deref_opt_out_z_opt
														
 
															+#define __deref_opt_out_ecount_z_opt(size)
														
 
															+#define __deref_opt_out_bcount_z_opt(size)
														
 
															+#define __deref_opt_out_nz_opt
														
 
															+#define __deref_opt_out_ecount_nz_opt(size)
														
 
															+#define __deref_opt_out_bcount_nz_opt(size)
														
 
															+#define __deref_opt_inout_opt
														
 
															+#define __deref_opt_inout_ecount_opt(size)
														
 
															+#define __deref_opt_inout_bcount_opt(size)
														
 
															+#define __deref_opt_inout_ecount_part_opt(size,length)
														
 
															+#define __deref_opt_inout_bcount_part_opt(size,length)
														
 
															+#define __deref_opt_inout_ecount_full_opt(size)
														
 
															+#define __deref_opt_inout_bcount_full_opt(size)
														
 
															+#define __deref_opt_inout_z_opt
														
 
															+#define __deref_opt_inout_ecount_z_opt(size)
														
 
															+#define __deref_opt_inout_bcount_z_opt(size)
														
 
															+#define __deref_opt_inout_nz_opt
														
 
															+#define __deref_opt_inout_ecount_nz_opt(size)
														
 
															+#define __deref_opt_inout_bcount_nz_opt(size)
														
 
															+
														
 
															+#define __success(expr)
														
 
															+#define __nullterminated
														
 
															+#define __nullnullterminated
														
 
															+#define __reserved
														
 
															+#define __checkReturn
														
 
															+#define __typefix(ctype)
														
 
															+#define __override
														
 
															+#define __callback
														
 
															+#define __format_string
														
 
															+#define __blocksOn(resource)
														
 
															+#define __control_entrypoint(category)
														
 
															+#define __data_entrypoint(category)
														
 
															+
														
 
															+#ifndef __fallthrough
														
 
															+    #define __fallthrough __inner_fallthrough
														
 
															+#endif
														
 
															+
														
 
															+#ifndef __analysis_assume
														
 
															+    #define __analysis_assume(expr)
														
 
															+#endif
														
--- a/include/compat/mingw/specstrings_strict.h
+++ b/include/compat/mingw/specstrings_strict.h
@@ -1 +1 @@
 
															-#define __reserved

														
 
															+#define __reserved
														
--- a/include/compat/mingw/specstrings_undef.h
+++ b/include/compat/mingw/specstrings_undef.h
@@ -1,2 +1,2 @@
 
															-#undef __reserved

														
 
															-

														
 
															+#undef __reserved
														
 
															+
														
--- a/include/compat/msvc/alloca.h
+++ b/include/compat/msvc/alloca.h
@@ -1 +1 @@
 
															-#include <malloc.h>

														
 
															+#include <malloc.h>
														
--- a/include/compat/msvc/inttypes.h
+++ b/include/compat/msvc/inttypes.h
@@ -1,305 +1,305 @@
 
															-// ISO C9x  compliant inttypes.h for Microsoft Visual Studio

														
 
															-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 

														
 
															-// 

														
 
															-//  Copyright (c) 2006 Alexander Chemeris

														
 
															-// 

														
 
															-// Redistribution and use in source and binary forms, with or without

														
 
															-// modification, are permitted provided that the following conditions are met:

														
 
															-// 

														
 
															-//   1. Redistributions of source code must retain the above copyright notice,

														
 
															-//      this list of conditions and the following disclaimer.

														
 
															-// 

														
 
															-//   2. Redistributions in binary form must reproduce the above copyright

														
 
															-//      notice, this list of conditions and the following disclaimer in the

														
 
															-//      documentation and/or other materials provided with the distribution.

														
 
															-// 

														
 
															-//   3. The name of the author may be used to endorse or promote products

														
 
															-//      derived from this software without specific prior written permission.

														
 
															-// 

														
 
															-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED

														
 
															-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF

														
 
															-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO

														
 
															-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

														
 
															-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

														
 
															-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

														
 
															-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 

														
 
															-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR

														
 
															-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF

														
 
															-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

														
 
															-// 

														
 
															-///////////////////////////////////////////////////////////////////////////////

														
 
															-

														
 
															-#ifndef _MSC_VER // [

														
 
															-#error "Use this header only with Microsoft Visual C++ compilers!"

														
 
															-#endif // _MSC_VER ]

														
 
															-

														
 
															-#ifndef _MSC_INTTYPES_H_ // [

														
 
															-#define _MSC_INTTYPES_H_

														
 
															-

														
 
															-#if _MSC_VER > 1000

														
 
															-#pragma once

														
 
															-#endif

														
 
															-

														
 
															-#include "stdint.h"

														
 
															-

														
 
															-// 7.8 Format conversion of integer types

														
 
															-

														
 
															-typedef struct {

														
 
															-   intmax_t quot;

														
 
															-   intmax_t rem;

														
 
															-} imaxdiv_t;

														
 
															-

														
 
															-// 7.8.1 Macros for format specifiers

														
 
															-

														
 
															-#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [   See footnote 185 at page 198

														
 
															-

														
 
															-// The fprintf macros for signed integers are:

														
 
															-#define PRId8       "d"

														
 
															-#define PRIi8       "i"

														
 
															-#define PRIdLEAST8  "d"

														
 
															-#define PRIiLEAST8  "i"

														
 
															-#define PRIdFAST8   "d"

														
 
															-#define PRIiFAST8   "i"

														
 
															-

														
 
															-#define PRId16       "hd"

														
 
															-#define PRIi16       "hi"

														
 
															-#define PRIdLEAST16  "hd"

														
 
															-#define PRIiLEAST16  "hi"

														
 
															-#define PRIdFAST16   "hd"

														
 
															-#define PRIiFAST16   "hi"

														
 
															-

														
 
															-#define PRId32       "I32d"

														
 
															-#define PRIi32       "I32i"

														
 
															-#define PRIdLEAST32  "I32d"

														
 
															-#define PRIiLEAST32  "I32i"

														
 
															-#define PRIdFAST32   "I32d"

														
 
															-#define PRIiFAST32   "I32i"

														
 
															-

														
 
															-#define PRId64       "I64d"

														
 
															-#define PRIi64       "I64i"

														
 
															-#define PRIdLEAST64  "I64d"

														
 
															-#define PRIiLEAST64  "I64i"

														
 
															-#define PRIdFAST64   "I64d"

														
 
															-#define PRIiFAST64   "I64i"

														
 
															-

														
 
															-#define PRIdMAX     "I64d"

														
 
															-#define PRIiMAX     "I64i"

														
 
															-

														
 
															-#define PRIdPTR     "Id"

														
 
															-#define PRIiPTR     "Ii"

														
 
															-

														
 
															-// The fprintf macros for unsigned integers are:

														
 
															-#define PRIo8       "o"

														
 
															-#define PRIu8       "u"

														
 
															-#define PRIx8       "x"

														
 
															-#define PRIX8       "X"

														
 
															-#define PRIoLEAST8  "o"

														
 
															-#define PRIuLEAST8  "u"

														
 
															-#define PRIxLEAST8  "x"

														
 
															-#define PRIXLEAST8  "X"

														
 
															-#define PRIoFAST8   "o"

														
 
															-#define PRIuFAST8   "u"

														
 
															-#define PRIxFAST8   "x"

														
 
															-#define PRIXFAST8   "X"

														
 
															-

														
 
															-#define PRIo16       "ho"

														
 
															-#define PRIu16       "hu"

														
 
															-#define PRIx16       "hx"

														
 
															-#define PRIX16       "hX"

														
 
															-#define PRIoLEAST16  "ho"

														
 
															-#define PRIuLEAST16  "hu"

														
 
															-#define PRIxLEAST16  "hx"

														
 
															-#define PRIXLEAST16  "hX"

														
 
															-#define PRIoFAST16   "ho"

														
 
															-#define PRIuFAST16   "hu"

														
 
															-#define PRIxFAST16   "hx"

														
 
															-#define PRIXFAST16   "hX"

														
 
															-

														
 
															-#define PRIo32       "I32o"

														
 
															-#define PRIu32       "I32u"

														
 
															-#define PRIx32       "I32x"

														
 
															-#define PRIX32       "I32X"

														
 
															-#define PRIoLEAST32  "I32o"

														
 
															-#define PRIuLEAST32  "I32u"

														
 
															-#define PRIxLEAST32  "I32x"

														
 
															-#define PRIXLEAST32  "I32X"

														
 
															-#define PRIoFAST32   "I32o"

														
 
															-#define PRIuFAST32   "I32u"

														
 
															-#define PRIxFAST32   "I32x"

														
 
															-#define PRIXFAST32   "I32X"

														
 
															-

														
 
															-#define PRIo64       "I64o"

														
 
															-#define PRIu64       "I64u"

														
 
															-#define PRIx64       "I64x"

														
 
															-#define PRIX64       "I64X"

														
 
															-#define PRIoLEAST64  "I64o"

														
 
															-#define PRIuLEAST64  "I64u"

														
 
															-#define PRIxLEAST64  "I64x"

														
 
															-#define PRIXLEAST64  "I64X"

														
 
															-#define PRIoFAST64   "I64o"

														
 
															-#define PRIuFAST64   "I64u"

														
 
															-#define PRIxFAST64   "I64x"

														
 
															-#define PRIXFAST64   "I64X"

														
 
															-

														
 
															-#define PRIoMAX     "I64o"

														
 
															-#define PRIuMAX     "I64u"

														
 
															-#define PRIxMAX     "I64x"

														
 
															-#define PRIXMAX     "I64X"

														
 
															-

														
 
															-#define PRIoPTR     "Io"

														
 
															-#define PRIuPTR     "Iu"

														
 
															-#define PRIxPTR     "Ix"

														
 
															-#define PRIXPTR     "IX"

														
 
															-

														
 
															-// The fscanf macros for signed integers are:

														
 
															-#define SCNd8       "d"

														
 
															-#define SCNi8       "i"

														
 
															-#define SCNdLEAST8  "d"

														
 
															-#define SCNiLEAST8  "i"

														
 
															-#define SCNdFAST8   "d"

														
 
															-#define SCNiFAST8   "i"

														
 
															-

														
 
															-#define SCNd16       "hd"

														
 
															-#define SCNi16       "hi"

														
 
															-#define SCNdLEAST16  "hd"

														
 
															-#define SCNiLEAST16  "hi"

														
 
															-#define SCNdFAST16   "hd"

														
 
															-#define SCNiFAST16   "hi"

														
 
															-

														
 
															-#define SCNd32       "ld"

														
 
															-#define SCNi32       "li"

														
 
															-#define SCNdLEAST32  "ld"

														
 
															-#define SCNiLEAST32  "li"

														
 
															-#define SCNdFAST32   "ld"

														
 
															-#define SCNiFAST32   "li"

														
 
															-

														
 
															-#define SCNd64       "I64d"

														
 
															-#define SCNi64       "I64i"

														
 
															-#define SCNdLEAST64  "I64d"

														
 
															-#define SCNiLEAST64  "I64i"

														
 
															-#define SCNdFAST64   "I64d"

														
 
															-#define SCNiFAST64   "I64i"

														
 
															-

														
 
															-#define SCNdMAX     "I64d"

														
 
															-#define SCNiMAX     "I64i"

														
 
															-

														
 
															-#ifdef _WIN64 // [

														
 
															-#  define SCNdPTR     "I64d"

														
 
															-#  define SCNiPTR     "I64i"

														
 
															-#else  // _WIN64 ][

														
 
															-#  define SCNdPTR     "ld"

														
 
															-#  define SCNiPTR     "li"

														
 
															-#endif  // _WIN64 ]

														
 
															-

														
 
															-// The fscanf macros for unsigned integers are:

														
 
															-#define SCNo8       "o"

														
 
															-#define SCNu8       "u"

														
 
															-#define SCNx8       "x"

														
 
															-#define SCNX8       "X"

														
 
															-#define SCNoLEAST8  "o"

														
 
															-#define SCNuLEAST8  "u"

														
 
															-#define SCNxLEAST8  "x"

														
 
															-#define SCNXLEAST8  "X"

														
 
															-#define SCNoFAST8   "o"

														
 
															-#define SCNuFAST8   "u"

														
 
															-#define SCNxFAST8   "x"

														
 
															-#define SCNXFAST8   "X"

														
 
															-

														
 
															-#define SCNo16       "ho"

														
 
															-#define SCNu16       "hu"

														
 
															-#define SCNx16       "hx"

														
 
															-#define SCNX16       "hX"

														
 
															-#define SCNoLEAST16  "ho"

														
 
															-#define SCNuLEAST16  "hu"

														
 
															-#define SCNxLEAST16  "hx"

														
 
															-#define SCNXLEAST16  "hX"

														
 
															-#define SCNoFAST16   "ho"

														
 
															-#define SCNuFAST16   "hu"

														
 
															-#define SCNxFAST16   "hx"

														
 
															-#define SCNXFAST16   "hX"

														
 
															-

														
 
															-#define SCNo32       "lo"

														
 
															-#define SCNu32       "lu"

														
 
															-#define SCNx32       "lx"

														
 
															-#define SCNX32       "lX"

														
 
															-#define SCNoLEAST32  "lo"

														
 
															-#define SCNuLEAST32  "lu"

														
 
															-#define SCNxLEAST32  "lx"

														
 
															-#define SCNXLEAST32  "lX"

														
 
															-#define SCNoFAST32   "lo"

														
 
															-#define SCNuFAST32   "lu"

														
 
															-#define SCNxFAST32   "lx"

														
 
															-#define SCNXFAST32   "lX"

														
 
															-

														
 
															-#define SCNo64       "I64o"

														
 
															-#define SCNu64       "I64u"

														
 
															-#define SCNx64       "I64x"

														
 
															-#define SCNX64       "I64X"

														
 
															-#define SCNoLEAST64  "I64o"

														
 
															-#define SCNuLEAST64  "I64u"

														
 
															-#define SCNxLEAST64  "I64x"

														
 
															-#define SCNXLEAST64  "I64X"

														
 
															-#define SCNoFAST64   "I64o"

														
 
															-#define SCNuFAST64   "I64u"

														
 
															-#define SCNxFAST64   "I64x"

														
 
															-#define SCNXFAST64   "I64X"

														
 
															-

														
 
															-#define SCNoMAX     "I64o"

														
 
															-#define SCNuMAX     "I64u"

														
 
															-#define SCNxMAX     "I64x"

														
 
															-#define SCNXMAX     "I64X"

														
 
															-

														
 
															-#ifdef _WIN64 // [

														
 
															-#  define SCNoPTR     "I64o"

														
 
															-#  define SCNuPTR     "I64u"

														
 
															-#  define SCNxPTR     "I64x"

														
 
															-#  define SCNXPTR     "I64X"

														
 
															-#else  // _WIN64 ][

														
 
															-#  define SCNoPTR     "lo"

														
 
															-#  define SCNuPTR     "lu"

														
 
															-#  define SCNxPTR     "lx"

														
 
															-#  define SCNXPTR     "lX"

														
 
															-#endif  // _WIN64 ]

														
 
															-

														
 
															-#endif // __STDC_FORMAT_MACROS ]

														
 
															-

														
 
															-// 7.8.2 Functions for greatest-width integer types

														
 
															-

														
 
															-// 7.8.2.1 The imaxabs function

														
 
															-#define imaxabs _abs64

														
 
															-

														
 
															-// 7.8.2.2 The imaxdiv function

														
 
															-

														
 
															-// This is modified version of div() function from Microsoft's div.c found

														
 
															-// in %MSVC.NET%\crt\src\div.c

														
 
															-#ifdef STATIC_IMAXDIV // [

														
 
															-static

														
 
															-#else // STATIC_IMAXDIV ][

														
 
															-_inline

														
 
															-#endif // STATIC_IMAXDIV ]

														
 
															-imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)

														
 
															-{

														
 
															-   imaxdiv_t result;

														
 
															-

														
 
															-   result.quot = numer / denom;

														
 
															-   result.rem = numer % denom;

														
 
															-

														
 
															-   if (numer < 0 && result.rem > 0) {

														
 
															-      // did division wrong; must fix up

														
 
															-      ++result.quot;

														
 
															-      result.rem -= denom;

														
 
															-   }

														
 
															-

														
 
															-   return result;

														
 
															-}

														
 
															-

														
 
															-// 7.8.2.3 The strtoimax and strtoumax functions

														
 
															-#define strtoimax _strtoi64

														
 
															-#define strtoumax _strtoui64

														
 
															-

														
 
															-// 7.8.2.4 The wcstoimax and wcstoumax functions

														
 
															-#define wcstoimax _wcstoi64

														
 
															-#define wcstoumax _wcstoui64

														
 
															-

														
 
															-

														
 
															-#endif // _MSC_INTTYPES_H_ ]

														
 
															+// ISO C9x  compliant inttypes.h for Microsoft Visual Studio
														
 
															+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
														
 
															+// 
														
 
															+//  Copyright (c) 2006 Alexander Chemeris
														
 
															+// 
														
 
															+// Redistribution and use in source and binary forms, with or without
														
 
															+// modification, are permitted provided that the following conditions are met:
														
 
															+// 
														
 
															+//   1. Redistributions of source code must retain the above copyright notice,
														
 
															+//      this list of conditions and the following disclaimer.
														
 
															+// 
														
 
															+//   2. Redistributions in binary form must reproduce the above copyright
														
 
															+//      notice, this list of conditions and the following disclaimer in the
														
 
															+//      documentation and/or other materials provided with the distribution.
														
 
															+// 
														
 
															+//   3. The name of the author may be used to endorse or promote products
														
 
															+//      derived from this software without specific prior written permission.
														
 
															+// 
														
 
															+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
														
 
															+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
														
 
															+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
														
 
															+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
														
 
															+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
														
 
															+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
														
 
															+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
														
 
															+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
														
 
															+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
														
 
															+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
														
 
															+// 
														
 
															+///////////////////////////////////////////////////////////////////////////////
														
 
															+
														
 
															+#ifndef _MSC_VER // [
														
 
															+#error "Use this header only with Microsoft Visual C++ compilers!"
														
 
															+#endif // _MSC_VER ]
														
 
															+
														
 
															+#ifndef _MSC_INTTYPES_H_ // [
														
 
															+#define _MSC_INTTYPES_H_
														
 
															+
														
 
															+#if _MSC_VER > 1000
														
 
															+#pragma once
														
 
															+#endif
														
 
															+
														
 
															+#include "stdint.h"
														
 
															+
														
 
															+// 7.8 Format conversion of integer types
														
 
															+
														
 
															+typedef struct {
														
 
															+   intmax_t quot;
														
 
															+   intmax_t rem;
														
 
															+} imaxdiv_t;
														
 
															+
														
 
															+// 7.8.1 Macros for format specifiers
														
 
															+
														
 
															+#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [   See footnote 185 at page 198
														
 
															+
														
 
															+// The fprintf macros for signed integers are:
														
 
															+#define PRId8       "d"
														
 
															+#define PRIi8       "i"
														
 
															+#define PRIdLEAST8  "d"
														
 
															+#define PRIiLEAST8  "i"
														
 
															+#define PRIdFAST8   "d"
														
 
															+#define PRIiFAST8   "i"
														
 
															+
														
 
															+#define PRId16       "hd"
														
 
															+#define PRIi16       "hi"
														
 
															+#define PRIdLEAST16  "hd"
														
 
															+#define PRIiLEAST16  "hi"
														
 
															+#define PRIdFAST16   "hd"
														
 
															+#define PRIiFAST16   "hi"
														
 
															+
														
 
															+#define PRId32       "I32d"
														
 
															+#define PRIi32       "I32i"
														
 
															+#define PRIdLEAST32  "I32d"
														
 
															+#define PRIiLEAST32  "I32i"
														
 
															+#define PRIdFAST32   "I32d"
														
 
															+#define PRIiFAST32   "I32i"
														
 
															+
														
 
															+#define PRId64       "I64d"
														
 
															+#define PRIi64       "I64i"
														
 
															+#define PRIdLEAST64  "I64d"
														
 
															+#define PRIiLEAST64  "I64i"
														
 
															+#define PRIdFAST64   "I64d"
														
 
															+#define PRIiFAST64   "I64i"
														
 
															+
														
 
															+#define PRIdMAX     "I64d"
														
 
															+#define PRIiMAX     "I64i"
														
 
															+
														
 
															+#define PRIdPTR     "Id"
														
 
															+#define PRIiPTR     "Ii"
														
 
															+
														
 
															+// The fprintf macros for unsigned integers are:
														
 
															+#define PRIo8       "o"
														
 
															+#define PRIu8       "u"
														
 
															+#define PRIx8       "x"
														
 
															+#define PRIX8       "X"
														
 
															+#define PRIoLEAST8  "o"
														
 
															+#define PRIuLEAST8  "u"
														
 
															+#define PRIxLEAST8  "x"
														
 
															+#define PRIXLEAST8  "X"
														
 
															+#define PRIoFAST8   "o"
														
 
															+#define PRIuFAST8   "u"
														
 
															+#define PRIxFAST8   "x"
														
 
															+#define PRIXFAST8   "X"
														
 
															+
														
 
															+#define PRIo16       "ho"
														
 
															+#define PRIu16       "hu"
														
 
															+#define PRIx16       "hx"
														
 
															+#define PRIX16       "hX"
														
 
															+#define PRIoLEAST16  "ho"
														
 
															+#define PRIuLEAST16  "hu"
														
 
															+#define PRIxLEAST16  "hx"
														
 
															+#define PRIXLEAST16  "hX"
														
 
															+#define PRIoFAST16   "ho"
														
 
															+#define PRIuFAST16   "hu"
														
 
															+#define PRIxFAST16   "hx"
														
 
															+#define PRIXFAST16   "hX"
														
 
															+
														
 
															+#define PRIo32       "I32o"
														
 
															+#define PRIu32       "I32u"
														
 
															+#define PRIx32       "I32x"
														
 
															+#define PRIX32       "I32X"
														
 
															+#define PRIoLEAST32  "I32o"
														
 
															+#define PRIuLEAST32  "I32u"
														
 
															+#define PRIxLEAST32  "I32x"
														
 
															+#define PRIXLEAST32  "I32X"
														
 
															+#define PRIoFAST32   "I32o"
														
 
															+#define PRIuFAST32   "I32u"
														
 
															+#define PRIxFAST32   "I32x"
														
 
															+#define PRIXFAST32   "I32X"
														
 
															+
														
 
															+#define PRIo64       "I64o"
														
 
															+#define PRIu64       "I64u"
														
 
															+#define PRIx64       "I64x"
														
 
															+#define PRIX64       "I64X"
														
 
															+#define PRIoLEAST64  "I64o"
														
 
															+#define PRIuLEAST64  "I64u"
														
 
															+#define PRIxLEAST64  "I64x"
														
 
															+#define PRIXLEAST64  "I64X"
														
 
															+#define PRIoFAST64   "I64o"
														
 
															+#define PRIuFAST64   "I64u"
														
 
															+#define PRIxFAST64   "I64x"
														
 
															+#define PRIXFAST64   "I64X"
														
 
															+
														
 
															+#define PRIoMAX     "I64o"
														
 
															+#define PRIuMAX     "I64u"
														
 
															+#define PRIxMAX     "I64x"
														
 
															+#define PRIXMAX     "I64X"
														
 
															+
														
 
															+#define PRIoPTR     "Io"
														
 
															+#define PRIuPTR     "Iu"
														
 
															+#define PRIxPTR     "Ix"
														
 
															+#define PRIXPTR     "IX"
														
 
															+
														
 
															+// The fscanf macros for signed integers are:
														
 
															+#define SCNd8       "d"
														
 
															+#define SCNi8       "i"
														
 
															+#define SCNdLEAST8  "d"
														
 
															+#define SCNiLEAST8  "i"
														
 
															+#define SCNdFAST8   "d"
														
 
															+#define SCNiFAST8   "i"
														
 
															+
														
 
															+#define SCNd16       "hd"
														
 
															+#define SCNi16       "hi"
														
 
															+#define SCNdLEAST16  "hd"
														
 
															+#define SCNiLEAST16  "hi"
														
 
															+#define SCNdFAST16   "hd"
														
 
															+#define SCNiFAST16   "hi"
														
 
															+
														
 
															+#define SCNd32       "ld"
														
 
															+#define SCNi32       "li"
														
 
															+#define SCNdLEAST32  "ld"
														
 
															+#define SCNiLEAST32  "li"
														
 
															+#define SCNdFAST32   "ld"
														
 
															+#define SCNiFAST32   "li"
														
 
															+
														
 
															+#define SCNd64       "I64d"
														
 
															+#define SCNi64       "I64i"
														
 
															+#define SCNdLEAST64  "I64d"
														
 
															+#define SCNiLEAST64  "I64i"
														
 
															+#define SCNdFAST64   "I64d"
														
 
															+#define SCNiFAST64   "I64i"
														
 
															+
														
 
															+#define SCNdMAX     "I64d"
														
 
															+#define SCNiMAX     "I64i"
														
 
															+
														
 
															+#ifdef _WIN64 // [
														
 
															+#  define SCNdPTR     "I64d"
														
 
															+#  define SCNiPTR     "I64i"
														
 
															+#else  // _WIN64 ][
														
 
															+#  define SCNdPTR     "ld"
														
 
															+#  define SCNiPTR     "li"
														
 
															+#endif  // _WIN64 ]
														
 
															+
														
 
															+// The fscanf macros for unsigned integers are:
														
 
															+#define SCNo8       "o"
														
 
															+#define SCNu8       "u"
														
 
															+#define SCNx8       "x"
														
 
															+#define SCNX8       "X"
														
 
															+#define SCNoLEAST8  "o"
														
 
															+#define SCNuLEAST8  "u"
														
 
															+#define SCNxLEAST8  "x"
														
 
															+#define SCNXLEAST8  "X"
														
 
															+#define SCNoFAST8   "o"
														
 
															+#define SCNuFAST8   "u"
														
 
															+#define SCNxFAST8   "x"
														
 
															+#define SCNXFAST8   "X"
														
 
															+
														
 
															+#define SCNo16       "ho"
														
 
															+#define SCNu16       "hu"
														
 
															+#define SCNx16       "hx"
														
 
															+#define SCNX16       "hX"
														
 
															+#define SCNoLEAST16  "ho"
														
 
															+#define SCNuLEAST16  "hu"
														
 
															+#define SCNxLEAST16  "hx"
														
 
															+#define SCNXLEAST16  "hX"
														
 
															+#define SCNoFAST16   "ho"
														
 
															+#define SCNuFAST16   "hu"
														
 
															+#define SCNxFAST16   "hx"
														
 
															+#define SCNXFAST16   "hX"
														
 
															+
														
 
															+#define SCNo32       "lo"
														
 
															+#define SCNu32       "lu"
														
 
															+#define SCNx32       "lx"
														
 
															+#define SCNX32       "lX"
														
 
															+#define SCNoLEAST32  "lo"
														
 
															+#define SCNuLEAST32  "lu"
														
 
															+#define SCNxLEAST32  "lx"
														
 
															+#define SCNXLEAST32  "lX"
														
 
															+#define SCNoFAST32   "lo"
														
 
															+#define SCNuFAST32   "lu"
														
 
															+#define SCNxFAST32   "lx"
														
 
															+#define SCNXFAST32   "lX"
														
 
															+
														
 
															+#define SCNo64       "I64o"
														
 
															+#define SCNu64       "I64u"
														
 
															+#define SCNx64       "I64x"
														
 
															+#define SCNX64       "I64X"
														
 
															+#define SCNoLEAST64  "I64o"
														
 
															+#define SCNuLEAST64  "I64u"
														
 
															+#define SCNxLEAST64  "I64x"
														
 
															+#define SCNXLEAST64  "I64X"
														
 
															+#define SCNoFAST64   "I64o"
														
 
															+#define SCNuFAST64   "I64u"
														
 
															+#define SCNxFAST64   "I64x"
														
 
															+#define SCNXFAST64   "I64X"
														
 
															+
														
 
															+#define SCNoMAX     "I64o"
														
 
															+#define SCNuMAX     "I64u"
														
 
															+#define SCNxMAX     "I64x"
														
 
															+#define SCNXMAX     "I64X"
														
 
															+
														
 
															+#ifdef _WIN64 // [
														
 
															+#  define SCNoPTR     "I64o"
														
 
															+#  define SCNuPTR     "I64u"
														
 
															+#  define SCNxPTR     "I64x"
														
 
															+#  define SCNXPTR     "I64X"
														
 
															+#else  // _WIN64 ][
														
 
															+#  define SCNoPTR     "lo"
														
 
															+#  define SCNuPTR     "lu"
														
 
															+#  define SCNxPTR     "lx"
														
 
															+#  define SCNXPTR     "lX"
														
 
															+#endif  // _WIN64 ]
														
 
															+
														
 
															+#endif // __STDC_FORMAT_MACROS ]
														
 
															+
														
 
															+// 7.8.2 Functions for greatest-width integer types
														
 
															+
														
 
															+// 7.8.2.1 The imaxabs function
														
 
															+#define imaxabs _abs64
														
 
															+
														
 
															+// 7.8.2.2 The imaxdiv function
														
 
															+
														
 
															+// Adapted from the div() implementation in Microsoft's div.c
														
 
															+// (%MSVC.NET%\crt\src\div.c), here operating on intmax_t operands.
														
 
															+// Storage class: plain 'static' when STATIC_IMAXDIV is defined, '_inline' otherwise.
														
 
															+#ifndef STATIC_IMAXDIV // [
														
 
															+_inline
														
 
															+#else // STATIC_IMAXDIV ][
														
 
															+static
														
 
															+#endif // STATIC_IMAXDIV ]
														
 
															+imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
														
 
															+{
														
 
															+   imaxdiv_t qr;
														
 
															+
														
 
															+   qr.quot = numer / denom;
														
 
															+   qr.rem  = numer % denom;
														
 
															+
														
 
															+   // Fix-up carried over from div(): a negative numerator paired with a
														
 
															+   // positive remainder is corrected by moving one denom from rem into quot.
														
 
															+   if (qr.rem > 0 && numer < 0) {
														
 
															+      qr.quot += 1;
														
 
															+      qr.rem  -= denom;
														
 
															+   }
														
 
															+
														
 
															+   return qr;
														
 
															+}
														
 
															+
														
 
															+// 7.8.2.3 The strtoimax and strtoumax functions
														
 
															+#define strtoimax _strtoi64
														
 
															+#define strtoumax _strtoui64
														
 
															+
														
 
															+// 7.8.2.4 The wcstoimax and wcstoumax functions
														
 
															+#define wcstoimax _wcstoi64
														
 
															+#define wcstoumax _wcstoui64
														
 
															+
														
 
															+
														
 
															+#endif // _MSC_INTTYPES_H_ ]
														
--- a/include/compat/msvc/stdint.h
+++ b/include/compat/msvc/stdint.h
@@ -1,247 +1,247 @@
 
															-// ISO C9x  compliant stdint.h for Microsoft Visual Studio

														
 
															-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 

														
 
															-// 

														
 
															-//  Copyright (c) 2006-2008 Alexander Chemeris

														
 
															-// 

														
 
															-// Redistribution and use in source and binary forms, with or without

														
 
															-// modification, are permitted provided that the following conditions are met:

														
 
															-// 

														
 
															-//   1. Redistributions of source code must retain the above copyright notice,

														
 
															-//      this list of conditions and the following disclaimer.

														
 
															-// 

														
 
															-//   2. Redistributions in binary form must reproduce the above copyright

														
 
															-//      notice, this list of conditions and the following disclaimer in the

														
 
															-//      documentation and/or other materials provided with the distribution.

														
 
															-// 

														
 
															-//   3. The name of the author may be used to endorse or promote products

														
 
															-//      derived from this software without specific prior written permission.

														
 
															-// 

														
 
															-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED

														
 
															-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF

														
 
															-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO

														
 
															-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

														
 
															-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

														
 
															-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

														
 
															-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 

														
 
															-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR

														
 
															-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF

														
 
															-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

														
 
															-// 

														
 
															-///////////////////////////////////////////////////////////////////////////////

														
 
															-

														
 
															-#ifndef _MSC_VER // [

														
 
															-#error "Use this header only with Microsoft Visual C++ compilers!"

														
 
															-#endif // _MSC_VER ]

														
 
															-

														
 
															-#ifndef _MSC_STDINT_H_ // [

														
 
															-#define _MSC_STDINT_H_

														
 
															-

														
 
															-#if _MSC_VER > 1000

														
 
															-#pragma once

														
 
															-#endif

														
 
															-

														
 
															-#include <limits.h>

														
 
															-

														
 
															-// For Visual Studio 6 in C++ mode and for many Visual Studio versions when

														
 
															-// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'

														
 
															-// or compiler give many errors like this:

														
 
															-//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed

														
 
															-#ifdef __cplusplus

														
 
															-extern "C" {

														
 
															-#endif

														
 
															-#  include <wchar.h>

														
 
															-#ifdef __cplusplus

														
 
															-}

														
 
															-#endif

														
 
															-

														
 
															-// Define _W64 macros to mark types changing their size, like intptr_t.

														
 
															-#ifndef _W64

														
 
															-#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300

														
 
															-#     define _W64 __w64

														
 
															-#  else

														
 
															-#     define _W64

														
 
															-#  endif

														
 
															-#endif

														
 
															-

														
 
															-

														
 
															-// 7.18.1 Integer types

														
 
															-

														
 
															-// 7.18.1.1 Exact-width integer types

														
 
															-

														
 
															-// Visual Studio 6 and Embedded Visual C++ 4 doesn't

														
 
															-// realize that, e.g. char has the same size as __int8

														
 
															-// so we give up on __intX for them.

														
 
															-#if (_MSC_VER < 1300)

														
 
															-   typedef signed char       int8_t;

														
 
															-   typedef signed short      int16_t;

														
 
															-   typedef signed int        int32_t;

														
 
															-   typedef unsigned char     uint8_t;

														
 
															-   typedef unsigned short    uint16_t;

														
 
															-   typedef unsigned int      uint32_t;

														
 
															-#else

														
 
															-   typedef signed __int8     int8_t;

														
 
															-   typedef signed __int16    int16_t;

														
 
															-   typedef signed __int32    int32_t;

														
 
															-   typedef unsigned __int8   uint8_t;

														
 
															-   typedef unsigned __int16  uint16_t;

														
 
															-   typedef unsigned __int32  uint32_t;

														
 
															-#endif

														
 
															-typedef signed __int64       int64_t;

														
 
															-typedef unsigned __int64     uint64_t;

														
 
															-

														
 
															-

														
 
															-// 7.18.1.2 Minimum-width integer types

														
 
															-typedef int8_t    int_least8_t;

														
 
															-typedef int16_t   int_least16_t;

														
 
															-typedef int32_t   int_least32_t;

														
 
															-typedef int64_t   int_least64_t;

														
 
															-typedef uint8_t   uint_least8_t;

														
 
															-typedef uint16_t  uint_least16_t;

														
 
															-typedef uint32_t  uint_least32_t;

														
 
															-typedef uint64_t  uint_least64_t;

														
 
															-

														
 
															-// 7.18.1.3 Fastest minimum-width integer types

														
 
															-typedef int8_t    int_fast8_t;

														
 
															-typedef int16_t   int_fast16_t;

														
 
															-typedef int32_t   int_fast32_t;

														
 
															-typedef int64_t   int_fast64_t;

														
 
															-typedef uint8_t   uint_fast8_t;

														
 
															-typedef uint16_t  uint_fast16_t;

														
 
															-typedef uint32_t  uint_fast32_t;

														
 
															-typedef uint64_t  uint_fast64_t;

														
 
															-

														
 
															-// 7.18.1.4 Integer types capable of holding object pointers

														
 
															-#ifdef _WIN64 // [

														
 
															-   typedef signed __int64    intptr_t;

														
 
															-   typedef unsigned __int64  uintptr_t;

														
 
															-#else // _WIN64 ][

														
 
															-   typedef _W64 signed int   intptr_t;

														
 
															-   typedef _W64 unsigned int uintptr_t;

														
 
															-#endif // _WIN64 ]

														
 
															-

														
 
															-// 7.18.1.5 Greatest-width integer types

														
 
															-typedef int64_t   intmax_t;

														
 
															-typedef uint64_t  uintmax_t;

														
 
															-

														
 
															-

														
 
															-// 7.18.2 Limits of specified-width integer types

														
 
															-

														
 
															-#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259

														
 
															-

														
 
															-// 7.18.2.1 Limits of exact-width integer types

														
 
															-#define INT8_MIN     ((int8_t)_I8_MIN)

														
 
															-#define INT8_MAX     _I8_MAX

														
 
															-#define INT16_MIN    ((int16_t)_I16_MIN)

														
 
															-#define INT16_MAX    _I16_MAX

														
 
															-#define INT32_MIN    ((int32_t)_I32_MIN)

														
 
															-#define INT32_MAX    _I32_MAX

														
 
															-#define INT64_MIN    ((int64_t)_I64_MIN)

														
 
															-#define INT64_MAX    _I64_MAX

														
 
															-#define UINT8_MAX    _UI8_MAX

														
 
															-#define UINT16_MAX   _UI16_MAX

														
 
															-#define UINT32_MAX   _UI32_MAX

														
 
															-#define UINT64_MAX   _UI64_MAX

														
 
															-

														
 
															-// 7.18.2.2 Limits of minimum-width integer types

														
 
															-#define INT_LEAST8_MIN    INT8_MIN

														
 
															-#define INT_LEAST8_MAX    INT8_MAX

														
 
															-#define INT_LEAST16_MIN   INT16_MIN

														
 
															-#define INT_LEAST16_MAX   INT16_MAX

														
 
															-#define INT_LEAST32_MIN   INT32_MIN

														
 
															-#define INT_LEAST32_MAX   INT32_MAX

														
 
															-#define INT_LEAST64_MIN   INT64_MIN

														
 
															-#define INT_LEAST64_MAX   INT64_MAX

														
 
															-#define UINT_LEAST8_MAX   UINT8_MAX

														
 
															-#define UINT_LEAST16_MAX  UINT16_MAX

														
 
															-#define UINT_LEAST32_MAX  UINT32_MAX

														
 
															-#define UINT_LEAST64_MAX  UINT64_MAX

														
 
															-

														
 
															-// 7.18.2.3 Limits of fastest minimum-width integer types

														
 
															-#define INT_FAST8_MIN    INT8_MIN

														
 
															-#define INT_FAST8_MAX    INT8_MAX

														
 
															-#define INT_FAST16_MIN   INT16_MIN

														
 
															-#define INT_FAST16_MAX   INT16_MAX

														
 
															-#define INT_FAST32_MIN   INT32_MIN

														
 
															-#define INT_FAST32_MAX   INT32_MAX

														
 
															-#define INT_FAST64_MIN   INT64_MIN

														
 
															-#define INT_FAST64_MAX   INT64_MAX

														
 
															-#define UINT_FAST8_MAX   UINT8_MAX

														
 
															-#define UINT_FAST16_MAX  UINT16_MAX

														
 
															-#define UINT_FAST32_MAX  UINT32_MAX

														
 
															-#define UINT_FAST64_MAX  UINT64_MAX

														
 
															-

														
 
															-// 7.18.2.4 Limits of integer types capable of holding object pointers

														
 
															-#ifdef _WIN64 // [

														
 
															-#  define INTPTR_MIN   INT64_MIN

														
 
															-#  define INTPTR_MAX   INT64_MAX

														
 
															-#  define UINTPTR_MAX  UINT64_MAX

														
 
															-#else // _WIN64 ][

														
 
															-#  define INTPTR_MIN   INT32_MIN

														
 
															-#  define INTPTR_MAX   INT32_MAX

														
 
															-#  define UINTPTR_MAX  UINT32_MAX

														
 
															-#endif // _WIN64 ]

														
 
															-

														
 
															-// 7.18.2.5 Limits of greatest-width integer types

														
 
															-#define INTMAX_MIN   INT64_MIN

														
 
															-#define INTMAX_MAX   INT64_MAX

														
 
															-#define UINTMAX_MAX  UINT64_MAX

														
 
															-

														
 
															-// 7.18.3 Limits of other integer types

														
 
															-

														
 
															-#ifdef _WIN64 // [

														
 
															-#  define PTRDIFF_MIN  _I64_MIN

														
 
															-#  define PTRDIFF_MAX  _I64_MAX

														
 
															-#else  // _WIN64 ][

														
 
															-#  define PTRDIFF_MIN  _I32_MIN

														
 
															-#  define PTRDIFF_MAX  _I32_MAX

														
 
															-#endif  // _WIN64 ]

														
 
															-

														
 
															-#define SIG_ATOMIC_MIN  INT_MIN

														
 
															-#define SIG_ATOMIC_MAX  INT_MAX

														
 
															-

														
 
															-#ifndef SIZE_MAX // [

														
 
															-#  ifdef _WIN64 // [

														
 
															-#     define SIZE_MAX  _UI64_MAX

														
 
															-#  else // _WIN64 ][

														
 
															-#     define SIZE_MAX  _UI32_MAX

														
 
															-#  endif // _WIN64 ]

														
 
															-#endif // SIZE_MAX ]

														
 
															-

														
 
															-// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>

														
 
															-#ifndef WCHAR_MIN // [

														
 
															-#  define WCHAR_MIN  0

														
 
															-#endif  // WCHAR_MIN ]

														
 
															-#ifndef WCHAR_MAX // [

														
 
															-#  define WCHAR_MAX  _UI16_MAX

														
 
															-#endif  // WCHAR_MAX ]

														
 
															-

														
 
															-#define WINT_MIN  0

														
 
															-#define WINT_MAX  _UI16_MAX

														
 
															-

														
 
															-#endif // __STDC_LIMIT_MACROS ]

														
 
															-

														
 
															-

														
 
															-// 7.18.4 Limits of other integer types

														
 
															-

														
 
															-#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260

														
 
															-

														
 
															-// 7.18.4.1 Macros for minimum-width integer constants

														
 
															-

														
 
															-#define INT8_C(val)  val##i8

														
 
															-#define INT16_C(val) val##i16

														
 
															-#define INT32_C(val) val##i32

														
 
															-#define INT64_C(val) val##i64

														
 
															-

														
 
															-#define UINT8_C(val)  val##ui8

														
 
															-#define UINT16_C(val) val##ui16

														
 
															-#define UINT32_C(val) val##ui32

														
 
															-#define UINT64_C(val) val##ui64

														
 
															-

														
 
															-// 7.18.4.2 Macros for greatest-width integer constants

														
 
															-#define INTMAX_C   INT64_C

														
 
															-#define UINTMAX_C  UINT64_C

														
 
															-

														
 
															-#endif // __STDC_CONSTANT_MACROS ]

														
 
															-

														
 
															-

														
 
															-#endif // _MSC_STDINT_H_ ]

														
 
															+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
														
 
															+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
														
 
															+// 
														
 
															+//  Copyright (c) 2006-2008 Alexander Chemeris
														
 
															+// 
														
 
															+// Redistribution and use in source and binary forms, with or without
														
 
															+// modification, are permitted provided that the following conditions are met:
														
 
															+// 
														
 
															+//   1. Redistributions of source code must retain the above copyright notice,
														
 
															+//      this list of conditions and the following disclaimer.
														
 
															+// 
														
 
															+//   2. Redistributions in binary form must reproduce the above copyright
														
 
															+//      notice, this list of conditions and the following disclaimer in the
														
 
															+//      documentation and/or other materials provided with the distribution.
														
 
															+// 
														
 
															+//   3. The name of the author may be used to endorse or promote products
														
 
															+//      derived from this software without specific prior written permission.
														
 
															+// 
														
 
															+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
														
 
															+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
														
 
															+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
														
 
															+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
														
 
															+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
														
 
															+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
														
 
															+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
														
 
															+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
														
 
															+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
														
 
															+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
														
 
															+// 
														
 
															+///////////////////////////////////////////////////////////////////////////////
														
 
															+
														
 
															+#ifndef _MSC_VER // [
														
 
															+#error "Use this header only with Microsoft Visual C++ compilers!"
														
 
															+#endif // _MSC_VER ]
														
 
															+
														
 
															+#ifndef _MSC_STDINT_H_ // [
														
 
															+#define _MSC_STDINT_H_
														
 
															+
														
 
															+#if _MSC_VER > 1000
														
 
															+#pragma once
														
 
															+#endif
														
 
															+
														
 
															+#include <limits.h>
														
 
															+
														
 
															+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
														
 
															+// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
														
 
															+// or the compiler gives many errors like this:
														
 
															+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
														
 
															+#ifdef __cplusplus
														
 
															+extern "C" {
														
 
															+#endif
														
 
															+#  include <wchar.h>
														
 
															+#ifdef __cplusplus
														
 
															+}
														
 
															+#endif
														
 
															+
														
 
															+// Define _W64 macros to mark types changing their size, like intptr_t.
														
 
															+#ifndef _W64
														
 
															+#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
														
 
															+#     define _W64 __w64
														
 
															+#  else
														
 
															+#     define _W64
														
 
															+#  endif
														
 
															+#endif
														
 
															+
														
 
															+
														
 
															+// 7.18.1 Integer types
														
 
															+
														
 
															+// 7.18.1.1 Exact-width integer types
														
 
															+
														
 
															+// Visual Studio 6 and Embedded Visual C++ 4 don't
														
 
															+// realize that, e.g. char has the same size as __int8
														
 
															+// so we give up on __intX for them.
														
 
															+#if (_MSC_VER < 1300)
														
 
															+   typedef signed char       int8_t;
														
 
															+   typedef signed short      int16_t;
														
 
															+   typedef signed int        int32_t;
														
 
															+   typedef unsigned char     uint8_t;
														
 
															+   typedef unsigned short    uint16_t;
														
 
															+   typedef unsigned int      uint32_t;
														
 
															+#else
														
 
															+   typedef signed __int8     int8_t;
														
 
															+   typedef signed __int16    int16_t;
														
 
															+   typedef signed __int32    int32_t;
														
 
															+   typedef unsigned __int8   uint8_t;
														
 
															+   typedef unsigned __int16  uint16_t;
														
 
															+   typedef unsigned __int32  uint32_t;
														
 
															+#endif
														
 
															+typedef signed __int64       int64_t;
														
 
															+typedef unsigned __int64     uint64_t;
														
 
															+
														
 
															+
														
 
															+// 7.18.1.2 Minimum-width integer types
														
 
															+typedef int8_t    int_least8_t;
														
 
															+typedef int16_t   int_least16_t;
														
 
															+typedef int32_t   int_least32_t;
														
 
															+typedef int64_t   int_least64_t;
														
 
															+typedef uint8_t   uint_least8_t;
														
 
															+typedef uint16_t  uint_least16_t;
														
 
															+typedef uint32_t  uint_least32_t;
														
 
															+typedef uint64_t  uint_least64_t;
														
 
															+
														
 
															+// 7.18.1.3 Fastest minimum-width integer types
														
 
															+typedef int8_t    int_fast8_t;
														
 
															+typedef int16_t   int_fast16_t;
														
 
															+typedef int32_t   int_fast32_t;
														
 
															+typedef int64_t   int_fast64_t;
														
 
															+typedef uint8_t   uint_fast8_t;
														
 
															+typedef uint16_t  uint_fast16_t;
														
 
															+typedef uint32_t  uint_fast32_t;
														
 
															+typedef uint64_t  uint_fast64_t;
														
 
															+
														
 
															+// 7.18.1.4 Integer types capable of holding object pointers
														
 
															+#ifdef _WIN64 // [
														
 
															+   typedef signed __int64    intptr_t;
														
 
															+   typedef unsigned __int64  uintptr_t;
														
 
															+#else // _WIN64 ][
														
 
															+   typedef _W64 signed int   intptr_t;
														
 
															+   typedef _W64 unsigned int uintptr_t;
														
 
															+#endif // _WIN64 ]
														
 
															+
														
 
															+// 7.18.1.5 Greatest-width integer types
														
 
															+typedef int64_t   intmax_t;
														
 
															+typedef uint64_t  uintmax_t;
														
 
															+
														
 
															+
														
 
															+// 7.18.2 Limits of specified-width integer types
														
 
															+
														
 
															+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
														
 
															+
														
 
															+// 7.18.2.1 Limits of exact-width integer types
														
 
															+#define INT8_MIN     ((int8_t)_I8_MIN)
														
 
															+#define INT8_MAX     _I8_MAX
														
 
															+#define INT16_MIN    ((int16_t)_I16_MIN)
														
 
															+#define INT16_MAX    _I16_MAX
														
 
															+#define INT32_MIN    ((int32_t)_I32_MIN)
														
 
															+#define INT32_MAX    _I32_MAX
														
 
															+#define INT64_MIN    ((int64_t)_I64_MIN)
														
 
															+#define INT64_MAX    _I64_MAX
														
 
															+#define UINT8_MAX    _UI8_MAX
														
 
															+#define UINT16_MAX   _UI16_MAX
														
 
															+#define UINT32_MAX   _UI32_MAX
														
 
															+#define UINT64_MAX   _UI64_MAX
														
 
															+
														
 
															+// 7.18.2.2 Limits of minimum-width integer types
														
 
															+#define INT_LEAST8_MIN    INT8_MIN
														
 
															+#define INT_LEAST8_MAX    INT8_MAX
														
 
															+#define INT_LEAST16_MIN   INT16_MIN
														
 
															+#define INT_LEAST16_MAX   INT16_MAX
														
 
															+#define INT_LEAST32_MIN   INT32_MIN
														
 
															+#define INT_LEAST32_MAX   INT32_MAX
														
 
															+#define INT_LEAST64_MIN   INT64_MIN
														
 
															+#define INT_LEAST64_MAX   INT64_MAX
														
 
															+#define UINT_LEAST8_MAX   UINT8_MAX
														
 
															+#define UINT_LEAST16_MAX  UINT16_MAX
														
 
															+#define UINT_LEAST32_MAX  UINT32_MAX
														
 
															+#define UINT_LEAST64_MAX  UINT64_MAX
														
 
															+
														
 
															+// 7.18.2.3 Limits of fastest minimum-width integer types
														
 
															+#define INT_FAST8_MIN    INT8_MIN
														
 
															+#define INT_FAST8_MAX    INT8_MAX
														
 
															+#define INT_FAST16_MIN   INT16_MIN
														
 
															+#define INT_FAST16_MAX   INT16_MAX
														
 
															+#define INT_FAST32_MIN   INT32_MIN
														
 
															+#define INT_FAST32_MAX   INT32_MAX
														
 
															+#define INT_FAST64_MIN   INT64_MIN
														
 
															+#define INT_FAST64_MAX   INT64_MAX
														
 
															+#define UINT_FAST8_MAX   UINT8_MAX
														
 
															+#define UINT_FAST16_MAX  UINT16_MAX
														
 
															+#define UINT_FAST32_MAX  UINT32_MAX
														
 
															+#define UINT_FAST64_MAX  UINT64_MAX
														
 
															+
														
 
															+// 7.18.2.4 Limits of integer types capable of holding object pointers
														
 
															+#ifdef _WIN64 // [
														
 
															+#  define INTPTR_MIN   INT64_MIN
														
 
															+#  define INTPTR_MAX   INT64_MAX
														
 
															+#  define UINTPTR_MAX  UINT64_MAX
														
 
															+#else // _WIN64 ][
														
 
															+#  define INTPTR_MIN   INT32_MIN
														
 
															+#  define INTPTR_MAX   INT32_MAX
														
 
															+#  define UINTPTR_MAX  UINT32_MAX
														
 
															+#endif // _WIN64 ]
														
 
															+
														
 
															+// 7.18.2.5 Limits of greatest-width integer types
														
 
															+#define INTMAX_MIN   INT64_MIN
														
 
															+#define INTMAX_MAX   INT64_MAX
														
 
															+#define UINTMAX_MAX  UINT64_MAX
														
 
															+
														
 
															+// 7.18.3 Limits of other integer types
														
 
															+
														
 
															+#ifdef _WIN64 // [
														
 
															+#  define PTRDIFF_MIN  _I64_MIN
														
 
															+#  define PTRDIFF_MAX  _I64_MAX
														
 
															+#else  // _WIN64 ][
														
 
															+#  define PTRDIFF_MIN  _I32_MIN
														
 
															+#  define PTRDIFF_MAX  _I32_MAX
														
 
															+#endif  // _WIN64 ]
														
 
															+
														
 
															+#define SIG_ATOMIC_MIN  INT_MIN
														
 
															+#define SIG_ATOMIC_MAX  INT_MAX
														
 
															+
														
 
															+#ifndef SIZE_MAX // [
														
 
															+#  ifdef _WIN64 // [
														
 
															+#     define SIZE_MAX  _UI64_MAX
														
 
															+#  else // _WIN64 ][
														
 
															+#     define SIZE_MAX  _UI32_MAX
														
 
															+#  endif // _WIN64 ]
														
 
															+#endif // SIZE_MAX ]
														
 
															+
														
 
															+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
														
 
															+#ifndef WCHAR_MIN // [
														
 
															+#  define WCHAR_MIN  0
														
 
															+#endif  // WCHAR_MIN ]
														
 
															+#ifndef WCHAR_MAX // [
														
 
															+#  define WCHAR_MAX  _UI16_MAX
														
 
															+#endif  // WCHAR_MAX ]
														
 
															+
														
 
															+#define WINT_MIN  0
														
 
															+#define WINT_MAX  _UI16_MAX
														
 
															+
														
 
															+#endif // __STDC_LIMIT_MACROS ]
														
 
															+
														
 
															+
														
 
															+// 7.18.4 Macros for integer constants
														
 
															+
														
 
															+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   Always enabled for C; for C++ only when __STDC_CONSTANT_MACROS is defined. See footnote 224 at page 260
														
 
															+
														
 
															+// 7.18.4.1 Macros for minimum-width integer constants (each pastes a sized-integer literal suffix onto val)
														
 
															+
														
 
															+#define INT8_C(val)  val##i8
														
 
															+#define INT16_C(val) val##i16
														
 
															+#define INT32_C(val) val##i32
														
 
															+#define INT64_C(val) val##i64
														
 
															+
														
 
															+#define UINT8_C(val)  val##ui8
														
 
															+#define UINT16_C(val) val##ui16
														
 
															+#define UINT32_C(val) val##ui32
														
 
															+#define UINT64_C(val) val##ui64
														
 
															+
														
 
															+// 7.18.4.2 Macros for greatest-width integer constants (aliases of the 64-bit constant macros above)
														
 
															+#define INTMAX_C   INT64_C
														
 
															+#define UINTMAX_C  UINT64_C
														
 
															+
														
 
															+#endif // !__cplusplus || __STDC_CONSTANT_MACROS ]
														
 
															+
														
 
															+
														
 
															+#endif // _MSC_STDINT_H_ ]
														
--- a/include/compat/nacl/memory.h
+++ b/include/compat/nacl/memory.h
@@ -1 +1 @@
 
															-#include <string.h>

														
 
															+#include <string.h>
														
--- a/premake/bx.lua
+++ b/premake/bx.lua
@@ -1,7 +1,7 @@
 
															-project "bx"

														
 
															-	uuid "4db0b09e-d6df-11e1-a0ec-65ccdd6a022f"

														
 
															-	kind "StaticLib"

														
 
															-

														
 
															-	files {

														
 
															-		"../include/**.h",

														
 
															-	}

														
 
															+project "bx"
														
 
															+	uuid "4db0b09e-d6df-11e1-a0ec-65ccdd6a022f"
														
 
															+	kind "StaticLib"
														
 
															+
														
 
															+	files {
														
 
															+		"../include/**.h",
														
 
															+	}