8 лет назад · d5ed0632f2
--- a/Source/BansheeUtility/CMakeLists.txt
+++ b/Source/BansheeUtility/CMakeLists.txt
@@ -17,10 +17,13 @@ if(LINUX)
 
															 	endif()
														
 
															 endif()
														
 
															+# Third-party libraries that live in-tree instead of being located as packages
															+add_library(ThirdParty INTERFACE)
															+target_include_directories(ThirdParty
															+	INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/ThirdParty")
														
 
															+
														
 
															 # Includes
														
 
															 set(BansheeUtility_INC 
														
 
															-	"./" 
														
 
															-	"ThirdParty")
														
 
															+	"./")
														
 
															 if(WIN32)
														
 
															 	set(BansheeUtility_INC ${BansheeUtility_INC} "Win32")
														
@@ -41,6 +44,9 @@ target_compile_definitions(BansheeUtility PRIVATE -DBS_UTILITY_EXPORTS)
 
															 ## External lib: Snappy
														
 
															 target_link_libraries(BansheeUtility PRIVATE ${snappy_LIBRARIES})	
														
 
															+## External libs: header-only libraries (PUBLIC, so dependents receive the include path as well)
															+target_link_libraries(BansheeUtility
															+	PUBLIC ThirdParty)
														
 
															+
														
 
															 if(WIN32)
														
 
															 	## OS libs
														
 
															 	target_link_libraries(BansheeUtility PRIVATE DbgHelp IPHLPAPI Rpcrt4)
														
--- a/Source/BansheeUtility/CMakeSources.cmake
+++ b/Source/BansheeUtility/CMakeSources.cmake
@@ -216,6 +216,7 @@ set(BS_BANSHEEUTILITY_INC_MATH
 
															 	"Math/BsCapsule.h"
														
 
															 	"Math/BsMatrixNxM.h"
														
 
															 	"Math/BsLine2.h"
														
 
															+	"Math/BsSIMD.h"
														
 
															 )
														
 
															 set(BS_BANSHEEUTILITY_SRC_ERROR
														
--- a/Source/BansheeUtility/Math/BsSIMD.h
+++ b/Source/BansheeUtility/Math/BsSIMD.h
@@ -0,0 +1,89 @@
 
															+//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
														
 
															+//**************** Copyright (c) 2017 Marko Pintera ([email protected]). All rights reserved. **********************//
														
 
															+#pragma once
														
 
															+
														
 
															+#include "Prerequisites/BsPrerequisitesUtil.h"
														
 
															+#include "Math/BsVector4.h"
														
 
															+#include "Math/BsAABox.h"
														
 
															+#include "Math/BsSphere.h"
														
 
															+
														
 
															+#define SIMDPP_ARCH_X86_SSE4_1
														
 
															+
														
 
															+// Silence MSVC warning C4244 for the simdpp include only. push/pop restores whatever warning state was active before
															+// this header, instead of forcing the warning back to the compiler default.
															+#if BS_COMPILER == BS_COMPILER_MSVC
															+#pragma warning(push)
															+#pragma warning(disable: 4244)
															+#endif
															+
															+#include "ThirdParty/simdpp/simd.h"
															+
															+#if BS_COMPILER == BS_COMPILER_MSVC
															+#pragma warning(pop)
															+#endif
														
 
															+
														
 
															+namespace bs
														
 
															+{
														
 
															+	namespace simd
														
 
															+	{
														
 
															+		using namespace simdpp;
														
 
															+
														
 
															+		/** @addtogroup Math
														
 
															+		 *  @{
														
 
															+		 */
														
 
															+
														
 
															+		/** 
														
 
															+		 * Version of bs::AABox suitable for SIMD use. Takes up a bit more memory than standard AABox and is always 16-byte
														
 
															+		 * aligned.
														
 
															+		 */
														
 
															+		struct AABox
														
 
															+		{
														
 
															+			/** Center of the bounds, W component unused. */
														
 
															+			SIMDPP_ALIGN(16) Vector4 center;
														
 
															+
														
 
															+			/** Extents (half-size) of the bounds, W component unused. */
														
 
															+			SIMDPP_ALIGN(16) Vector4 extents;
														
 
															+
														
 
															+			AABox()
														
 
															+			{ }
														
 
															+
														
 
															+			/** Initializes bounds from an AABox. */
														
 
															+			AABox(const bs::AABox& box)
														
 
															+			{
														
 
															+				center = Vector4(box.getCenter());
														
 
															+				extents = Vector4(box.getHalfSize());
														
 
															+			}
														
 
															+
														
 
															+			/** Initializes bounds from a Sphere. */
														
 
															+			AABox(const Sphere& sphere)
														
 
															+			{
														
 
															+				center = Vector4(sphere.getCenter());
														
 
															+
														
 
															+				float radius = sphere.getRadius();
														
 
															+				extents = Vector4(radius, radius, radius, 0.0f);
														
 
															+			}
														
 
															+
														
 
															+			/** Initializes bounds from a vector representing the center and equal extents in all directions. */
														
 
															+			AABox(const Vector3& center, float extent)
														
 
															+			{
														
 
															+				this->center = Vector4(center);
														
 
															+				extents = Vector4(extent, extent, extent, 0.0f);
														
 
															+			}
														
 
															+
														
 
															+			/** Returns true if the current bounds object intersects the provided object. */
														
 
															+			bool intersects(const AABox& other) const
														
 
															+			{
														
 
															+				auto myCenter = load<float32x4>(&center);
														
 
															+				auto otherCenter = load<float32x4>(&other.center);
														
 
															+
														
 
															+				float32x4 diff = abs(sub(myCenter, otherCenter));
														
 
															+
														
 
															+				auto myExtents = simd::load<float32x4>(&extents);
														
 
															+				auto otherExtents = simd::load<float32x4>(&other.extents);
														
 
															+
														
 
															+				float32x4 extents = add(myExtents, otherExtents);
														
 
															+
														
 
															+				return test_bits_any(bit_cast<uint32x4>(cmp_gt(diff, extents))) == false;
														
 
															+			}
														
 
															+		};
														
 
															+
														
 
															+		/** @} */
														
 
															+	}
														
 
															+}
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/CMakeLists.txt
+++ b/Source/BansheeUtility/ThirdParty/simdpp/CMakeLists.txt
@@ -0,0 +1,57 @@
 
															+#   Copyright (C) 2013  Povilas Kanapickas <[email protected]>
														
 
															+#
														
 
															+#   Distributed under the Boost Software License, Version 1.0.
														
 
															+#       (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+#           http://www.boost.org/LICENSE_1_0.txt)
														
 
															+
														
 
															+# Gather every header and inline file below this directory, as paths relative to it
															+file(GLOB_RECURSE HEADERS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.h *.inl)
															+
															+# Install each file into the matching sub-directory of ${SIMDPP_INCLUDEDIR}/simdpp
															+foreach(HEADER_FILE IN LISTS HEADERS)
															+    get_filename_component(FILE_PATH "${HEADER_FILE}" DIRECTORY)
															+    install(
															+        FILES "${HEADER_FILE}"
															+        DESTINATION "${SIMDPP_INCLUDEDIR}/simdpp/${FILE_PATH}"
															+    )
															+endforeach()
														
 
															+
														
 
															+# Don't enable header tests by default because configuring them takes an
															+# excessive amount of time
															+set(ENABLE_HEADER_TESTS "0")
															+
															+# Quoted so that the comparison stays well-formed even if the variable is empty
															+if("${ENABLE_HEADER_TESTS}" STREQUAL "1")
															+
															+    simdpp_get_compilable_archs(COMPILABLE_ARCHS)
															+
															+    set(HEADER_TESTS "")
															+    add_custom_target(check_headers)
															+
															+    # Every test object is written to the same directory, so create it once
															+    file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/check_headers")
															+
															+    foreach(ARCH ${COMPILABLE_ARCHS})
															+        simdpp_get_arch_info(CXX_FLAGS DEFINES_LIST SUFFIX ${ARCH})
															+        # Split the flags (presumably a space-separated string) into a list so
															+        # that each flag is passed to the compiler as its own argument
															+        separate_arguments(CXX_FLAGS)
															+
															+        foreach(FILE ${HEADERS})
															+
															+            # Skip .inl files. HEADERS holds relative paths, so the extension
															+            # has to be matched; comparing the whole path to ".inl" never hits.
															+            if("${FILE}" MATCHES "\\.inl$")
															+                continue()
															+            endif()
															+
															+            string(REPLACE "/" "_" TEST "${FILE}")
															+            string(REPLACE "." "_" TEST "${TEST}")
															+            set(TEST "${TEST}${SUFFIX}")
															+            # Absolute path, so that OUTPUT, the compiler's -o argument and the
															+            # custom target dependency all refer to the same file
															+            set(TEST_OUT "${CMAKE_BINARY_DIR}/check_headers/test_header_compiles_${TEST}")
															+            string(REPLACE "-" "_" TEST_TARGET "check_headers_${TEST}")
															+
															+            # HEADERS is relative to this directory and the headers include each
															+            # other as <simdpp/...>, hence the parent directory as include root
															+            add_custom_command(
															+                OUTPUT "${TEST_OUT}"
															+                COMMAND ${CMAKE_CXX_COMPILER}
															+                        -DLIBSIMDPP_SIMD_H
															+                        -I "${CMAKE_CURRENT_SOURCE_DIR}/.."
															+                        ${CXX_FLAGS} -x c++ -std=c++11 -g2 -Wall
															+                        "${CMAKE_CURRENT_SOURCE_DIR}/${FILE}"
															+                        -c -o "${TEST_OUT}"
															+                DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${FILE}"
															+                VERBATIM)
															+            add_custom_target(${TEST_TARGET} DEPENDS "${TEST_OUT}")
															+            add_dependencies(check_headers "${TEST_TARGET}")
															+        endforeach()
															+    endforeach()
															+
															+endif()
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/capabilities.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/capabilities.h
@@ -0,0 +1,118 @@
 
															+/*  Copyright (C) 2017  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_CAPABILITIES_H
														
 
															+#define LIBSIMDPP_SIMD_CAPABILITIES_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+#include <simdpp/setup_arch.h>
														
 
															+// SIMDPP_HAS_INT{8,16,32}_SIMD: 1 when SSE2, NEON, ALTIVEC or MSA is enabled, 0 otherwise.
															+#if SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_INT8_SIMD 1
															+#define SIMDPP_HAS_INT16_SIMD 1
															+#define SIMDPP_HAS_INT32_SIMD 1
															+#else
															+#define SIMDPP_HAS_INT8_SIMD 0
															+#define SIMDPP_HAS_INT16_SIMD 0
															+#define SIMDPP_HAS_INT32_SIMD 0
															+#endif
															+// SIMDPP_HAS_INT64_SIMD: same condition as the narrower integers, except that VSX 2.07 is required in place of ALTIVEC.
															+#if SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_INT64_SIMD 1
															+#else
															+#define SIMDPP_HAS_INT64_SIMD 0
															+#endif
															+// SIMDPP_HAS_FLOAT32_SIMD: 1 for SSE2, NEON_FLT_SP, NEON on 64-bit targets (SIMDPP_64_BITS), ALTIVEC or MSA.
															+#if SIMDPP_USE_SSE2 || SIMDPP_USE_NEON_FLT_SP || (SIMDPP_USE_NEON && SIMDPP_64_BITS) || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_FLOAT32_SIMD 1
															+#else
															+#define SIMDPP_HAS_FLOAT32_SIMD 0
															+#endif
															+// SIMDPP_HAS_FLOAT64_SIMD: 1 for SSE2, NEON on 64-bit targets, VSX 2.06 or MSA.
															+#if SIMDPP_USE_SSE2 || (SIMDPP_USE_NEON && SIMDPP_64_BITS) || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_FLOAT64_SIMD 1
															+#else
															+#define SIMDPP_HAS_FLOAT64_SIMD 0
															+#endif
															+// float64 -> uint32 conversion flag. From here on SIMDPP_USE_NULL also sets the flags; on x86 this one needs AVX512F.
															+#if SIMDPP_USE_NULL || SIMDPP_USE_AVX512F || (SIMDPP_USE_NEON && SIMDPP_64_BITS) || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_FLOAT64_TO_UINT32_CONVERSION 1
															+#else
															+#define SIMDPP_HAS_FLOAT64_TO_UINT32_CONVERSION 0
															+#endif
															+// 64-bit integer <-> floating-point conversion flags. All eight share one condition; on x86 it is AVX512DQ.
															+#if SIMDPP_USE_NULL || SIMDPP_USE_AVX512DQ || (SIMDPP_USE_NEON && SIMDPP_64_BITS) || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_INT64_TO_FLOAT64_CONVERSION 1
															+#define SIMDPP_HAS_INT64_TO_FLOAT32_CONVERSION 1
															+#define SIMDPP_HAS_UINT64_TO_FLOAT64_CONVERSION 1
															+#define SIMDPP_HAS_UINT64_TO_FLOAT32_CONVERSION 1
															+
															+#define SIMDPP_HAS_FLOAT32_TO_INT64_CONVERSION 1
															+#define SIMDPP_HAS_FLOAT32_TO_UINT64_CONVERSION 1
															+
															+#define SIMDPP_HAS_FLOAT64_TO_INT64_CONVERSION 1
															+#define SIMDPP_HAS_FLOAT64_TO_UINT64_CONVERSION 1
															+#else
															+#define SIMDPP_HAS_INT64_TO_FLOAT64_CONVERSION 0
															+#define SIMDPP_HAS_INT64_TO_FLOAT32_CONVERSION 0
															+#define SIMDPP_HAS_UINT64_TO_FLOAT64_CONVERSION 0
															+#define SIMDPP_HAS_UINT64_TO_FLOAT32_CONVERSION 0
															+
															+#define SIMDPP_HAS_FLOAT32_TO_INT64_CONVERSION 0
															+#define SIMDPP_HAS_FLOAT32_TO_UINT64_CONVERSION 0
															+
															+#define SIMDPP_HAS_FLOAT64_TO_INT64_CONVERSION 0
															+#define SIMDPP_HAS_FLOAT64_TO_UINT64_CONVERSION 0
															+#endif
															+// *_SHIFT_L_BY_VECTOR flags for 8- and 16-bit elements; the x86 requirement is SSSE3.
															+#if SIMDPP_USE_NULL || SIMDPP_USE_SSSE3 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_INT8_SHIFT_L_BY_VECTOR 1
															+#define SIMDPP_HAS_UINT8_SHIFT_L_BY_VECTOR 1
															+#define SIMDPP_HAS_INT16_SHIFT_L_BY_VECTOR 1
															+#define SIMDPP_HAS_UINT16_SHIFT_L_BY_VECTOR 1
															+#else
															+#define SIMDPP_HAS_INT8_SHIFT_L_BY_VECTOR 0
															+#define SIMDPP_HAS_UINT8_SHIFT_L_BY_VECTOR 0
															+#define SIMDPP_HAS_INT16_SHIFT_L_BY_VECTOR 0
															+#define SIMDPP_HAS_UINT16_SHIFT_L_BY_VECTOR 0
															+#endif
															+// *_SHIFT_L_BY_VECTOR flags for 32-bit elements; the x86 requirement is SSE2.
															+#if SIMDPP_USE_NULL || SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_INT32_SHIFT_L_BY_VECTOR 1
															+#define SIMDPP_HAS_UINT32_SHIFT_L_BY_VECTOR 1
															+#else
															+#define SIMDPP_HAS_INT32_SHIFT_L_BY_VECTOR 0
															+#define SIMDPP_HAS_UINT32_SHIFT_L_BY_VECTOR 0
															+#endif
															+// *_SHIFT_R_BY_VECTOR flags for 8-bit elements and unsigned 16-bit elements; signed 16-bit has its own condition below.
															+#if SIMDPP_USE_NULL || SIMDPP_USE_SSSE3 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_INT8_SHIFT_R_BY_VECTOR 1
															+#define SIMDPP_HAS_UINT8_SHIFT_R_BY_VECTOR 1
															+#define SIMDPP_HAS_UINT16_SHIFT_R_BY_VECTOR 1
															+#else
															+#define SIMDPP_HAS_INT8_SHIFT_R_BY_VECTOR 0
															+#define SIMDPP_HAS_UINT8_SHIFT_R_BY_VECTOR 0
															+#define SIMDPP_HAS_UINT16_SHIFT_R_BY_VECTOR 0
															+#endif
															+// Signed 16-bit *_SHIFT_R_BY_VECTOR flag: unlike the unsigned variant, the x86 requirement is AVX512BW.
															+#if SIMDPP_USE_NULL || SIMDPP_USE_AVX512BW || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_INT16_SHIFT_R_BY_VECTOR 1
															+#else
															+#define SIMDPP_HAS_INT16_SHIFT_R_BY_VECTOR 0
															+#endif
															+// *_SHIFT_R_BY_VECTOR flags for 32-bit elements; the x86 requirement is SSE2.
															+#if SIMDPP_USE_NULL || SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
															+#define SIMDPP_HAS_INT32_SHIFT_R_BY_VECTOR 1
															+#define SIMDPP_HAS_UINT32_SHIFT_R_BY_VECTOR 1
															+#else
															+#define SIMDPP_HAS_INT32_SHIFT_R_BY_VECTOR 0
															+#define SIMDPP_HAS_UINT32_SHIFT_R_BY_VECTOR 0
															+#endif
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/align.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/align.h
@@ -0,0 +1,200 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_ALIGN_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_ALIGN_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/align.h>
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Extracts an int8x16 vector from two concatenated int8x16 vectors.
															+
															+    @a shift is the index of the first extracted element within the
															+    concatenation of @a lower and @a upper. It must not exceed 16, which is
															+    checked at compile time. A shift of 0 yields @a lower and a shift of 16
															+    yields @a upper.
															+
															+    @code
															+    shift:  pos:| 0   1    .  14  15  |
															+     0      r = [ l0  l1   .  l14 l15 ]
															+     1      r = [ l1  l2   .  l15 u0  ]
															+     2      r = [ l2  l3   .  u0  u1  ]
															+      ...    ..   .. ..  ... .. ..
															+     15     r = [ l15 u0   .  u13 u14 ]
															+     16     r = [ u0  u1   .  u14 u15 ]
															+    @endcode
															+
															+    @par 128-bit version:
															+    @icost{SSE2-SSE3, 3}
															+
															+    @par 256-bit version:
															+    The lower and higher 128-bit halves are processed as if a 128-bit
															+    instruction was applied to each of them separately.
															+
															+    @icost{SSE2-SSE3, 6}
															+    @icost{SSSE3-AVX, NEON, ALTIVEC, 2}
															+*/
															+template<unsigned shift, unsigned N, class V1, class V2> SIMDPP_INL
															+typename detail::get_expr2_nomask<V1, V2>::empty
															+    align16(const any_vec8<N,V1>& lower,
															+            const any_vec8<N,V2>& upper)
															+{
															+    static_assert(shift <= 16, "Shift out of bounds");
															+    if (shift == 0) return lower.wrapped().eval();
															+    if (shift == 16) return upper.wrapped().eval();
															+    // Evaluate both operands into the common type chosen by get_expr2_nomask_nosign before dispatching
															+    typename detail::get_expr2_nomask_nosign<V1, V2>::type qlower, qupper;
															+    qlower = lower.wrapped().eval();
															+    qupper = upper.wrapped().eval();
															+    return detail::insn::i_align16<shift>(qlower, qupper);
															+}
														
 
															+
														
 
															+/** Extracts an int16x8 vector from two concatenated int16x8 vectors.
															+
															+    @a shift is the index of the first extracted element within the
															+    concatenation of @a lower and @a upper. It must not exceed 8, which is
															+    checked at compile time. A shift of 0 yields @a lower and a shift of 8
															+    yields @a upper.
															+
															+    @code
															+    shift:  pos:| 0  1    .  6  7  |
															+     0      r = [ l0 l1   .  l6 l7 ]
															+     1      r = [ l1 l2   .  l7 u0 ]
															+     2      r = [ l2 l3   .  u0 u1 ]
															+      ...    ..   .. ..  ... .. ..
															+     7      r = [ l7 u0   .  u5 u6 ]
															+     8      r = [ u0 u1   .  u6 u7 ]
															+    @endcode
															+
															+    @par 128-bit version:
															+    @icost{SSE2-SSE3, 3}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-SSE3, 6}
															+    @icost{SSSE3-AVX, NEON, ALTIVEC, 2}
															+
															+    All 128-bit sub-vectors are processed as if a 128-bit instruction
															+    was applied to each of them separately.
															+*/
															+template<unsigned shift, unsigned N, class V1, class V2> SIMDPP_INL
															+typename detail::get_expr2_nomask<V1, V2>::empty
															+    align8(const any_vec16<N,V1>& lower,
															+           const any_vec16<N,V2>& upper)
															+{
															+    static_assert(shift <= 8, "Shift out of bounds");
															+    if (shift == 0) return lower.wrapped().eval();
															+    if (shift == 8) return upper.wrapped().eval();
															+    // Evaluate both operands into the common type chosen by get_expr2_nomask_nosign before dispatching
															+    typename detail::get_expr2_nomask_nosign<V1, V2>::type qlower, qupper;
															+    qlower = lower.wrapped().eval();
															+    qupper = upper.wrapped().eval();
															+    return detail::insn::i_align8<shift>(qlower, qupper);
															+}
														
 
															+
														
 
															+/** Extracts an int32x4 vector from two concatenated int32x4 vectors.
															+
															+    @a shift is the index of the first extracted element within the
															+    concatenation of @a lower and @a upper. It must not exceed 4, which is
															+    checked at compile time. A shift of 0 yields @a lower and a shift of 4
															+    yields @a upper.
															+
															+    @code
															+    shift:  pos:| 0  1  2  3  |
															+     0      r = [ l0 l1 l2 l3 ]
															+     1      r = [ l1 l2 l3 u0 ]
															+     2      r = [ l2 l3 u0 u1 ]
															+     3      r = [ l3 u0 u1 u2 ]
															+     4      r = [ u0 u1 u2 u3 ]
															+    @endcode
															+
															+    @par int32
															+
															+    @par 128-bit version:
															+    @icost{SSE2-SSE3, 3}
															+
															+    @par 256-bit version:
															+    The lower and higher 128-bit halves are processed as if a 128-bit
															+    instruction was applied to each of them separately.
															+
															+    @icost{SSE2-SSE3, 6}
															+    @icost{SSSE3-AVX, NEON, ALTIVEC, 2}
															+
															+    @par float32
															+
															+    @par 128-bit version:
															+    @icost{SSE2-SSE3, 3}
															+
															+    @par 256-bit version:
															+    The lower and higher 128-bit halves are processed as if a 128-bit
															+    instruction was applied to each of them separately.
															+
															+    @icost{SSE2-SSE3, 6}
															+    @icost{SSSE3-SSE4.1, NEON, ALTIVEC, 2}
															+*/
															+template<unsigned shift, unsigned N, class V1, class V2> SIMDPP_INL
															+typename detail::get_expr2_nomask<V1, V2>::empty
															+    align4(const any_vec32<N,V1>& lower,
															+           const any_vec32<N,V2>& upper)
															+{
															+    static_assert(shift <= 4, "Shift out of bounds");
															+    if (shift == 0) return lower.wrapped().eval();
															+    if (shift == 4) return upper.wrapped().eval();
															+    // Evaluate both operands into the common type chosen by get_expr2_nomask_nosign before dispatching
															+    typename detail::get_expr2_nomask_nosign<V1, V2>::type qlower, qupper;
															+    qlower = lower.wrapped().eval();
															+    qupper = upper.wrapped().eval();
															+    return detail::insn::i_align4<shift>(qlower, qupper);
															+}
														
 
															+
														
 
															+
														
 
															+/** Extracts an int64x2 vector from two concatenated int64x2 vectors.
															+
															+    @a shift is the index of the first extracted element within the
															+    concatenation of @a lower and @a upper. It must not exceed 2, which is
															+    checked at compile time. A shift of 0 yields @a lower and a shift of 2
															+    yields @a upper.
															+
															+    @code
															+    shift:  pos:| 0  1  |
															+     0      r = [ l0 l1 ]
															+     1      r = [ l1 u0 ]
															+     2      r = [ u0 u1 ]
															+    @endcode
															+
															+    @par int64
															+
															+    @par 128-bit version:
															+    @icost{SSE2-SSE3, 3}
															+
															+    @par 256-bit version:
															+    The lower and higher 128-bit halves are processed as if a 128-bit
															+    instruction was applied to each of them separately.
															+
															+    @icost{SSE2-SSE3, 6}
															+    @icost{SSSE3-AVX, NEON, ALTIVEC, 2}
															+
															+    @par float64
															+
															+    @par 128-bit version:
															+    @icost{SSE2-SSE3, 3}
															+
															+    @par 256-bit version:
															+    The lower and higher 128-bit halves are processed as if a 128-bit
															+    instruction was applied to each of them separately.
															+
															+    @icost{SSE2-SSE3, 6}
															+    @icost{SSSE3-SSE4.1, NEON, ALTIVEC, 2}
															+*/
															+template<unsigned shift, unsigned N, class V1, class V2> SIMDPP_INL
															+typename detail::get_expr2_nomask<V1, V2>::empty
															+    align2(const any_vec64<N,V1>& lower,
															+           const any_vec64<N,V2>& upper)
															+{
															+    static_assert(shift <= 2, "Shift out of bounds");
															+    if (shift == 0) return lower.wrapped().eval();
															+    if (shift == 2) return upper.wrapped().eval();
															+    // Evaluate both operands into the common type chosen by get_expr2_nomask_nosign before dispatching
															+    typename detail::get_expr2_nomask_nosign<V1, V2>::type qlower, qupper;
															+    qlower = lower.wrapped().eval();
															+    qupper = upper.wrapped().eval();
															+    return detail::insn::i_align2<shift>(qlower, qupper);
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/aligned_allocator.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/aligned_allocator.h
@@ -0,0 +1,127 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_CORE_ALIGNED_ALLOCATOR_H
														
 
															+#define LIBSIMDPP_CORE_ALIGNED_ALLOCATOR_H
														
 
															+
														
 
															+#include <memory>
														
 
															+#include <cstddef>
														
 
															+#include <cstdint>
														
 
															+#include <stdexcept>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** An allocator that allocates memory with stricter alignment requirements than
														
 
															+    the defaults. @a A must be a power of two.
														
 
															+*/
														
 
															+template<class T, std::size_t A>
														
 
															+class aligned_allocator {
														
 
															+private:
														
 
															+
														
 
															+    static_assert(!(A & (A - 1)), "A is not a power of two");
														
 
															+
														
 
															+public:
														
 
															+    using value_type = T;
														
 
															+    using pointer = T*;
														
 
															+    using const_pointer = const T*;
														
 
															+    using reference = T&;
														
 
															+    using const_reference = const T&;
														
 
															+    using size_type = std::size_t;
														
 
															+    using difference_type = std::ptrdiff_t;
														
 
															+
														
 
															+    aligned_allocator() = default;
														
 
															+    aligned_allocator(const aligned_allocator&) = default;
														
 
															+
														
 
															+    template<class U>
														
 
															+    aligned_allocator(const aligned_allocator<U,A>&) {}
														
 
															+
														
 
															+    ~aligned_allocator() = default;
														
 
															+
														
 
															+    aligned_allocator& operator=(const aligned_allocator&) = delete;
														
 
															+
														
 
															+    template<class U>
														
 
															+    struct rebind {
														
 
															+        using other = aligned_allocator<U,A>;
														
 
															+    };
														
 
															+
														
 
															+    T* address(T& x) const
														
 
															+    {
														
 
															+        return &x;
														
 
															+    }
														
 
															+
														
 
															+    std::size_t max_size() const
														
 
															+    {
														
 
															+        return (static_cast<std::size_t>(0) - static_cast<std::size_t>(1)) / sizeof(T);
														
 
															+    }
														
 
															+
														
 
															+    // stateless
														
 
															+    bool operator!=(const aligned_allocator&) const { return false; }
														
 
															+    bool operator==(const aligned_allocator&) const { return true; }
														
 
															+
														
 
															+    void construct(T* p, const T& t) const
														
 
															+    {
														
 
															+        void* pv = static_cast<void*>(p);
														
 
															+        new (pv) T(t);
														
 
															+    }
														
 
															+
														
 
															+    void destroy(T* p) const
														
 
															+    {
														
 
															+        p->~T();
														
 
															+    }
														
 
															+
														
 
															+    T* allocate(std::size_t n) const
														
 
															+    {
														
 
															+        if (n == 0) {
														
 
															+            return nullptr;
														
 
															+        }
														
 
															+
														
 
															+        if (n > max_size()) {
														
 
															+            throw std::length_error("aligned_allocator<T,A>::allocate() - Integer overflow.");
														
 
															+        }
														
 
															+
														
 
															+
														
 
															+        /* We align the storage by adding @a alignment bytes and truncating the
														
 
															+            pointer. The pointer to the original location returned by @a new is
														
 
															+            stored just before the location the returned pointer refers to.
														
 
															+            To ensure that there is always at least @a sizeof(void*) space
														
 
															+            there, @a alignment is at least 2*sizoef(void*)
														
 
															+        */
														
 
															+        std::size_t al = A < 2*sizeof(void*) ? 2*sizeof(void*) : A;
														
 
															+
														
 
															+        char* pv = new char[n*sizeof(T) + al];
														
 
															+        std::uintptr_t upv = reinterpret_cast<std::uintptr_t>(pv);
														
 
															+        upv = (upv + al) & ~(al - 1);
														
 
															+        char** aligned_pv = reinterpret_cast<char**>(upv);
														
 
															+
														
 
															+        *(aligned_pv-1) = pv; // original pointer
														
 
															+
														
 
															+        return reinterpret_cast<T*>(aligned_pv);
														
 
															+    }
														
 
															+
														
 
															+    void deallocate(T* p, std::size_t n) const
														
 
															+    {
														
 
															+        (void) n;
														
 
															+        if (!p) {
														
 
															+            return;
														
 
															+        }
														
 
															+        char** pptr = reinterpret_cast<char**>(p);
														
 
															+        delete[](*(pptr - 1));
														
 
															+    }
														
 
															+
														
 
															+    template<class U>
														
 
															+    T * allocate(std::size_t n, const U* hint) const
														
 
															+    {
														
 
															+        (void) hint;
														
 
															+        return allocate(n);
														
 
															+    }
														
 
															+};
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/bit_and.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/bit_and.h
@@ -0,0 +1,123 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_BIT_AND_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_BIT_AND_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/bit_and.h>
														
 
															+#include <simdpp/detail/expr/bit_and.h>
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+#include <simdpp/core/detail/get_expr_bitwise.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Computes bitwise AND of integer or floating-point vectors.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 & b0
														
 
															+    ...
														
 
															+    rN = aN & bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @todo: icost
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, V1, V2>::type
														
 
															+        bit_and(const any_vec<N,V1>& a,
														
 
															+                const any_vec<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+// support scalar arguments
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, unsigned, V>::type
														
 
															+        bit_and(const unsigned& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, unsigned long, V>::type
														
 
															+        bit_and(const unsigned long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, unsigned long long, V>::type
														
 
															+        bit_and(const unsigned long long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, int, V>::type
														
 
															+        bit_and(const int& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, long, V>::type
														
 
															+        bit_and(const long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, long long, V>::type
														
 
															+        bit_and(const long long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, V, unsigned>::type
														
 
															+        bit_and(const any_vec<N,V>& a, const unsigned& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, V, unsigned long>::type
														
 
															+        bit_and(const any_vec<N,V>& a, const unsigned long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, V, unsigned long long>::type
														
 
															+        bit_and(const any_vec<N,V>& a, const unsigned long long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, V, int>::type
														
 
															+        bit_and(const any_vec<N,V>& a, const int& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, V, long>::type
														
 
															+        bit_and(const any_vec<N,V>& a, const long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_and, V, long long>::type
														
 
															+        bit_and(const any_vec<N,V>& a, const long long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/bit_andnot.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/bit_andnot.h
@@ -0,0 +1,120 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_BIT_ANDNOT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_BIT_ANDNOT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/bit_andnot.h>
														
 
															+#include <simdpp/detail/expr/bit_andnot.h>
														
 
															+#include <simdpp/core/detail/get_expr_bitwise.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Computes bitwise AND NOT of two integer or floating-point vectors.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 & ~b0
														
 
															+    ...
														
 
															+    rN = aN & ~bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @todo: icost
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, V1, V2>::type
														
 
															+        bit_andnot(const any_vec<N,V1>& a, const any_vec<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+// support scalar arguments
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, unsigned, V>::type
														
 
															+        bit_andnot(const unsigned& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, unsigned long, V>::type
														
 
															+        bit_andnot(const unsigned long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, unsigned long long, V>::type
														
 
															+        bit_andnot(const unsigned long long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, int, V>::type
														
 
															+        bit_andnot(const int& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, long, V>::type
														
 
															+        bit_andnot(const long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, long long, V>::type
														
 
															+        bit_andnot(const long long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, V, unsigned>::type
														
 
															+        bit_andnot(const any_vec<N,V>& a, const unsigned& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, V, unsigned long>::type
														
 
															+        bit_andnot(const any_vec<N,V>& a, const unsigned long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, V, unsigned long long>::type
														
 
															+        bit_andnot(const any_vec<N,V>& a, const unsigned long long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, V, int>::type
														
 
															+        bit_andnot(const any_vec<N,V>& a, const int& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, V, long>::type
														
 
															+        bit_andnot(const any_vec<N,V>& a, const long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_andnot, V, long long>::type
														
 
															+        bit_andnot(const any_vec<N,V>& a, const long long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/bit_not.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/bit_not.h
@@ -0,0 +1,68 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_BIT_NOT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_BIT_NOT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/bit_not.h>
														
 
															+#include <simdpp/detail/expr/bit_not.h>
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Computes bitwise NOT of an integer or floating-point vector
														
 
															+
														
 
															+    @code
														
 
															+    r = ~a
														
 
															+    @endcode
														
 
															+
														
 
															+    @todo icost
														
 
															+*/
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr<V, expr_bit_not<V>>::empty
														
 
															+    bit_not(const any_vec<N,V>& a)
														
 
															+{
														
 
															+    typename detail::get_expr_nosign<V>::type ra;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    return detail::insn::i_bit_not(ra);
														
 
															+}
														
 
															+
														
 
															+/* FIXME
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+mask_int32<N, expr_bit_not<mask_int32<N,E>>> bit_not(mask_int32<N,E> a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+mask_int64<N, expr_bit_not<mask_int64<N,E>>> bit_not(mask_int64<N,E> a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+mask_float32<N, expr_bit_not<mask_float32<N,E>>> bit_not(mask_float32<N,E> a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+mask_float64<N, expr_bit_not<mask_float64<N,E>>> bit_not(mask_float64<N,E> a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+*/
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/bit_or.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/bit_or.h
@@ -0,0 +1,122 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_BIT_OR_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_BIT_OR_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/bit_or.h>
														
 
															+#include <simdpp/detail/expr/bit_or.h>
														
 
															+#include <simdpp/core/detail/get_expr_bitwise.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Computes bitwise OR of integer vectors.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 | b0
														
 
															+    ...
														
 
															+    rN = aN | bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @todo icost
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_bit_or<V1, V2>::type
														
 
															+        bit_or(const any_vec<N,V1>& a, const any_vec<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+// support scalar arguments
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, unsigned, V>::type
														
 
															+        bit_or(const unsigned& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, unsigned long, V>::type
														
 
															+        bit_or(const unsigned long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, unsigned long long, V>::type
														
 
															+        bit_or(const unsigned long long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, int, V>::type
														
 
															+        bit_or(const int& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, long, V>::type
														
 
															+        bit_or(const long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, long long, V>::type
														
 
															+        bit_or(const long long& a, const any_vec<N,V>& b)
														
 
															+{
														
 
															+    return { { a, b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, V, unsigned>::type
														
 
															+        bit_or(const any_vec<N,V>& a, const unsigned& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, V, unsigned long>::type
														
 
															+        bit_or(const any_vec<N,V>& a, const unsigned long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, V, unsigned long long>::type
														
 
															+        bit_or(const any_vec<N,V>& a, const unsigned long long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, V, int>::type
														
 
															+        bit_or(const any_vec<N,V>& a, const int& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, V, long>::type
														
 
															+        bit_or(const any_vec<N,V>& a, const long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+template<unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_bitwise2_and<expr_bit_or, V, long long>::type
														
 
															+        bit_or(const any_vec<N,V>& a, const long long& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b } };
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/bit_xor.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/bit_xor.h
@@ -0,0 +1,126 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_BIT_XOR_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_BIT_XOR_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+#include <simdpp/detail/insn/bit_xor.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Computes bitwise XOR of integer or floating-point vectors.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 ^ b0
														
 
															+    ...
														
 
															+    rN = aN ^ bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
															+typename detail::get_expr2<V1, V2>::empty
															+bit_xor(const any_vec<N,V1>& a, const any_vec<N,V2>& b)
															+{
															+    // Both operands are evaluated and assigned into the common type
															+    // selected by get_expr2_nosign before the instruction-level XOR runs.
															+    using operand_type = typename detail::get_expr2_nosign<V1, V2>::type;
															+
															+    operand_type lhs;
															+    operand_type rhs;
															+    lhs = a.wrapped().eval();
															+    rhs = b.wrapped().eval();
															+
															+    return detail::insn::i_bit_xor(lhs, rhs);
															+}
														
 
															+
														
 
															+// support scalar arguments
														
 
															+// Scalar-on-the-left overload (`unsigned`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<typename detail::get_expr_nomask<V>::type, V>::empty
															+bit_xor(const unsigned& a, const any_vec<N,V>& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(detail::make_const_bitwise<const_vec>(a), b);
															+}
														
 
															+// Scalar-on-the-left overload (`unsigned long`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<typename detail::get_expr_nomask<V>::type, V>::empty
															+bit_xor(const unsigned long& a, const any_vec<N,V>& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(detail::make_const_bitwise<const_vec>(a), b);
															+}
														
 
															+// Scalar-on-the-left overload (`unsigned long long`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<typename detail::get_expr_nomask<V>::type, V>::empty
															+bit_xor(const unsigned long long& a, const any_vec<N,V>& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(detail::make_const_bitwise<const_vec>(a), b);
															+}
														
 
															+// Scalar-on-the-left overload (`int`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<typename detail::get_expr_nomask<V>::type, V>::empty
															+bit_xor(const int& a, const any_vec<N,V>& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(detail::make_const_bitwise<const_vec>(a), b);
															+}
														
 
															+// Scalar-on-the-left overload (`long`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<typename detail::get_expr_nomask<V>::type, V>::empty
															+bit_xor(const long& a, const any_vec<N,V>& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(detail::make_const_bitwise<const_vec>(a), b);
															+}
														
 
															+// Scalar-on-the-left overload (`long long`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<typename detail::get_expr_nomask<V>::type, V>::empty
															+bit_xor(const long long& a, const any_vec<N,V>& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(detail::make_const_bitwise<const_vec>(a), b);
															+}
														
 
															+
														
 
															+
														
 
															+// Scalar-on-the-right overload (`unsigned`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<V, typename detail::get_expr_nomask<V>::type>::empty
															+bit_xor(const any_vec<N,V>& a, const unsigned& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(a, detail::make_const_bitwise<const_vec>(b));
															+}
														
 
															+// Scalar-on-the-right overload (`unsigned long`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<V, typename detail::get_expr_nomask<V>::type>::empty
															+bit_xor(const any_vec<N,V>& a, const unsigned long& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(a, detail::make_const_bitwise<const_vec>(b));
															+}
														
 
															+// Scalar-on-the-right overload (`unsigned long long`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<V, typename detail::get_expr_nomask<V>::type>::empty
															+bit_xor(const any_vec<N,V>& a, const unsigned long long& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(a, detail::make_const_bitwise<const_vec>(b));
															+}
														
 
															+// Scalar-on-the-right overload (`int`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<V, typename detail::get_expr_nomask<V>::type>::empty
															+bit_xor(const any_vec<N,V>& a, const int& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(a, detail::make_const_bitwise<const_vec>(b));
															+}
														
 
															+// Scalar-on-the-right overload (`long`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<V, typename detail::get_expr_nomask<V>::type>::empty
															+bit_xor(const any_vec<N,V>& a, const long& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(a, detail::make_const_bitwise<const_vec>(b));
															+}
														
 
															+// Scalar-on-the-right overload (`long long`): the scalar goes through
															+// detail::make_const_bitwise and bit_xor is called again with the result.
															+template<unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr2<V, typename detail::get_expr_nomask<V>::type>::empty
															+bit_xor(const any_vec<N,V>& a, const long long& b)
															+{
															+    using const_vec = typename detail::get_expr_nomask<V>::type;
															+    return bit_xor(a, detail::make_const_bitwise<const_vec>(b));
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/blend.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/blend.h
@@ -0,0 +1,193 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_BLEND_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_BLEND_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/blend.h>
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+namespace detail {
														
 
															+
														
 
															+/*  Blend is a special function in that the type of the returned expression
														
 
															+    depends on three arguments.
														
 
															+
														
 
															+    As always, we want to reduce the number of overloads that need to be
														
 
															+    created in order to match a specific case of an expression tree containing
														
 
															+    'blend' nodes. In this case we do the following in an attempt to achieve
														
 
															+    that:
														
 
															+
														
 
															+     * the first and the second types have the same type as the expression
														
 
															+        itself, except that signed integer vectors are converted to unsigned
														
 
															+     * the third type is the same as the expression itself, except when it is
														
 
															+        a mask. In that case it is converted to floating-point mask if the
														
 
															+        expression is floating-point expression and to integer mask otherwise
														
 
															+     * TODO
														
 
															+
														
 
															+     So, as a result, the following tuples of types will appear as the arguments
														
 
															+     of the returned expression:
														
 
															+
														
 
															+      * mask_int8, mask_int8, mask_int8
														
 
															+      * uint8, uint8, uint8
														
 
															+      * uint8, uint8, mask_int8
														
 
															+      * mask_int16, mask_int16, mask_int16
														
 
															+      * uint16, uint16, uint16
														
 
															+      * uint16, uint16, mask_int16
														
 
															+      * mask_int32, mask_int32, mask_int32
														
 
															+      * mask_float32, mask_float32, mask_float32
														
 
															+      * uint32, uint32, uint32
														
 
															+      * uint32, uint32, mask_int32
														
 
															+      * float32, float32, float32
														
 
															+      * float32, float32, mask_float32
														
 
															+      * mask_int64, mask_int64, mask_int64
														
 
															+      * mask_float64, mask_float64, mask_float64
														
 
															+      * uint64, uint64, uint64
														
 
															+      * uint64, uint64, mask_int64
														
 
															+      * float64, float64, float64
														
 
															+      * float64, float64, mask_float64
														
 
															+
														
 
															+    The type of the returned expression is governed by the usual rules
														
 
															+    (see simdpp/types/tag.h)
														
 
															+*/
														
 
															+
														
 
															+template<class V1, class V2, class V3>
															+class get_expr_blend {
															+
															+    // Size tags: the larger of V1/V2 first, then the larger of that and V3.
															+    static const unsigned pair_size_tag =
															+            V1::size_tag > V2::size_tag ? V1::size_tag : V2::size_tag;
															+    static const unsigned expr_size_tag =
															+            pair_size_tag > V3::size_tag ? pair_size_tag : V3::size_tag;
															+
															+    // Type tag of the V1/V2 pair: the larger tag wins, except that a mask
															+    // tag is replaced by SIMDPP_TAG_UINT when the two size tags differ.
															+    static const unsigned pair_raw_tag =
															+            V1::type_tag > V2::type_tag ? V1::type_tag : V2::type_tag;
															+    static const bool pair_is_mask = pair_raw_tag == SIMDPP_TAG_MASK_INT ||
															+                                     pair_raw_tag == SIMDPP_TAG_MASK_FLOAT;
															+    static const unsigned pair_type_tag =
															+            (pair_is_mask && V1::size_tag != V2::size_tag) ? SIMDPP_TAG_UINT
															+                                                           : pair_raw_tag;
															+
															+    // Type tag of the whole expression: the same rule applied once more,
															+    // this time to the pair result and V3.
															+    static const unsigned expr_raw_tag =
															+            pair_type_tag > V3::type_tag ? pair_type_tag : V3::type_tag;
															+    static const bool expr_is_mask = expr_raw_tag == SIMDPP_TAG_MASK_INT ||
															+                                     expr_raw_tag == SIMDPP_TAG_MASK_FLOAT;
															+    static const unsigned expr_type_tag =
															+            (expr_is_mask && V3::size_tag != pair_size_tag) ? SIMDPP_TAG_UINT
															+                                                            : expr_raw_tag;
															+
															+    // Tag used for the first two operands: signed integers become unsigned.
															+    static const unsigned value_type_tag =
															+            expr_type_tag == SIMDPP_TAG_INT ? SIMDPP_TAG_UINT : expr_type_tag;
															+
															+    static const bool selector_is_mask = V3::type_tag == SIMDPP_TAG_MASK_INT ||
															+                                         V3::type_tag == SIMDPP_TAG_MASK_FLOAT;
															+    static const bool value_is_float = value_type_tag == SIMDPP_TAG_FLOAT ||
															+                                       value_type_tag == SIMDPP_TAG_MASK_FLOAT;
															+
															+    // Tag used for the third operand. When V3 is a mask whose size tag
															+    // equals the expression's size tag it stays a mask (float mask if the
															+    // first two operands are float, integer mask otherwise); in every
															+    // other case it gets the same tag as the first two operands.
															+    static const unsigned selector_type_tag =
															+            (selector_is_mask && expr_size_tag == V3::size_tag)
															+                ? (value_is_float ? SIMDPP_TAG_MASK_FLOAT : SIMDPP_TAG_MASK_INT)
															+                : value_type_tag;
															+
															+public:
															+    // Operand types the blend expression's arguments are converted to.
															+    using v1_final_type = typename type_of_tag<value_type_tag + expr_size_tag,
															+                                               V1::length_bytes, void>::type;
															+
															+    using v2_final_type = v1_final_type;
															+
															+    using v3_final_type = typename type_of_tag<selector_type_tag + expr_size_tag,
															+                                               V1::length_bytes, void>::type;
															+
															+    // Type of the blend expression itself.
															+    using type = typename type_of_tag<expr_type_tag + expr_size_tag,
															+                                      V1::length_bytes,
															+                                      expr_blend<V1, V2, V3>>::type;
															+};
														
 
															+
														
 
															+} // namespace detail
														
 
															+
														
 
															+/** Composes a vector from two sources according to a mask. Each element within
														
 
															+    the mask must have either all bits set or all bits unset.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (mask0 == 0xff ) ? on0 : off0
														
 
															+    ...
														
 
															+    rN = (maskN == 0xff ) ? onN : offN
														
 
															+    @endcode
														
 
															+
														
 
															+    @todo icost
														
 
															+
														
 
															+    @par int16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX, 3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6}
														
 
															+    @icost{NEON, ALTIVEC, 2}
														
 
															+
														
 
															+    @par int32
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX, 3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6}
														
 
															+    @icost{NEON, ALTIVEC, 2}
														
 
															+
														
 
															+    @par int64
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX, 3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6}
														
 
															+    @icost{NEON, ALTIVEC, 2}
														
 
															+
														
 
															+    @par float32
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 6}
														
 
															+    @icost{NEON, ALTIVEC, 2}
														
 
															+
														
 
															+    @par float64
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 3}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 6}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2, class V3> SIMDPP_INL
														
 
															+typename detail::get_expr_blend<V1, V2, V3>::type
														
 
															+        blend(const any_vec<N,V1>& on, const any_vec<N,V2>& off,
														
 
															+              const any_vec<N,V3>& mask)
														
 
															+{
														
 
															+    // No selection is performed in this body: the return type chosen by
														
 
															+    // detail::get_expr_blend is brace-initialized from the three wrapped
														
 
															+    // operands, in the order on, off, mask.
														
 
															+    return { { on.wrapped(), off.wrapped(), mask.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/cache.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/cache.h
@@ -0,0 +1,63 @@
 
															+/*  Copyright (C) 2011-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CACHE_H
														
 
															+#define LIBSIMDPP_SIMDPP_CACHE_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+#include <simdpp/setup_arch.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Prefetches data to the lowest level cache for reading.
														
 
															+
														
 
															+    @param ptr pointer to the data to prefetch
														
 
															+*/
														
 
															+template<class T>
															+SIMDPP_INL void prefetch_read(const T* ptr)
															+{
															+    // Keeps `ptr` referenced when neither branch below is compiled in.
															+    static_cast<void>(ptr);
															+#if SIMDPP_USE_SSE2
															+    _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0);
															+#elif (SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA) && __GNUC__
															+    // Second argument 0 requests a prefetch for reading.
															+    //   NEON    -> PLD
															+    //   Altivec -> DST
															+    //   MSA     -> PREF
															+    __builtin_prefetch(ptr, 0);
															+#endif
															+}
														
 
															+
														
 
															+/** Prefetches data to the lowest level cache for writing.
														
 
															+
														
 
															+    @param ptr pointer to the data to prefetch
														
 
															+*/
														
 
															+template<class T>
															+SIMDPP_INL void prefetch_write(const T* ptr)
															+{
															+    // Keeps `ptr` referenced when neither branch below is compiled in.
															+    static_cast<void>(ptr);
															+#if SIMDPP_USE_SSE2
															+    _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0);
															+#elif (SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA) && __GNUC__
															+    // Second argument 1 requests a prefetch for writing.
															+    //   NEON    -> PLDW
															+    //   Altivec -> DSTST
															+    //   MSA     -> PREF
															+    __builtin_prefetch(ptr, 1);
															+#endif
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/cast.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/cast.h
@@ -0,0 +1,104 @@
 
															+/*  Copyright (C) 2011-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_CAST_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_CAST_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/setup_arch.h>
														
 
															+#include <simdpp/detail/cast.h>
														
 
															+#include <simdpp/types/traits.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+namespace detail {
														
 
															+
														
 
															+// on certain architectures mask-mask conversions may need unmasking or remasking
														
 
															+// Selects how a mask-to-mask cast with destination R and source T is done.
															+// The primary template picks CAST_MASK_MEMCPY; the specializations below
															+// replace that choice for specific mask pairs on specific configurations.
															+template<class R, class T>
															+struct cast_mask_override {
															+    static const unsigned value = CAST_MASK_MEMCPY;
															+};
															+
															+#if SIMDPP_USE_NEON_NO_FLT_SP
															+// destination mask_float32, source mask_int32
															+template<unsigned N>
															+struct cast_mask_override<mask_float32<N>, mask_int32<N>> {
															+    static const unsigned value = CAST_MASK_UNMASK;
															+};
															+// destination mask_int32, source mask_float32
															+template<unsigned N>
															+struct cast_mask_override<mask_int32<N>, mask_float32<N>> {
															+    static const unsigned value = CAST_MASK_REMASK;
															+};
															+#endif
															+
															+#if SIMDPP_USE_NEON && SIMDPP_32_BITS
															+// destination mask_int64, source mask_float64
															+template<unsigned N>
															+struct cast_mask_override<mask_int64<N>, mask_float64<N>> {
															+    static const unsigned value = CAST_MASK_UNMASK;
															+};
															+// destination mask_float64, source mask_int64
															+template<unsigned N>
															+struct cast_mask_override<mask_float64<N>, mask_int64<N>> {
															+    static const unsigned value = CAST_MASK_REMASK;
															+};
															+#endif
															+
															+#if SIMDPP_USE_VSX_206 && !SIMDPP_USE_VSX_207
															+// destination mask_int64, source mask_float64
															+template<unsigned N>
															+struct cast_mask_override<mask_int64<N>, mask_float64<N>> {
															+    static const unsigned value = CAST_MASK_REMASK;
															+};
															+// destination mask_float64, source mask_int64
															+template<unsigned N>
															+struct cast_mask_override<mask_float64<N>, mask_int64<N>> {
															+    static const unsigned value = CAST_MASK_UNMASK;
															+};
															+#endif
														
 
															+
														
 
															+template<class R, class T> SIMDPP_INL
															+void bit_cast_impl(const T& t, R& r)
															+{
															+    const bool dst_is_vector = is_vector<R>::value;
															+    const bool src_is_vector = is_vector<T>::value;
															+    const bool dst_is_mask = is_mask<R>::value;
															+    const bool src_is_mask = is_mask<T>::value;
															+
															+    static_assert(dst_is_vector == src_is_vector,
															+                  "bit_cast can't convert between vector and non-vector types");
															+
															+    // Choice used only when both source and destination are masks.
															+    const unsigned mask_override = detail::cast_mask_override<R,T>::value;
															+    const unsigned mask_to_mask_cast =
															+            (mask_override == CAST_MASK_REMASK) ? CAST_TYPE_MASK_TO_MASK_REMASK :
															+            (mask_override == CAST_MASK_UNMASK) ? CAST_TYPE_MASK_TO_MASK_UNMASK :
															+                                                  CAST_TYPE_MASK_TO_MASK_BITWISE;
															+
															+    // Each test relies on the ones above it having failed: once "neither
															+    // is a mask" is ruled out, a non-mask destination implies a mask
															+    // source, and after that a non-mask source implies a mask destination.
															+    const unsigned selected_cast =
															+            !(src_is_vector || dst_is_vector) ? CAST_TYPE_OTHER :
															+            !(src_is_mask || dst_is_mask)     ? CAST_TYPE_VECTOR_TO_VECTOR :
															+            !dst_is_mask                      ? CAST_TYPE_MASK_TO_VECTOR :
															+            !src_is_mask                      ? CAST_TYPE_VECTOR_TO_MASK :
															+                                                mask_to_mask_cast;
															+
															+    detail::cast_wrapper<selected_cast>::run(t, r);
															+}
														
 
															+
														
 
															+// Overload for identical source and destination types: a plain assignment.
															+template<class T> SIMDPP_INL
															+void bit_cast_impl(const T& t, T& r)
															+{
															+    r = t;
															+}
														
 
															+
														
 
															+} // namespace detail
														
 
															+
														
 
															+/** Casts between unrelated types. No changes to the stored values are
														
 
															+    performed.
														
 
															+
														
 
															+    Conversions between vector and non-vector types are not allowed.
														
 
															+
														
 
															+    Conversion from non-mask type to mask type is not allowed.
														
 
															+
														
 
															+    Conversion from mask type to a non-mask type is not a costless operation
														
 
															+    because masks may have different logical and physical layout (e.g., in
														
 
															+    some implementations one bit represents entire element in a vector).
														
 
															+
														
 
															+    Conversion between mask types is only allowed if the element size is the
														
 
															+    same.
														
 
															+*/
														
 
															+template<class R, class T> SIMDPP_INL
															+R bit_cast(const T& t)
															+{
															+    // The destination is default-constructed, filled in by the detail
															+    // helper, then returned by value.
															+    R converted;
															+    detail::bit_cast_impl(t, converted);
															+    return converted;
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_eq.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_eq.h
@@ -0,0 +1,173 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_CMP_EQ_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_CMP_EQ_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/cmp_eq.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Compares 8-bit values for equality.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 == b0) ? 0xff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN == bN) ? 0xff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
															+mask_int8<N,expr_empty> cmp_eq(const any_int8<N,V1>& a,
															+                               const any_int8<N,V2>& b)
															+{
															+    // Common operand type chosen by detail::get_expr2_nosign for V1/V2.
															+    typedef typename detail::get_expr2_nosign<V1, V2>::type operand_type;
															+
															+    operand_type lhs;
															+    operand_type rhs;
															+    lhs = a.wrapped().eval();
															+    rhs = b.wrapped().eval();
															+
															+    // Both sides now share one type; forward to the implementation.
															+    return detail::insn::i_cmp_eq(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_int8, any_int8)
														
 
															+
														
 
															+/** Compares 16-bit values for equality.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 == b0) ? 0xffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN == bN) ? 0xffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
															+mask_int16<N,expr_empty> cmp_eq(const any_int16<N,V1>& a,
															+                                const any_int16<N,V2>& b)
															+{
															+    // Common operand type chosen by detail::get_expr2_nosign for V1/V2.
															+    typedef typename detail::get_expr2_nosign<V1, V2>::type operand_type;
															+
															+    operand_type lhs;
															+    operand_type rhs;
															+    lhs = a.wrapped().eval();
															+    rhs = b.wrapped().eval();
															+
															+    // Both sides now share one type; forward to the implementation.
															+    return detail::insn::i_cmp_eq(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_int16, any_int16)
														
 
															+
														
 
															+/** Compares the values of two int32x4 vectors for equality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 == b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN == bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
															+mask_int32<N,expr_empty> cmp_eq(const any_int32<N,V1>& a,
															+                                const any_int32<N,V2>& b)
															+{
															+    // Common operand type chosen by detail::get_expr2_nosign for V1/V2.
															+    typedef typename detail::get_expr2_nosign<V1, V2>::type operand_type;
															+
															+    operand_type lhs;
															+    operand_type rhs;
															+    lhs = a.wrapped().eval();
															+    rhs = b.wrapped().eval();
															+
															+    // Both sides now share one type; forward to the implementation.
															+    return detail::insn::i_cmp_eq(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_int32, any_int32)
														
 
															+
														
 
															+/** Compares the values of two int64x2 vectors for equality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 == b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN == bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 5}
														
 
															+    @icost{XOP, 1}
														
 
															+    @icost{NEON, 3}
														
 
															+    @icost{ALTIVEC, 3-4}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, AVX, 10}
														
 
															+    @icost{XOP, SSE4.1, 2}
														
 
															+    @icost{NEON, 6}
														
 
															+    @icost{ALTIVEC, 6-7}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
															+mask_int64<N,expr_empty> cmp_eq(const any_int64<N,V1>& a,
															+                                const any_int64<N,V2>& b)
															+{
															+    // Common operand type chosen by detail::get_expr2_nosign for V1/V2.
															+    typedef typename detail::get_expr2_nosign<V1, V2>::type operand_type;
															+
															+    operand_type lhs;
															+    operand_type rhs;
															+    lhs = a.wrapped().eval();
															+    rhs = b.wrapped().eval();
															+
															+    // Both sides now share one type; forward to the implementation.
															+    return detail::insn::i_cmp_eq(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_int64, any_int64)
														
 
															+
														
 
															+/** Compares the values of two float32x4 vectors for equality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 == b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN == bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
															+mask_float32<N,expr_empty> cmp_eq(const any_float32<N,V1>& a,
															+                                  const any_float32<N,V2>& b)
															+{ // Element-wise a == b on float32 elements; the outcome is returned as mask_float32<N>.
															+    return detail::insn::i_cmp_eq(a.wrapped().eval(), b.wrapped().eval()); // unwrap and eval() both operands, then forward to detail::insn::i_cmp_eq
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_float32, any_float32) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two float64x2 vectors for equality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 == b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN == bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
															+mask_float64<N,expr_empty> cmp_eq(const any_float64<N,V1>& a,
															+                                  const any_float64<N,V2>& b)
															+{ // Element-wise a == b on float64 elements; the outcome is returned as mask_float64<N>.
															+    return detail::insn::i_cmp_eq(a.wrapped().eval(), b.wrapped().eval()); // unwrap and eval() both operands, then forward to detail::insn::i_cmp_eq
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_eq, mask_float64, any_float64) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_ge.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_ge.h
@@ -0,0 +1,149 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_CMP_GE_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_CMP_GE_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/cmp_ge.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Compares the values of two signed int8 vectors for greater-than or equal
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 >= b0) ? ~0x0 : 0x0
														
 
															+    ...
														
 
															+    rN = (aN >= bN) ? ~0x0 : 0x0
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int8<N,expr_empty> cmp_ge(const int8<N,E1>& a,
															+                               const int8<N,E2>& b)
															+{ // Signed 8-bit overload of a >= b; the outcome is returned as mask_int8<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_int8, int8) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int8<N,expr_empty> cmp_ge(const uint8<N,E1>& a,
															+                               const uint8<N,E2>& b)
															+{ // Unsigned 8-bit overload of a >= b; the outcome is returned as mask_int8<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_int8, uint8) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int16<N,expr_empty> cmp_ge(const int16<N,E1>& a,
															+                                const int16<N,E2>& b)
															+{ // Signed 16-bit overload of a >= b; the outcome is returned as mask_int16<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_int16, int16) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int16<N,expr_empty> cmp_ge(const uint16<N,E1>& a,
															+                                const uint16<N,E2>& b)
															+{ // Unsigned 16-bit overload of a >= b; the outcome is returned as mask_int16<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_int16, uint16) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int32<N,expr_empty> cmp_ge(const int32<N,E1>& a,
															+                                const int32<N,E2>& b)
															+{ // Signed 32-bit overload of a >= b; the outcome is returned as mask_int32<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_int32, int32) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int32<N,expr_empty> cmp_ge(const uint32<N,E1>& a,
															+                                const uint32<N,E2>& b)
															+{ // Unsigned 32-bit overload of a >= b; the outcome is returned as mask_int32<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_int32, uint32) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int64<N,expr_empty> cmp_ge(const int64<N,E1>& a,
															+                                const int64<N,E2>& b)
															+{ // Signed 64-bit overload of a >= b; the outcome is returned as mask_int64<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_int64, int64) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int64<N,expr_empty> cmp_ge(const uint64<N,E1>& a,
															+                                const uint64<N,E2>& b)
															+{ // Unsigned 64-bit overload of a >= b; the outcome is returned as mask_int64<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_int64, uint64) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two float32x4 vectors for greater-than or equal
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 >= b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN >= bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_float32<N,expr_empty> cmp_ge(const float32<N,E1>& a,
															+                                  const float32<N,E2>& b)
															+{ // float32 overload of a >= b; the outcome is returned as mask_float32<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_float32, float32) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two float64x2 vectors for greater-than or equal
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 >= b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN >= bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_float64<N,expr_empty> cmp_ge(const float64<N,E1>& a,
															+                                  const float64<N,E2>& b)
															+{ // float64 overload of a >= b; the outcome is returned as mask_float64<N>.
															+    return detail::insn::i_cmp_ge(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_ge
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_ge, mask_float64, float64) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_gt.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_gt.h
@@ -0,0 +1,248 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_CMP_GT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_CMP_GT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/cmp_gt.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Compares the values of two signed int8 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int8<N,expr_empty> cmp_gt(const int8<N,E1>& a,
															+                               const int8<N,E2>& b)
															+{ // Signed 8-bit overload of a > b; the outcome is returned as mask_int8<N>.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_gt
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_int8, int8) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+
														
 
															+/** Compares the values of two unsigned int8 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 3-4}
														
 
															+    @icost{XOP, 1}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6-7}
														
 
															+    @icost{AVX2, 3-4}
														
 
															+    @icost{XOP, 2}
														
 
															+    @icost{NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int8<N,expr_empty> cmp_gt(const uint8<N,E1>& a,
															+                               const uint8<N,E2>& b)
															+{ // Unsigned 8-bit overload of a > b; the outcome is returned as mask_int8<N>.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_gt
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_int8, uint8) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two signed int16x8 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int16<N,expr_empty> cmp_gt(const int16<N,E1>& a,
															+                                const int16<N,E2>& b)
															+{ // Signed 16-bit overload of a > b; the outcome is returned as mask_int16<N>.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_gt
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_int16, int16) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two unsigned int16x8 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 3-4}
														
 
															+    @icost{XOP, 1}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6-7}
														
 
															+    @icost{AVX2, 3-4}
														
 
															+    @icost{XOP, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int16<N,expr_empty> cmp_gt(const uint16<N,E1>& a,
															+                                const uint16<N,E2>& b)
															+{ // Unsigned 16-bit overload of a > b; the outcome is returned as mask_int16<N>.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_gt
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_int16, uint16) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two signed int32x4 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int32<N,expr_empty> cmp_gt(const int32<N,E1>& a,
															+                                const int32<N,E2>& b)
															+{ // Signed 32-bit overload of a > b; the outcome is returned as mask_int32<N>.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_gt
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_int32, int32) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two unsigned int32x4 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 3-4}
														
 
															+    @icost{XOP, 1}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6-7}
														
 
															+    @icost{AVX2, 3-4}
														
 
															+    @icost{XOP, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int32<N,expr_empty> cmp_gt(const uint32<N,E1>& a,
															+                                const uint32<N,E2>& b)
															+{ // Unsigned 32-bit overload of a > b; the outcome is returned as mask_int32<N>.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_gt
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_int32, uint32) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two signed int64 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    Supported since AVX2, NEON64. Not supported on ALTIVEC.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int64<N,expr_empty> cmp_gt(const int64<N,E1>& a,
															+                                const int64<N,E2>& b)
															+{ // Signed 64-bit overload of a > b; the outcome is returned as mask_int64<N>.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_gt
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_int64, int64) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two unsigned int64 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    Supported since AVX2, NEON64. Not supported on ALTIVEC.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_int64<N,expr_empty> cmp_gt(const uint64<N,E1>& a,
															+                                const uint64<N,E2>& b)
															+{ // Unsigned 64-bit overload of a > b; the outcome is returned as mask_int64<N>.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_gt
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_int64, uint64) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+/** Compares the values of two float32x4 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_float32<N,expr_empty> cmp_gt(const float32<N,E1>& a,
															+                                  const float32<N,E2>& b)
															+{
															+    // float32 overload of a > b; the outcome is returned as
															+    // mask_float32<N>. SIMDPP_INL is applied here as on every other
															+    // comparison overload, so this wrapper gets the same inlining
															+    // treatment as its siblings.
															+    //
															+    // Both operand expressions are evaluated and handed to
															+    // detail::insn::i_cmp_gt, which performs the actual comparison.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval());
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_float32, float32)
														
 
															+
														
 
															+/** Compares the values of two float64x2 vectors for greater-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 > b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN > bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+mask_float64<N,expr_empty> cmp_gt(const float64<N,E1>& a,
															+                                  const float64<N,E2>& b)
															+{ // float64 overload of a > b; the outcome is returned as mask_float64<N>.
															+    return detail::insn::i_cmp_gt(a.eval(), b.eval()); // eval() both operands, then forward to detail::insn::i_cmp_gt
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_gt, mask_float64, float64) // NOTE(review): presumably adds scalar-operand overloads - confirm in scalar_arg_impl.h
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_le.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_le.h
@@ -0,0 +1,141 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_CMP_LE_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_CMP_LE_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/cmp_le.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+/** Compares the values of two signed int8 vectors for less-than or equal
															+
															+    @code
															+    r0 = (a0 <= b0) ? 0xff : 0x0
															+    ...
															+    rN = (aN <= bN) ? 0xff : 0x0
															+    @endcode
															+
															+    Both operands are evaluated and the comparison is forwarded to
															+    detail::insn::i_cmp_le.
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int8<N,expr_empty> cmp_le(const int8<N,E1>& a,
														
 
															+                               const int8<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_int8, int8)
														
 
															+/** Compares the values of two unsigned int8 vectors for less-than or equal
															+
															+    @code
															+    r0 = (a0 <= b0) ? 0xff : 0x0
															+    ...
															+    rN = (aN <= bN) ? 0xff : 0x0
															+    @endcode
															+
															+    Both operands are evaluated and the comparison is forwarded to
															+    detail::insn::i_cmp_le.
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int8<N,expr_empty> cmp_le(const uint8<N,E1>& a,
														
 
															+                               const uint8<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_int8, uint8)
														
 
															+/** Compares the values of two signed int16 vectors for less-than or equal
															+
															+    @code
															+    r0 = (a0 <= b0) ? 0xffff : 0x0
															+    ...
															+    rN = (aN <= bN) ? 0xffff : 0x0
															+    @endcode
															+
															+    Both operands are evaluated and the comparison is forwarded to
															+    detail::insn::i_cmp_le.
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int16<N,expr_empty> cmp_le(const int16<N,E1>& a,
														
 
															+                                const int16<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_int16, int16)
														
 
															+/** Compares the values of two unsigned int16 vectors for less-than or equal
															+
															+    @code
															+    r0 = (a0 <= b0) ? 0xffff : 0x0
															+    ...
															+    rN = (aN <= bN) ? 0xffff : 0x0
															+    @endcode
															+
															+    Both operands are evaluated and the comparison is forwarded to
															+    detail::insn::i_cmp_le.
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int16<N,expr_empty> cmp_le(const uint16<N,E1>& a,
														
 
															+                                const uint16<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_int16, uint16)
														
 
															+/** Compares the values of two signed int32 vectors for less-than or equal
															+
															+    @code
															+    r0 = (a0 <= b0) ? 0xffffffff : 0x0
															+    ...
															+    rN = (aN <= bN) ? 0xffffffff : 0x0
															+    @endcode
															+
															+    Both operands are evaluated and the comparison is forwarded to
															+    detail::insn::i_cmp_le.
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int32<N,expr_empty> cmp_le(const int32<N,E1>& a,
														
 
															+                                const int32<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_int32, int32)
														
 
															+/** Compares the values of two unsigned int32 vectors for less-than or equal
															+
															+    @code
															+    r0 = (a0 <= b0) ? 0xffffffff : 0x0
															+    ...
															+    rN = (aN <= bN) ? 0xffffffff : 0x0
															+    @endcode
															+
															+    Both operands are evaluated and the comparison is forwarded to
															+    detail::insn::i_cmp_le.
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int32<N,expr_empty> cmp_le(const uint32<N,E1>& a,
														
 
															+                                const uint32<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_int32, uint32)
														
 
															+/** Compares the values of two signed int64 vectors for less-than or equal
															+
															+    @code
															+    r0 = (a0 <= b0) ? 0xffffffffffffffff : 0x0
															+    ...
															+    rN = (aN <= bN) ? 0xffffffffffffffff : 0x0
															+    @endcode
															+
															+    Both operands are evaluated and the comparison is forwarded to
															+    detail::insn::i_cmp_le.
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int64<N,expr_empty> cmp_le(const int64<N,E1>& a,
														
 
															+                                const int64<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_int64, int64)
														
 
															+/** Compares the values of two unsigned int64 vectors for less-than or equal
															+
															+    @code
															+    r0 = (a0 <= b0) ? 0xffffffffffffffff : 0x0
															+    ...
															+    rN = (aN <= bN) ? 0xffffffffffffffff : 0x0
															+    @endcode
															+
															+    Both operands are evaluated and the comparison is forwarded to
															+    detail::insn::i_cmp_le.
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int64<N,expr_empty> cmp_le(const uint64<N,E1>& a,
														
 
															+                                const uint64<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_int64, uint64)
														
 
															+
														
 
															+/** Compares the values of two float32x4 vectors for less-than or equal
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 <= b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN <= bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_float32<N,expr_empty> cmp_le(const float32<N,E1>& a,
														
 
															+                                        const float32<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_le.
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_float32, float32)
														
 
															+
														
 
															+/** Compares the values of two float64x2 vectors for less-than or equal
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 <= b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN <= bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_float64<N,expr_empty> cmp_le(const float64<N,E1>& a,
														
 
															+                                        const float64<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_le.
														
 
															+    return detail::insn::i_cmp_le(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_le, mask_float64, float64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_lt.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_lt.h
@@ -0,0 +1,246 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_CMP_LT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_CMP_LT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/cmp_lt.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Compares the values of two signed int8x16 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int8<N,expr_empty> cmp_lt(const int8<N,E1>& a,
														
 
															+                               const int8<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_int8, int8)
														
 
															+
														
 
															+/** Compares the values of two unsigned int8x16 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 3-4}
														
 
															+    @icost{XOP, 1}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6-7}
														
 
															+    @icost{AVX2, 3-4}
														
 
															+    @icost{XOP, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int8<N,expr_empty> cmp_lt(const uint8<N,E1>& a,
														
 
															+                               const uint8<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_int8, uint8)
														
 
															+
														
 
															+/** Compares the values of two signed int16x8 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int16<N,expr_empty> cmp_lt(const int16<N,E1>& a,
														
 
															+                                const int16<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_int16, int16)
														
 
															+
														
 
															+/** Compares the values of two unsigned int16x8 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 3-4}
														
 
															+    @icost{XOP, 1}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6-7}
														
 
															+    @icost{AVX2, 3-4}
														
 
															+    @icost{XOP, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int16<N,expr_empty> cmp_lt(const uint16<N,E1>& a,
														
 
															+                                const uint16<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_int16, uint16)
														
 
															+
														
 
															+/** Compares the values of two signed int32x4 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int32<N,expr_empty> cmp_lt(const int32<N,E1>& a,
														
 
															+                                const int32<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_int32, int32)
														
 
															+
														
 
															+/** Compares the values of two unsigned int32x4 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 3-4}
														
 
															+    @icost{XOP, 1}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6-7}
														
 
															+    @icost{AVX2, 3-4}
														
 
															+    @icost{XOP, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int32<N,expr_empty> cmp_lt(const uint32<N,E1>& a,
														
 
															+                                const uint32<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_int32, uint32)
														
 
															+
														
 
															+/** Compares the values of two signed int64 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    Supported since AVX2, NEON64. Not supported on ALTIVEC.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int64<N,expr_empty> cmp_lt(const int64<N,E1>& a,
														
 
															+                                const int64<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_int64, int64)
														
 
															+
														
 
															+/** Compares the values of two unsigned int64 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    Supported since AVX2, NEON64. Not supported on ALTIVEC.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_int64<N,expr_empty> cmp_lt(const uint64<N,E1>& a,
														
 
															+                                const uint64<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_int64, uint64)
														
 
															+
														
 
															+/** Compares the values of two float32x4 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_float32<N,expr_empty> cmp_lt(const float32<N,E1>& a,
														
 
															+                                  const float32<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_float32, float32)
														
 
															+
														
 
															+/** Compares the values of two float64x2 vectors for less-than
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 < b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN < bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_float64<N,expr_empty> cmp_lt(const float64<N,E1>& a,
														
 
															+                                  const float64<N,E2>& b)
														
 
															+{   // Evaluate both operands, then forward to the instruction-level i_cmp_lt.
														
 
															+    return detail::insn::i_cmp_lt(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(cmp_lt, mask_float64, float64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_neq.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/cmp_neq.h
@@ -0,0 +1,196 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_CMP_NEQ_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_CMP_NEQ_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/cmp_neq.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Compares the values of two int8x16 vectors for inequality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 != b0) ? 0xff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN != bN) ? 0xff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+    @icost{XOP, 1}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 4}
														
 
															+    @icost{AVX2, 2}
														
 
															+    @icost{XOP, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+mask_int8<N,expr_empty> cmp_neq(const any_int8<N,V1>& a,
														
 
															+                                const any_int8<N,V2>& b)
														
 
															+{   // Both evaluated operands are assigned to the single type picked by get_expr2_nosign<V1,V2>, then compared.
														
 
															+    typename detail::get_expr2_nosign<V1, V2>::type ra, rb;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    rb = b.wrapped().eval();
														
 
															+    return detail::insn::i_cmp_neq(ra, rb);
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int8, any_int8)
														
 
															+
														
 
															+/** Compares the values of two int16x8 vectors for inequality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 != b0) ? 0xffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN != bN) ? 0xffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+    @icost{XOP, 1}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 4}
														
 
															+    @icost{AVX2, 2}
														
 
															+    @icost{XOP, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+mask_int16<N,expr_empty> cmp_neq(const any_int16<N,V1>& a,
														
 
															+                                 const any_int16<N,V2>& b)
														
 
															+{   // Both evaluated operands are assigned to the single type picked by get_expr2_nosign<V1,V2>, then compared.
														
 
															+    typename detail::get_expr2_nosign<V1, V2>::type ra, rb;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    rb = b.wrapped().eval();
														
 
															+    return detail::insn::i_cmp_neq(ra, rb);
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int16, any_int16)
														
 
															+
														
 
															+/** Compares the values of two int32x4 vectors for inequality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 != b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN != bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+    @icost{XOP, 1}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 4}
														
 
															+    @icost{AVX2, 2}
														
 
															+    @icost{XOP, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+mask_int32<N,expr_empty> cmp_neq(const any_int32<N,V1>& a,
														
 
															+                                 const any_int32<N,V2>& b)
														
 
															+{   // Both evaluated operands are assigned to the single type picked by get_expr2_nosign<V1,V2>, then compared.
														
 
															+    typename detail::get_expr2_nosign<V1, V2>::type ra, rb;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    rb = b.wrapped().eval();
														
 
															+    return detail::insn::i_cmp_neq(ra, rb);
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int32, any_int32)
														
 
															+
														
 
															+/** Compares the values of two int64x2 vectors for inequality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 != b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN != bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 5}
														
 
															+    @icost{SSE4.1, AVX, 2}
														
 
															+    @icost{XOP, 1}
														
 
															+    @icost{NEON, 4}
														
 
															+    @icost{ALTIVEC, 3-5}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, AVX, 10}
														
 
															+    @icost{SSE4.1, 4}
														
 
															+    @icost{AVX2, XOP, 2}
														
 
															+    @icost{NEON, 8}
														
 
															+    @icost{ALTIVEC, 6-8}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+mask_int64<N,expr_empty> cmp_neq(const any_int64<N,V1>& a,
														
 
															+                                 const any_int64<N,V2>& b)
														
 
															+{   // Both evaluated operands are assigned to the single type picked by get_expr2_nosign<V1,V2>, then compared.
														
 
															+    typename detail::get_expr2_nosign<V1, V2>::type ra, rb;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    rb = b.wrapped().eval();
														
 
															+    return detail::insn::i_cmp_neq(ra, rb);
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_int64, any_int64)
														
 
															+
														
 
															+/** Compares the values of two float32x4 vectors for inequality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 != b0) ? 0xffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN != bN) ? 0xffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, ALTIVEC, 2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @icost{NEON, ALTIVEC, 4}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+mask_float32<N,expr_empty> cmp_neq(const any_float32<N,V1>& a,
														
 
															+                                         const any_float32<N,V2>& b)
														
 
															+{   // Evaluate the wrapped operands, then forward to the instruction-level i_cmp_neq.
														
 
															+    return detail::insn::i_cmp_neq(a.wrapped().eval(), b.wrapped().eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_float32, any_float32)
														
 
															+
														
 
															+/** Compares the values of two float64x2 vectors for inequality
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (a0 != b0) ? 0xffffffffffffffff : 0x0
														
 
															+    ...
														
 
															+    rN = (aN != bN) ? 0xffffffffffffffff : 0x0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+mask_float64<N,expr_empty> cmp_neq(const any_float64<N,V1>& a,
														
 
															+                                         const any_float64<N,V2>& b)
														
 
															+{   // Evaluate the wrapped operands, then forward to the instruction-level i_cmp_neq.
														
 
															+    return detail::insn::i_cmp_neq(a.wrapped().eval(), b.wrapped().eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(cmp_neq, mask_float64, any_float64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/combine.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/combine.h
@@ -0,0 +1,97 @@
 
															+/*  Copyright (C) 2011-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_COMBINE_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_COMBINE_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/combine.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Combines two vectors into one twice as large. This function is useful when
														
 
															+    the ISA supports multiple vector sizes and the user does some operations
														
 
															+    with vectors that are narrower than the widest native vector.
														
 
															+
														
 
															+    For example, on AVX, two __m128 vectors can be combined into a __m256
														
 
															+    vector.
														
 
															+
														
 
															+    @todo icost
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint8<N*2> combine(const uint8<N,E1>& a1, const uint8<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<uint8<N*2>>(a1.eval(), a2.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint16<N*2> combine(const uint16<N,E1>& a1, const uint16<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<uint16<N*2>>(a1.eval(), a2.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint32<N*2> combine(const uint32<N,E1>& a1, const uint32<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<uint32<N*2>>(a1.eval(), a2.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint64<N*2> combine(const uint64<N,E1>& a1, const uint64<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<uint64<N*2>>(a1.eval(), a2.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int8<N*2> combine(const int8<N,E1>& a1, const int8<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<uint8<N*2>>(uint8<N>(a1.eval()),
														
 
															+                                               uint8<N>(a2.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int16<N*2> combine(const int16<N,E1>& a1, const int16<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<uint16<N*2>>(uint16<N>(a1.eval()),
														
 
															+                                                uint16<N>(a2.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int32<N*2> combine(const int32<N,E1>& a1, const int32<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<uint32<N*2>>(uint32<N>(a1.eval()),
														
 
															+                                                uint32<N>(a2.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int64<N*2> combine(const int64<N,E1>& a1, const int64<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<uint64<N*2>>(uint64<N>(a1.eval()),
														
 
															+                                                uint64<N>(a2.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float32<N*2> combine(const float32<N,E1>& a1, const float32<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<float32<N*2>>(a1.eval(), a2.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float64<N*2> combine(const float64<N,E1>& a1, const float64<N,E2>& a2)
														
 
															+{
														
 
															+    return detail::insn::i_combine<float64<N*2>>(a1.eval(), a2.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/detail/get_expr_bitwise.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/detail/get_expr_bitwise.h
@@ -0,0 +1,169 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_DETAIL_GET_EXPR_BITWISE_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_DETAIL_GET_EXPR_BITWISE_H
														
 
															+
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+namespace detail {
														
 
															+
														
 
															+
														
 
															+/*  We want to reduce the number of overloads that need to be created in order
														
 
															+    to match a specific case of an expression tree containing 'bit_and',
														
 
															+    or 'bit_andnot' nodes. The following "optimizations"
														
 
															+    are performed:
														
 
															+
														
 
															+      * If the parameter types have different element sizes then both
														
 
															+        expression arguments have the same type as the expression itself, except
														
 
															+        that signed vectors are converted to unsigned vectors.
														
 
															+      * Otherwise if the expression is of a mask type then both types are the
														
 
															+        same as the expression itself.
														
 
															+      * Otherwise both types have the same type as the expression itself, except
														
 
															+        that signed vectors are converted to unsigned vectors and if the second
														
 
															+        type is a mask type then it is converted to floating-point mask if the
														
 
															+        expression is floating-point expression and to integer mask otherwise.
														
 
															+
														
 
															+     So, as a result, the following tuples of types will appear as the arguments
														
 
															+     of the returned expression:
														
 
															+
														
 
															+      * mask_int8, mask_int8
														
 
															+      * uint8, mask_int8
														
 
															+      * uint8, uint8
														
 
															+      * mask_int16, mask_int16
														
 
															+      * uint16, mask_int16
														
 
															+      * uint16, uint16
														
 
															+      * mask_int32, mask_int32
														
 
															+      * uint32, mask_int32
														
 
															+      * uint32, uint32
														
 
															+      * mask_int64, mask_int64
														
 
															+      * uint64, mask_int64
														
 
															+      * uint64, uint64
														
 
															+      * mask_float32, mask_float32
														
 
															+      * float32, mask_float32
														
 
															+      * float32, float32
														
 
															+      * mask_float64, mask_float64
														
 
															+      * float64, mask_float64
														
 
															+      * float64, float64
														
 
															+
														
 
															+    The type of the returned expression is governed by the usual rules
														
 
															+    (see simdpp/types/tag.h)
														
 
															+*/
														
 
															+
														
 
															+template<class V1, class V2>
														
 
															+struct get_expr_bitwise2_and_impl {
														
 
															+    using tags = expr2_maybe_scalar_tags<V1, V2>;
														
 
															+
														
 
															+    // (size_tag) get the size tag of the resulting expression
														
 
															+    static const unsigned size_tag = tags::v1_size_tag > tags::v2_size_tag
														
 
															+                                    ? tags::v1_size_tag : tags::v2_size_tag;
														
 
															+
														
 
															+    // (type_tag) get the type tag of the expression. We compute it in the same
														
 
															+    // way get_expr2 computes it, i.e.
														
 
															+    // type_tag == get_expr2<V1,V2>::type::type_tag
														
 
															+    static const unsigned type_tag_t1 = tags::v1_type_tag > tags::v2_type_tag
														
 
															+                                    ? tags::v1_type_tag : tags::v2_type_tag;
														
 
															+    static const bool is_mask_op1 = type_tag_t1 == SIMDPP_TAG_MASK_INT ||
														
 
															+                                    type_tag_t1 == SIMDPP_TAG_MASK_FLOAT;
														
 
															+    static const unsigned type_tag = (is_mask_op1 && tags::v1_size_tag != tags::v2_size_tag)
														
 
															+                                    ? SIMDPP_TAG_UINT : type_tag_t1;
														
 
															+
														
 
															+    // strip signed integer types
														
 
															+    static const unsigned v1_type_tag = type_tag == SIMDPP_TAG_INT ? SIMDPP_TAG_UINT : type_tag;
														
 
															+
														
 
															+
														
 
															+    static const bool is_v2_mask = tags::v2_type_tag == SIMDPP_TAG_MASK_INT ||
														
 
															+                                   tags::v2_type_tag == SIMDPP_TAG_MASK_FLOAT;
														
 
															+    static const bool is_v1_float = type_tag == SIMDPP_TAG_FLOAT ||
														
 
															+                                     type_tag == SIMDPP_TAG_MASK_FLOAT;
														
 
															+
														
 
															+    // if second parameter is a mask, then:
														
 
															+    //    - convert the mask to float mask if the expression is float
														
 
															+    //    - convert the mask to integer mask otherwise
														
 
															+    static const unsigned v2_type_tag = (!is_v2_mask) ? v1_type_tag :
														
 
															+                                        is_v1_float ? SIMDPP_TAG_MASK_FLOAT :
														
 
															+                                        SIMDPP_TAG_MASK_INT;
														
 
															+
														
 
															+    using v1_final_type = typename type_of_tag<v1_type_tag + size_tag,
														
 
															+                                               tags::length_bytes, void>::type;
														
 
															+    using v2_final_type = typename type_of_tag<v2_type_tag + size_tag,
														
 
															+                                               tags::length_bytes, void>::type;
														
 
															+};
														
 
															+
														
 
															+template<template<class, class> class E, class V1, class V2>
														
 
															+struct get_expr_bitwise2_and {
														
 
															+    using impl = get_expr_bitwise2_and_impl<V1, V2>;
														
 
															+    using type = typename type_of_tag<impl::type_tag + impl::size_tag,
														
 
															+                                      impl::tags::length_bytes,
														
 
															+                                      E<V1, V2>>::type;
														
 
															+};
														
 
															+
														
 
															+
														
 
															+/*  The case with bit_or is similar to bit_and and bit_andnot except that the
														
 
															+    argument types are either both masks or both non-masks.
														
 
															+
														
 
															+      * Both expression arguments have the same type as the expression itself,
														
 
															+        except that signed vectors are converted to unsigned vectors.
														
 
															+
														
 
															+     So, as a result, the following tuples of types will appear as the arguments
														
 
															+     of the returned expression:
														
 
															+
														
 
															+      * mask_int8, mask_int8
														
 
															+      * uint8, uint8
														
 
															+      * mask_int16, mask_int16
														
 
															+      * uint16, uint16
														
 
															+      * mask_int32, mask_int32
														
 
															+      * uint32, uint32
														
 
															+      * mask_int64, mask_int64
														
 
															+      * uint64, uint64
														
 
															+      * mask_float32, mask_float32
														
 
															+      * float32, float32
														
 
															+      * mask_float64, mask_float64
														
 
															+      * float64, float64
														
 
															+
														
 
															+    The type of the returned expression is governed by the usual rules
														
 
															+    (see simdpp/types/tag.h)
														
 
															+*/
														
 
															+
														
 
															+template<class V1, class V2>
														
 
															+class get_expr_bit_or {
														
 
															+
														
 
															+    // (size_tag) get the size tag of the resulting expression
														
 
															+    static const unsigned size_tag = V1::size_tag > V2::size_tag ? V1::size_tag : V2::size_tag;
														
 
															+
														
 
															+    // (type_tag) get the type tag of the expression. We compute it in the same
														
 
															+    // way get_expr2 computes it, i.e.
														
 
															+    // type_tag == get_expr2<V1,V2>::type::type_tag
														
 
															+    static const unsigned type_tag_t1 = V1::type_tag > V2::type_tag ? V1::type_tag : V2::type_tag;
														
 
															+    static const bool is_mask_op1 = type_tag_t1 == SIMDPP_TAG_MASK_INT ||
														
 
															+                                    type_tag_t1 == SIMDPP_TAG_MASK_FLOAT;
														
 
															+    static const unsigned type_tag = (is_mask_op1 && V1::size_tag != V2::size_tag)
														
 
															+                                    ? SIMDPP_TAG_UINT : type_tag_t1;
														
 
															+
														
 
															+    // strip signed integer types
														
 
															+    static const unsigned v12_type_tag = type_tag == SIMDPP_TAG_INT ? SIMDPP_TAG_UINT : type_tag;
														
 
															+
														
 
															+
														
 
															+public:
														
 
															+    using v1_final_type = typename type_of_tag<v12_type_tag + size_tag,
														
 
															+                                               V1::length_bytes, void>::type;
														
 
															+    using v2_final_type = typename type_of_tag<v12_type_tag + size_tag,
														
 
															+                                               V1::length_bytes, void>::type;
														
 
															+
														
 
															+    using type = typename type_of_tag<type_tag + size_tag, V1::length_bytes,
														
 
															+                                      expr_bit_or<V1, V2>>::type;
														
 
															+};
														
 
															+
														
 
															+
														
 
															+
														
 
															+} // namespace detail
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/detail/get_expr_uint.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/detail/get_expr_uint.h
@@ -0,0 +1,233 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_DETAIL_GET_EXPR_UINT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_DETAIL_GET_EXPR_UINT_H
														
 
															+
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+namespace detail {
														
 
															+
														
 
															+
														
 
															+/*  We want to reduce the number of overloads that need to be created in order
														
 
															+    to match a specific case of an expression tree containing various integer
														
 
															+    operation nodes, such as add(int), mul_lo(int), etc. For a particular
														
 
															+    vector size each of these operations is equivalent regardless of the
														
 
															+    argument types. Thus we simply convert the arguments of the expression to
														
 
															+    uint expressions of certain configuration.
														
 
															+
														
 
															+    As a result, the following tuples of types will appear as the arguments
														
 
															+     of the returned expression:
														
 
															+
														
 
															+      * uint8, uint8
														
 
															+      * uint16, uint16
														
 
															+      * uint32, uint32
														
 
															+      * uint64, uint64
														
 
															+*/
														
 
															+
														
 
															+template<class V1, class V2>
														
 
															+struct expr2_uint_maybe_scalar_tags {
														
 
															+    static const unsigned v1_type_tag = V1::type_tag;
														
 
															+    static const unsigned v1_size_tag = V1::size_tag;
														
 
															+    static const unsigned v2_type_tag = V2::type_tag;
														
 
															+    static const unsigned v2_size_tag = V2::size_tag;
														
 
															+    static const unsigned length_bytes = V1::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V2>
														
 
															+struct expr2_uint_maybe_scalar_tags<int, V2> {
														
 
															+    static const unsigned v1_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v1_size_tag = V2::size_tag;
														
 
															+    static const unsigned v2_type_tag = V2::type_tag;
														
 
															+    static const unsigned v2_size_tag = V2::size_tag;
														
 
															+    static const unsigned length_bytes = V2::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V2>
														
 
															+struct expr2_uint_maybe_scalar_tags<long, V2> {
														
 
															+    static const unsigned v1_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v1_size_tag = V2::size_tag;
														
 
															+    static const unsigned v2_type_tag = V2::type_tag;
														
 
															+    static const unsigned v2_size_tag = V2::size_tag;
														
 
															+    static const unsigned length_bytes = V2::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V2>
														
 
															+struct expr2_uint_maybe_scalar_tags<long long, V2> {
														
 
															+    static const unsigned v1_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v1_size_tag = V2::size_tag;
														
 
															+    static const unsigned v2_type_tag = V2::type_tag;
														
 
															+    static const unsigned v2_size_tag = V2::size_tag;
														
 
															+    static const unsigned length_bytes = V2::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V2>
														
 
															+struct expr2_uint_maybe_scalar_tags<unsigned, V2> {
														
 
															+    static const unsigned v1_type_tag = SIMDPP_TAG_UINT;
														
 
															+    static const unsigned v1_size_tag = V2::size_tag;
														
 
															+    static const unsigned v2_type_tag = V2::type_tag;
														
 
															+    static const unsigned v2_size_tag = V2::size_tag;
														
 
															+    static const unsigned length_bytes = V2::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V2>
														
 
															+struct expr2_uint_maybe_scalar_tags<unsigned long, V2> {
														
 
															+    static const unsigned v1_type_tag = SIMDPP_TAG_UINT;
														
 
															+    static const unsigned v1_size_tag = V2::size_tag;
														
 
															+    static const unsigned v2_type_tag = V2::type_tag;
														
 
															+    static const unsigned v2_size_tag = V2::size_tag;
														
 
															+    static const unsigned length_bytes = V2::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V2>
														
 
															+struct expr2_uint_maybe_scalar_tags<unsigned long long, V2> {
														
 
															+    static const unsigned v1_type_tag = SIMDPP_TAG_UINT;
														
 
															+    static const unsigned v1_size_tag = V2::size_tag;
														
 
															+    static const unsigned v2_type_tag = V2::type_tag;
														
 
															+    static const unsigned v2_size_tag = V2::size_tag;
														
 
															+    static const unsigned length_bytes = V2::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V2>
														
 
															+struct expr2_uint_maybe_scalar_tags<float, V2> {
														
 
															+    static const unsigned v1_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v1_size_tag = V2::size_tag;
														
 
															+    static const unsigned v2_type_tag = V2::type_tag;
														
 
															+    static const unsigned v2_size_tag = V2::size_tag;
														
 
															+    static const unsigned length_bytes = V2::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V2>
														
 
															+struct expr2_uint_maybe_scalar_tags<double, V2> {
														
 
															+    static const unsigned v1_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v1_size_tag = V2::size_tag;
														
 
															+    static const unsigned v2_type_tag = V2::type_tag;
														
 
															+    static const unsigned v2_size_tag = V2::size_tag;
														
 
															+    static const unsigned length_bytes = V2::length_bytes;
														
 
															+};
														
 
															+
														
 
															+
														
 
															+template<class V1>
														
 
															+struct expr2_uint_maybe_scalar_tags<V1, int> {
														
 
															+    static const unsigned v1_type_tag = V1::type_tag;
														
 
															+    static const unsigned v1_size_tag = V1::size_tag;
														
 
															+    static const unsigned v2_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v2_size_tag = V1::size_tag;
														
 
															+    static const unsigned length_bytes = V1::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V1>
														
 
															+struct expr2_uint_maybe_scalar_tags<V1, long> {
														
 
															+    static const unsigned v1_type_tag = V1::type_tag;
														
 
															+    static const unsigned v1_size_tag = V1::size_tag;
														
 
															+    static const unsigned v2_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v2_size_tag = V1::size_tag;
														
 
															+    static const unsigned length_bytes = V1::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V1>
														
 
															+struct expr2_uint_maybe_scalar_tags<V1, long long> {
														
 
															+    static const unsigned v1_type_tag = V1::type_tag;
														
 
															+    static const unsigned v1_size_tag = V1::size_tag;
														
 
															+    static const unsigned v2_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v2_size_tag = V1::size_tag;
														
 
															+    static const unsigned length_bytes = V1::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V1>
														
 
															+struct expr2_uint_maybe_scalar_tags<V1, unsigned> {
														
 
															+    static const unsigned v1_type_tag = V1::type_tag;
														
 
															+    static const unsigned v1_size_tag = V1::size_tag;
														
 
															+    static const unsigned v2_type_tag = SIMDPP_TAG_UINT;
														
 
															+    static const unsigned v2_size_tag = V1::size_tag;
														
 
															+    static const unsigned length_bytes = V1::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V1>
														
 
															+struct expr2_uint_maybe_scalar_tags<V1, unsigned long> {
														
 
															+    static const unsigned v1_type_tag = V1::type_tag;
														
 
															+    static const unsigned v1_size_tag = V1::size_tag;
														
 
															+    static const unsigned v2_type_tag = SIMDPP_TAG_UINT;
														
 
															+    static const unsigned v2_size_tag = V1::size_tag;
														
 
															+    static const unsigned length_bytes = V1::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V1>
														
 
															+struct expr2_uint_maybe_scalar_tags<V1, unsigned long long> {
														
 
															+    static const unsigned v1_type_tag = V1::type_tag;
														
 
															+    static const unsigned v1_size_tag = V1::size_tag;
														
 
															+    static const unsigned v2_type_tag = SIMDPP_TAG_UINT;
														
 
															+    static const unsigned v2_size_tag = V1::size_tag;
														
 
															+    static const unsigned length_bytes = V1::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V1>
														
 
															+struct expr2_uint_maybe_scalar_tags<V1, float> {
														
 
															+    static const unsigned v1_type_tag = V1::type_tag;
														
 
															+    static const unsigned v1_size_tag = V1::size_tag;
														
 
															+    static const unsigned v2_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v2_size_tag = V1::size_tag;
														
 
															+    static const unsigned length_bytes = V1::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V1>
														
 
															+struct expr2_uint_maybe_scalar_tags<V1, double> {
														
 
															+    static const unsigned v1_type_tag = V1::type_tag;
														
 
															+    static const unsigned v1_size_tag = V1::size_tag;
														
 
															+    static const unsigned v2_type_tag = SIMDPP_TAG_INT;
														
 
															+    static const unsigned v2_size_tag = V1::size_tag;
														
 
															+    static const unsigned length_bytes = V1::length_bytes;
														
 
															+};
														
 
															+
														
 
															+template<class V1, class V2>
														
 
															+struct get_expr_uint_impl {
														
 
															+    using tags = expr2_uint_maybe_scalar_tags<V1, V2>;
														
 
															+
														
 
															+#if SIMDPP_EXPR_DEBUG
														
 
															+    static_assert(tags::v1_size_tag == tags::v2_size_tag, "Mismatching vector sizes");
														
 
															+    static_assert(tags::v1_type_tag == SIMDPP_TAG_MASK_INT ||
														
 
															+                  tags::v1_type_tag == SIMDPP_TAG_UINT ||
														
 
															+                  tags::v1_type_tag == SIMDPP_TAG_INT, "Incorrect type parameter");
														
 
															+    static_assert(tags::v2_type_tag == SIMDPP_TAG_MASK_INT ||
														
 
															+                  tags::v2_type_tag == SIMDPP_TAG_UINT ||
														
 
															+                  tags::v2_type_tag == SIMDPP_TAG_INT, "Incorrect type parameter");
														
 
															+#endif
														
 
															+
														
 
															+    // the size tag of the expression
														
 
															+    static const unsigned size_tag = tags::v1_size_tag;
														
 
															+
														
 
															+    // (type_tag) get the type tag of the expression. Pretty much the same as
														
 
															+    // get_expr2_nomask does
														
 
															+    static const unsigned type_tag_t1 = tags::v1_type_tag > tags::v2_type_tag ? tags::v1_type_tag : tags::v2_type_tag;
														
 
															+    static const unsigned type_tag = (type_tag_t1 == SIMDPP_TAG_MASK_INT) ? SIMDPP_TAG_UINT : type_tag_t1;
														
 
															+
														
 
															+    // strip signed integer types and masks
														
 
															+    static const unsigned v1_type_tag = SIMDPP_TAG_UINT;
														
 
															+    static const unsigned v2_type_tag = SIMDPP_TAG_UINT;
														
 
															+
														
 
															+    using v1_final_type = typename type_of_tag<v1_type_tag + size_tag,
														
 
															+                                               tags::length_bytes, void>::type;
														
 
															+    using v2_final_type = typename type_of_tag<v2_type_tag + size_tag,
														
 
															+                                               tags::length_bytes, void>::type;
														
 
															+};
														
 
															+
														
 
															+template<template<class, class> class E, class V1, class V2>
														
 
															+struct get_expr_uint {
														
 
															+    using impl = get_expr_uint_impl<V1, V2>;
														
 
															+
														
 
															+    using type = typename type_of_tag<impl::type_tag + impl::size_tag,
														
 
															+                                      impl::tags::length_bytes,
														
 
															+                                      E<V1, V2>>::type;
														
 
															+};
														
 
															+
														
 
															+} // namespace detail
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/detail/scalar_arg_impl.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/detail/scalar_arg_impl.h
@@ -0,0 +1,216 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_DETAIL_SCALAR_ARG_IMPL_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_DETAIL_SCALAR_ARG_IMPL_H
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/expr.h>
														
 
															+#include <simdpp/core/make_float.h>
														
 
															+#include <simdpp/core/make_int.h>
														
 
															+#include <simdpp/core/make_uint.h>
														
 
															+#include <simdpp/detail/expr/scalar.h>
														
 
															+
														
 
															+/*  The following implements the boilerplate for binary function wrappers that
														
 
															+    accept values as scalar arguments.
														
 
															+*/
														
 
															+
														
 
															+// Simple implementation returning an empty expression.
														
 
															+// For each scalar type (unsigned, unsigned long, unsigned long long, int, long,
														
 
															+// long long, float, double) this defines two overloads of FUNC: one taking the
														
 
															+// scalar as the left operand and one taking it as the right operand. Each overload
														
 
															+// converts the scalar with make_uint, make_int or make_float (instantiated with
														
 
															+// NEW_VEC) and forwards to FUNC; the declared return type is RET_VEC<N,expr_empty>.
														
 
															+#define SIMDPP_SCALAR_ARG_IMPL_VEC_IMPL(FUNC, RET_VEC, EXPR, NEW_VEC)                                                                   \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const unsigned& a,              const EXPR<N,V>& b) { return FUNC(make_uint<NEW_VEC>(a), b); }   \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const unsigned long& a,         const EXPR<N,V>& b) { return FUNC(make_uint<NEW_VEC>(a), b); }   \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const unsigned long long& a,    const EXPR<N,V>& b) { return FUNC(make_uint<NEW_VEC>(a), b); }   \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const int& a,                   const EXPR<N,V>& b) { return FUNC(make_int<NEW_VEC>(a), b); }    \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const long& a,                  const EXPR<N,V>& b) { return FUNC(make_int<NEW_VEC>(a), b); }    \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const long long& a,             const EXPR<N,V>& b) { return FUNC(make_int<NEW_VEC>(a), b); }    \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const float& a,                 const EXPR<N,V>& b) { return FUNC(make_float<NEW_VEC>(a), b); }  \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const double& a,                const EXPR<N,V>& b) { return FUNC(make_float<NEW_VEC>(a), b); }  \
														
 
															+                                                                                                                                                        \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const EXPR<N,V>& a, const unsigned& b          ) { return FUNC(a, make_uint<NEW_VEC>(b)); }   \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const EXPR<N,V>& a, const unsigned long& b     ) { return FUNC(a, make_uint<NEW_VEC>(b)); }   \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const EXPR<N,V>& a, const unsigned long long& b) { return FUNC(a, make_uint<NEW_VEC>(b)); }   \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const EXPR<N,V>& a, const int& b               ) { return FUNC(a, make_int<NEW_VEC>(b)); }    \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const EXPR<N,V>& a, const long& b              ) { return FUNC(a, make_int<NEW_VEC>(b)); }    \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const EXPR<N,V>& a, const long long& b         ) { return FUNC(a, make_int<NEW_VEC>(b)); }    \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const EXPR<N,V>& a, const float& b             ) { return FUNC(a, make_float<NEW_VEC>(b)); }  \
														
 
															+template<unsigned N, class V> SIMDPP_INL RET_VEC<N,expr_empty> FUNC(const EXPR<N,V>& a, const double& b            ) { return FUNC(a, make_float<NEW_VEC>(b)); }
														
 
															+// end #define
														
 
															+
														
 
															+
														
 
															+// Scalar-argument overloads of FUNC for VEC operands: instantiates
														
 
															+// SIMDPP_SCALAR_ARG_IMPL_VEC_IMPL with VEC<N> as the type the scalar is converted to.
														
 
															+#define SIMDPP_SCALAR_ARG_IMPL_VEC(FUNC, RET_VEC, VEC) \
														
 
															+    SIMDPP_SCALAR_ARG_IMPL_VEC_IMPL(FUNC, RET_VEC, VEC, VEC<N>)
														
 
															+// Scalar-argument overloads of FUNC for EXPR operands: instantiates
														
 
															+// SIMDPP_SCALAR_ARG_IMPL_VEC_IMPL with detail::get_expr<V>::type as the type the
														
 
															+// scalar is converted to.
														
 
															+#define SIMDPP_SCALAR_ARG_IMPL_VEC_EXPR(FUNC, RET_VEC, EXPR) \
														
 
															+    SIMDPP_SCALAR_ARG_IMPL_VEC_IMPL(FUNC, RET_VEC, EXPR, typename detail::get_expr<V>::type)
														
 
															+// end #define
														
 
															+
														
 
															+// Implementation returning an expression for vector arguments.
														
 
															+// For each scalar type (unsigned, unsigned long, unsigned long long, int, long,
														
 
															+// long long, float, double) this defines two overloads of FUNC that take a
														
 
															+// VEC<N,V> and the scalar, in either order. Each overload brace-initializes its
														
 
															+// RET_VEC<N, EXPR<A, B>> return value from the two operands, where A and B are
														
 
															+// the operand types in call order.
														
 
															+#define SIMDPP_SCALAR_ARG_IMPL_EXPR(FUNC, EXPR, RET_VEC, VEC)                   \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<unsigned, VEC<N,V>>>                                            \
														
 
															+    FUNC(const unsigned& a, const VEC<N,V>& b)                                  \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<unsigned long, VEC<N,V>>>                                       \
														
 
															+    FUNC(const unsigned long& a, const VEC<N,V>& b)                             \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<unsigned long long, VEC<N,V>>>                                  \
														
 
															+    FUNC(const unsigned long long& a, const VEC<N,V>& b)                        \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<int, VEC<N,V>>>                                                 \
														
 
															+    FUNC(const int& a, const VEC<N,V>& b)                                       \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<long, VEC<N,V>>>                                                \
														
 
															+    FUNC(const long& a, const VEC<N,V>& b)                                      \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<long long, VEC<N,V>>>                                           \
														
 
															+    FUNC(const long long& a, const VEC<N,V>& b)                                 \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<float, VEC<N,V>>>                                               \
														
 
															+    FUNC(const float& a, const VEC<N,V>& b)                                     \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<double, VEC<N,V>>>                                              \
														
 
															+    FUNC(const double& a, const VEC<N,V>& b)                                    \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<VEC<N,V>, unsigned>>                                            \
														
 
															+    FUNC(const VEC<N,V>& a, const unsigned& b)                                  \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<VEC<N,V>, unsigned long>>                                       \
														
 
															+    FUNC(const VEC<N,V>& a, const unsigned long& b)                             \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<VEC<N,V>, unsigned long long>>                                  \
														
 
															+    FUNC(const VEC<N,V>& a, const unsigned long long& b)                        \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<VEC<N,V>, int>>                                                 \
														
 
															+    FUNC(const VEC<N,V>& a, const int& b)                                       \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<VEC<N,V>, long>>                                                \
														
 
															+    FUNC(const VEC<N,V>& a, const long& b)                                      \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<VEC<N,V>, long long>>                                           \
														
 
															+    FUNC(const VEC<N,V>& a, const long long& b)                                 \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<VEC<N,V>, float>>                                               \
														
 
															+    FUNC(const VEC<N,V>& a, const float& b)                                     \
														
 
															+{ return { { a, b } }; }                                                        \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+RET_VEC<N, EXPR<VEC<N,V>, double>>                                              \
														
 
															+    FUNC(const VEC<N,V>& a, const double& b)                                    \
														
 
															+{ return { { a, b } }; }
														
 
															+// end #define
														
 
															+
														
 
															+// An implementation for integer operations that use get_expr_uint.
														
 
															+// Defines overloads of FUNC taking a VEC<N,V> and a scalar (unsigned, unsigned long,
														
 
															+// unsigned long long, int, long, long long, float or double), with the scalar on
														
 
															+// either side. The return type is detail::get_expr_uint<EXPR, ...>::type and is
														
 
															+// brace-initialized from the scalar and the vector's wrapped() value.
														
 
															+// NOTE(review): the INT_VEC parameter is not referenced in the macro body.
														
 
															+#define SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(FUNC, EXPR, VEC, INT_VEC)           \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, V, unsigned>::type                         \
														
 
															+        FUNC(const VEC<N,V>& a, const unsigned& b)                              \
														
 
															+{ return { { a.wrapped(), b } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, V, unsigned long>::type                    \
														
 
															+        FUNC(const VEC<N,V>& a, const unsigned long& b)                         \
														
 
															+{ return { { a.wrapped(), b } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, V, unsigned long long>::type               \
														
 
															+        FUNC(const VEC<N,V>& a, const unsigned long long& b)                    \
														
 
															+{ return { { a.wrapped(), b } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, V, int>::type                              \
														
 
															+        FUNC(const VEC<N,V>& a, const int& b)                                   \
														
 
															+{ return { { a.wrapped(), b } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, V, long>::type                             \
														
 
															+        FUNC(const VEC<N,V>& a, const long& b)                                  \
														
 
															+{ return { { a.wrapped(), b } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, V, long long>::type                        \
														
 
															+        FUNC(const VEC<N,V>& a, const long long& b)                             \
														
 
															+{ return { { a.wrapped(), b } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, V, float>::type                            \
														
 
															+        FUNC(const VEC<N,V>& a, const float& b)                                 \
														
 
															+{ return { { a.wrapped(), b } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, V, double>::type                           \
														
 
															+        FUNC(const VEC<N,V>& a, const double& b)                                \
														
 
															+{ return { { a.wrapped(), b } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, unsigned, V>::type                         \
														
 
															+        FUNC(const unsigned& a, const VEC<N,V>& b)                              \
														
 
															+{ return { { a, b.wrapped() } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, unsigned long, V>::type                    \
														
 
															+        FUNC(const unsigned long& a, const VEC<N,V>& b)                         \
														
 
															+{ return { { a, b.wrapped() } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, unsigned long long, V>::type               \
														
 
															+        FUNC(const unsigned long long& a, const VEC<N,V>& b)                    \
														
 
															+{ return { { a, b.wrapped() } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, int, V>::type                              \
														
 
															+        FUNC(const int& a, const VEC<N,V>& b)                                   \
														
 
															+{ return { { a, b.wrapped() } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, long, V>::type                             \
														
 
															+        FUNC(const long& a, const VEC<N,V>& b)                                  \
														
 
															+{ return { { a, b.wrapped() } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, long long, V>::type                        \
														
 
															+        FUNC(const long long& a, const VEC<N,V>& b)                             \
														
 
															+{ return { { a, b.wrapped() } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, float, V>::type                            \
														
 
															+        FUNC(const float& a, const VEC<N,V>& b)                                 \
														
 
															+{ return { { a, b.wrapped() } }; }                                              \
														
 
															+                                                                                \
														
 
															+template<unsigned N, class V> SIMDPP_INL                                        \
														
 
															+typename detail::get_expr_uint<EXPR, double, V>::type                           \
														
 
															+        FUNC(const double& a, const VEC<N,V>& b)                                \
														
 
															+{ return { { a, b.wrapped() } }; }
														
 
															+// end #define
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/detail/subvec_extract.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/detail/subvec_extract.h
@@ -0,0 +1,103 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_DETAIL_VEC_EXTRACT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_DETAIL_VEC_EXTRACT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/setup_arch.h>
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/core/insert.h>
														
 
															+#include <simdpp/core/extract.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+namespace detail {
														
 
															+
														
 
															+// Builds an R from R::vec_length consecutive vec() components of a, starting at
														
 
															+// component index n * R::vec_length.
														
 
															+template<class R, class V> SIMDPP_INL
														
 
															+R subvec_extract_impl(const V& a, unsigned n)
														
 
															+{
														
 
															+    static_assert(R::length >= V::base_length, "Too small vector to extract");
														
 
															+
														
 
															+    // First component of a that belongs to the requested sub-vector.
														
 
															+    const unsigned first = n * R::vec_length;
														
 
															+
														
 
															+    R result;
														
 
															+    for (unsigned k = 0; k != R::vec_length; ++k)
														
 
															+        result.vec(k) = a.vec(first + k);
														
 
															+    return result;
														
 
															+}
														
 
															+
														
 
															+// Extracts the sub-vector consisting of elements [M*n .. M*(n+1)) of a.
														
 
															+// uint8 overload: delegates to subvec_extract_impl with uint8<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+uint8<M> subvec_extract(const uint8<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = uint8<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+// uint16 overload: delegates to subvec_extract_impl with uint16<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+uint16<M> subvec_extract(const uint16<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = uint16<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+// uint32 overload: delegates to subvec_extract_impl with uint32<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+uint32<M> subvec_extract(const uint32<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = uint32<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+// uint64 overload: delegates to subvec_extract_impl with uint64<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+uint64<M> subvec_extract(const uint64<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = uint64<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+// int8 overload: delegates to subvec_extract_impl with int8<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+int8<M> subvec_extract(const int8<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = int8<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+// int16 overload: delegates to subvec_extract_impl with int16<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+int16<M> subvec_extract(const int16<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = int16<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+// int32 overload: delegates to subvec_extract_impl with int32<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+int32<M> subvec_extract(const int32<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = int32<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+// int64 overload: delegates to subvec_extract_impl with int64<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+int64<M> subvec_extract(const int64<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = int64<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+// float32 overload: delegates to subvec_extract_impl with float32<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+float32<M> subvec_extract(const float32<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = float32<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+// float64 overload: delegates to subvec_extract_impl with float64<M> as the result type.
														
 
															+template<unsigned M, unsigned N> SIMDPP_INL
														
 
															+float64<M> subvec_extract(const float64<N>& a, unsigned n)
														
 
															+{
														
 
															+    using result_type = float64<M>;
														
 
															+    return subvec_extract_impl<result_type>(a, n);
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace detail
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/detail/subvec_insert.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/detail/subvec_insert.h
@@ -0,0 +1,62 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_DETAIL_VEC_INSERT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_DETAIL_VEC_INSERT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/setup_arch.h>
														
 
															+#include <simdpp/types.h>
														
 
															+
														
 
															+#include <cstring>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+namespace detail {
														
 
															+
														
 
															+// Copies every vec() slot of @a v into consecutive vec() slots of @a r,
															+// beginning at slot n * V::vec_length. The only check made here is the
															+// compile-time size assertion below; @a n itself is not range-checked.
															+template<class R, class V> SIMDPP_INL
															+void subvec_insert_impl(R& r, const V& v, unsigned n)
															+{
															+    static_assert(V::length >= R::base_length, "Too small vector to insert");
															+
															+    // Index of the first slot in r that receives data from v.
															+    const auto first_slot = n * V::vec_length;
															+    for (unsigned k = 0; k != V::vec_length; ++k) {
															+        // TODO (from original): combine or split as needed
															+        r.vec(first_slot + k) = v.vec(k);
															+    }
															+}
														
 
															+
														
 
															+// Sets the elements [M*n .. M*(n+1)) of @a a to the contents of @a x
														
 
															+// Unsigned integer overloads. Each one hands its arguments, unchanged and in
															+// the same order, to subvec_insert_impl.
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(uint8<N>& a, const uint8<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
															+
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(uint16<N>& a, const uint16<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
															+
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(uint32<N>& a, const uint32<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
															+
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(uint64<N>& a, const uint64<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
															+
															+// Signed integer overloads; same forwarding as above.
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(int8<N>& a, const int8<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
															+
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(int16<N>& a, const int16<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
															+
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(int32<N>& a, const int32<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
															+
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(int64<N>& a, const int64<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
															+
															+// Floating-point overloads; same forwarding as above.
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(float32<N>& a, const float32<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
															+
															+template<unsigned N, unsigned M> SIMDPP_INL
															+void subvec_insert(float64<N>& a, const float64<M>& x, unsigned n)
															+{
															+    subvec_insert_impl(a, x, n);
															+}
														
 
															+
														
 
															+} // namespace detail
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/extract.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/extract.h
@@ -0,0 +1,103 @@
 
															+/*  Copyright (C) 2011-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_EXTRACT_H
														
 
															+#define LIBSIMDPP_SIMD_EXTRACT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/extract.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Extracts the @a id-th element from a vector.
														
 
															+
														
 
															+    @code
														
 
															+    r = a[id]
														
 
															+    @endcode
														
 
															+
														
 
															+    This function may have very high latency.
														
 
															+*/
														
 
															+// uint8 overload: rejects id >= N at compile time, then delegates the element
															+// read to detail::insn::i_extract<id>.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+uint8_t extract(const uint8<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const uint8_t element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
															+
															+// int8 overload; same checks and delegation as above.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+int8_t extract(const int8<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const int8_t element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
															+
															+// uint16 overload.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+uint16_t extract(const uint16<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const uint16_t element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
															+
															+// int16 overload.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+int16_t extract(const int16<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const int16_t element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
															+
															+// uint32 overload.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+uint32_t extract(const uint32<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const uint32_t element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
															+
															+// int32 overload.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+int32_t extract(const int32<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const int32_t element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
															+
															+// uint64 overload.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+uint64_t extract(const uint64<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const uint64_t element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
															+
															+// int64 overload.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+int64_t extract(const int64<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const int64_t element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
															+
															+// float32 overload; the element is returned as a plain float.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+float extract(const float32<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const float element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
															+
															+// float64 overload; the element is returned as a plain double.
															+template<unsigned id, unsigned N> SIMDPP_INL
															+double extract(const float64<N>& a)
															+{
															+    static_assert(id < N, "index out of bounds");
															+    const double element = detail::insn::i_extract<id>(a);
															+    return element;
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/extract_bits.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/extract_bits.h
@@ -0,0 +1,67 @@
 
															+/*  Copyright (C) 2011-2017  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_CORE_EXTRACT_BITS_H
														
 
															+#define LIBSIMDPP_SIMD_CORE_EXTRACT_BITS_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/extract_bits.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Extracts a bit from each byte of each element of a vector containing 8-bit
														
 
															+    elements.
														
 
															+
														
 
															+    This operation is only sensible if each byte within the vector is either
														
 
															+    0x00 or 0xff.
														
 
															+
														
 
															+    @code
														
 
															+    r = ((a[0] & 0x??) ? 0x01 : 0) |
														
 
															+        ((a[1] & 0x??) ? 0x02 : 0) |
														
 
															+        ...
														
 
															+        ((a[15] & 0x??) ? 0x8000 : 0)
														
 
															+    @endcode
														
 
															+*/
														
 
															+// 16-byte overload: delegates to detail::insn::i_extract_bits_any and returns
															+// the result as a 16-bit value.
															+SIMDPP_INL uint16_t extract_bits_any(const uint8<16>& a)
															+{
															+    const uint16_t bits = detail::insn::i_extract_bits_any(a);
															+    return bits;
															+}
															+
															+// 32-byte overload: same delegation, returned as a 32-bit value.
															+SIMDPP_INL uint32_t extract_bits_any(const uint8<32>& a)
															+{
															+    const uint32_t bits = detail::insn::i_extract_bits_any(a);
															+    return bits;
															+}
														
 
															+
														
 
															+/** Extracts a specific bit from each byte of each element of an int8x16 vector.
														
 
															+
														
 
															+    @code
														
 
															+    r = ((a[0] & 0x80) >> 7) | ((a[1] & 0x80) >> 6) | ...  | ((a[15] & 0x80) << 8)
														
 
															+    @endcode
														
 
															+*/
														
 
															+// 16-byte overload. @a id must be below 8 (checked at compile time); the work
															+// is delegated to detail::insn::i_extract_bits<id>.
															+template<unsigned id> SIMDPP_INL
															+uint16_t extract_bits(const uint8<16>& a)
															+{
															+    static_assert(id < 8, "index out of bounds");
															+    const uint16_t bits = detail::insn::i_extract_bits<id>(a);
															+    return bits;
															+}
															+
															+// 32-byte overload; same check and delegation, returned as a 32-bit value.
															+template<unsigned id> SIMDPP_INL
															+uint32_t extract_bits(const uint8<32>& a)
															+{
															+    static_assert(id < 8, "index out of bounds");
															+    const uint32_t bits = detail::insn::i_extract_bits<id>(a);
															+    return bits;
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_abs.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_abs.h
@@ -0,0 +1,73 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_ABS_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_ABS_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/f_abs.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes absolute value of floating point values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = abs(a0)
														
 
															+    ...
														
 
															+    rN = abs(aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 1-2}
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2-3}
														
 
															+    @icost{NEON, 2}
														
 
															+    @icost{AVX-AVX2, 1-2}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+*/
														
 
															+// Brace-initialises the returned float32<N, expr_fabs<...>> from @a a; this
															+// body calls no instruction-level helper itself.
															+// NOTE(review): the absolute value is presumably computed when the returned
															+// expression is evaluated later -- confirm in simdpp/detail/expr/f_abs.h.
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float32<N, expr_fabs<float32<N,E>>> abs(const float32<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+/** Computes absolute value of floating point values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = abs(a0)
														
 
															+    ...
														
 
															+    rN = abs(aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-AVX2, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2-3}
														
 
															+    @icost{AVX-AVX2, 1-2}
														
 
															+*/
														
 
															+// Brace-initialises the returned float64<N, expr_fabs<...>> from @a a; this
															+// body calls no instruction-level helper itself.
															+// NOTE(review): the absolute value is presumably computed when the returned
															+// expression is evaluated later -- confirm in simdpp/detail/expr/f_abs.h.
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float64<N, expr_fabs<float64<N,E>>> abs(const float64<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_add.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_add.h
@@ -0,0 +1,71 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_ADD_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_ADD_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/f_add.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Adds the values of two vectors
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 + b0
														
 
															+    ...
														
 
															+    rN = aN + bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+// Brace-initialises the returned float32<N, expr_fadd<...>> from the two
															+// operands, in the order (a, b); no addition is performed in this body.
															+// NOTE(review): the sum is presumably computed when the returned expression
															+// is evaluated later -- confirm in simdpp/detail/expr/f_add.h.
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float32<N, expr_fadd<float32<N,E1>,
														
 
															+                     float32<N,E2>>> add(const float32<N,E1>& a, const float32<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(add, expr_fadd, float32, float32)
														
 
															+
														
 
															+/** Adds the values of two vectors
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 + b0
														
 
															+    ...
														
 
															+    rN = aN + bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+// Brace-initialises the returned float64<N, expr_fadd<...>> from the two
															+// operands, in the order (a, b); no addition is performed in this body.
															+// NOTE(review): the sum is presumably computed when the returned expression
															+// is evaluated later -- confirm in simdpp/detail/expr/f_add.h.
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float64<N, expr_fadd<float64<N,E1>,
														
 
															+                     float64<N,E2>>> add(const float64<N,E1>& a, const float64<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(add, expr_fadd, float64, float64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_ceil.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_ceil.h
@@ -0,0 +1,54 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_CEIL_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_CEIL_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_ceil.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Rounds the values of a vector towards positive infinity
														
 
															+
														
 
															+    @code
														
 
															+    r0 = ceil(a0)
														
 
															+    ...
														
 
															+    rN = ceil(aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2, SSE3, SSSE3, 13-15}
														
 
															+    @icost{NEON, 11-13}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2, SSE3, SSSE3, 26-28}
														
 
															+    @icost{NEON, 22-24}
														
 
															+    @icost{ALTIVEC, 2}
														
 
															+*/
														
 
															+// Single-precision overload: evaluates the argument expression first, then
															+// passes the result to detail::insn::i_ceil.
															+template<unsigned N, class E> SIMDPP_INL
															+float32<N,expr_empty> ceil(const float32<N,E>& a)
															+{
															+    const auto& operand = a.eval();
															+    return detail::insn::i_ceil(operand);
															+}
															+
															+// Double-precision overload; same evaluate-then-delegate sequence.
															+template<unsigned N, class E> SIMDPP_INL
															+float64<N,expr_empty> ceil(const float64<N,E>& a)
															+{
															+    const auto& operand = a.eval();
															+    return detail::insn::i_ceil(operand);
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_div.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_div.h
@@ -0,0 +1,73 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_DIV_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_DIV_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_div.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Divides the values of two vectors.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 / b0
														
 
															+    ...
														
 
															+    rN = aN / bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @icost{NEON, 6}
														
 
															+    @icost{ALTIVEC, 10}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @icost{NEON, 12}
														
 
															+    @icost{ALTIVEC, 19}
														
 
															+*/
														
 
															+// Single-precision overload: evaluates both operand expressions, then passes
															+// them (dividend first, divisor second) to detail::insn::i_div.
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+float32<N,expr_empty> div(const float32<N,E1>& a, const float32<N,E2>& b)
															+{
															+    const auto& dividend = a.eval();
															+    const auto& divisor = b.eval();
															+    return detail::insn::i_div(dividend, divisor);
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(div, float32, float32)
														
 
															+
														
 
															+/** Divides the values of two vectors
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 / b0
														
 
															+    ...
														
 
															+    rN = aN / bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+*/
														
 
															+// Double-precision overload: evaluates both operand expressions, then passes
															+// them (dividend first, divisor second) to detail::insn::i_div.
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+float64<N,expr_empty> div(const float64<N,E1>& a, const float64<N,E2>& b)
															+{
															+    const auto& dividend = a.eval();
															+    const auto& divisor = b.eval();
															+    return detail::insn::i_div(dividend, divisor);
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(div, float64, float64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_floor.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_floor.h
@@ -0,0 +1,55 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_FLOOR_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_FLOOR_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <cmath>
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_floor.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Rounds the values of a vector towards negative infinity
														
 
															+
														
 
															+    @code
														
 
															+    r0 = floor(a0)
														
 
															+    ...
														
 
															+    rN = floor(aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 12-14}
														
 
															+    @icost{NEON, 10-11}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 24-26}
														
 
															+    @icost{NEON, 20-21}
														
 
															+    @icost{ALTIVEC, 2}
														
 
															+*/
														
 
															+// Single-precision overload: evaluates the argument expression first, then
															+// passes the result to detail::insn::i_floor.
															+template<unsigned N, class E> SIMDPP_INL
															+float32<N,expr_empty> floor(const float32<N,E>& a)
															+{
															+    const auto& operand = a.eval();
															+    return detail::insn::i_floor(operand);
															+}
															+
															+// Double-precision overload; same evaluate-then-delegate sequence.
															+template<unsigned N, class E> SIMDPP_INL
															+float64<N,expr_empty> floor(const float64<N,E>& a)
															+{
															+    const auto& operand = a.eval();
															+    return detail::insn::i_floor(operand);
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_fmadd.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_fmadd.h
@@ -0,0 +1,56 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_FMADD_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_FMADD_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/f_fmadd.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Performs a fused multiply-add operation
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 * b0 + c0
														
 
															+    ...
														
 
															+    rN = aN * bN + cN
														
 
															+    @endcode
														
 
															+
														
 
															+    Implemented only on architectures with either @c X86_FMA3 or @c X86_FMA4
														
 
															+    support.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2, class E3> SIMDPP_INL
														
 
															+float32<N, expr_fmadd<float32<N,E1>,
														
 
															+                      float32<N,E2>,
														
 
															+                      float32<N,E3>>> fmadd(const float32<N,E1>& a,
														
 
															+                                            const float32<N,E2>& b,
														
 
															+                                            const float32<N,E3>& c)
														
 
															+{
														
 
															+    return { { a, b, c } };
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2, class E3> SIMDPP_INL
														
 
															+float64<N, expr_fmadd<float64<N,E1>,
														
 
															+                      float64<N,E2>,
														
 
															+                      float64<N,E3>>> fmadd(const float64<N,E1>& a,
														
 
															+                                            const float64<N,E2>& b,
														
 
															+                                            const float64<N,E3>& c)
														
 
															+{
														
 
															+    return { { a, b, c } };
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_fmsub.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_fmsub.h
@@ -0,0 +1,56 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_FMSUB_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_FMSUB_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/f_fmsub.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Performs a fused multiply-subtract operation
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 * b0 - c0
														
 
															+    ...
														
 
															+    rN = aN * bN - cN
														
 
															+    @endcode
														
 
															+
														
 
															+    Implemented only on architectures with either @c X86_FMA3 or @c X86_FMA4
														
 
															+    support.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2, class E3> SIMDPP_INL
														
 
															+float32<N, expr_fmsub<float32<N,E1>,
														
 
															+                      float32<N,E2>,
														
 
															+                      float32<N,E3>>> fmsub(const float32<N,E1>& a,
														
 
															+                                            const float32<N,E2>& b,
														
 
															+                                            const float32<N,E3>& c)
														
 
															+{
														
 
															+    return { { a, b, c } };
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E1, class E2, class E3> SIMDPP_INL
														
 
															+float64<N, expr_fmsub<float64<N,E1>,
														
 
															+                      float64<N,E2>,
														
 
															+                      float64<N,E3>>> fmsub(const float64<N,E1>& a,
														
 
															+                                            const float64<N,E2>& b,
														
 
															+                                            const float64<N,E3>& c)
														
 
															+{
														
 
															+    return { { a, b, c } };
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_isnan.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_isnan.h
@@ -0,0 +1,63 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_ISNAN_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_ISNAN_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_isnan.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Checks whether elements in @a a are IEEE754 NaN.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = isnan(a0) ? 0xffffffff : 0
														
 
															+    ...
														
 
															+    rN = isnan(aN) ? 0xffffffff : 0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+mask_float32<N,expr_empty> isnan(const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_isnan(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Checks whether elements in @a a are IEEE754 NaN.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = isnan(a0) ? 0xffffffffffffffff : 0
														
 
															+    ...
														
 
															+    rN = isnan(aN) ? 0xffffffffffffffff : 0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+mask_float64<N,expr_empty> isnan(const float64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_isnan(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_isnan2.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_isnan2.h
@@ -0,0 +1,69 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_ISNAN2_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_ISNAN2_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_isnan2.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Checks whether corresponding elements in either @a a or @a b are IEEE754 NaN.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = isnan(a0) || isnan(b0) ? 0xffffffff : 0
														
 
															+    ...
														
 
															+    rN = isnan(aN) || isnan(bN) ? 0xffffffff : 0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, ALTIVEC, 3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @icost{NEON, ALTIVEC, 6}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_float32<N,expr_empty> isnan2(const float32<N,E1>& a, const float32<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_isnan2(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+/** Checks whether corresponding elements in either @a a or @a b are IEEE754
														
 
															+    NaN.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = isnan(a0) || isnan(b0) ? 0xffffffffffffffff : 0
														
 
															+    ...
														
 
															+    rN = isnan(aN) || isnan(bN) ? 0xffffffffffffffff : 0
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+mask_float64<N,expr_empty> isnan2(const float64<N,E1>& a, const float64<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_isnan2(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_max.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_max.h
@@ -0,0 +1,73 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_MAX_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_MAX_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_max.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes maxima of the values of two vectors. If at least one of the values
														
 
															+    is NaN, or both values are zeroes, it is unspecified which value will be
														
 
															+    returned.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float32<N,expr_empty> max(const float32<N,E1>& a, const float32<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, float32, float32)
														
 
															+
														
 
															+/** Computes maxima of the values of two vectors. If at least one of the values
														
 
															+    is NaN, or both values are zeroes, it is unspecified which value will be
														
 
															+    returned.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float64<N,expr_empty> max(const float64<N,E1>& a, const float64<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, float64, float64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_min.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_min.h
@@ -0,0 +1,74 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_MIN_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_MIN_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_min.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+// note: SSE doesn't provide a way to propagate NaNs in min/max
														
 
															+/** Computes minima of the values in two vectors. If at least one of the
														
 
															+    values is NaN, or both values are zeroes, it is unspecified which value
														
 
															+    will be returned.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float32<N,expr_empty> min(const float32<N,E1>& a, const float32<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, float32, float32)
														
 
															+
														
 
															+/** Computes minima of the values in two vectors. If at least one of the values
														
 
															+    is NaN, or both values are zeroes, it is unspecified which value will be
														
 
															+    returned.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float64<N,expr_empty> min(const float64<N,E1>& a, const float64<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval());
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, float64, float64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_mul.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_mul.h
@@ -0,0 +1,73 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_MUL_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_MUL_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/f_mul.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Multiplies the values of two vectors
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 * b0
														
 
															+    ...
														
 
															+    rN = aN * bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float32<N, expr_fmul<float32<N,E1>,
														
 
															+                     float32<N,E2>>> mul(const float32<N,E1>& a,
														
 
															+                                         const float32<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(mul, expr_fmul, float32, float32)
														
 
															+
														
 
															+/** Multiplies the values of two vectors
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 * b0
														
 
															+    ...
														
 
															+    rN = aN * bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float64<N, expr_fmul<float64<N,E1>,
														
 
															+                     float64<N,E2>>> mul(const float64<N,E1>& a,
														
 
															+                                         const float64<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(mul, expr_fmul, float64, float64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_neg.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_neg.h
@@ -0,0 +1,70 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_NEG_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_NEG_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/f_neg.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Negates the values of a vector
														
 
															+
														
 
															+    @code
														
 
															+    r0 = -a0
														
 
															+    ...
														
 
															+    rN = -aN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, ALTIVEC, 2-3}
														
 
															+    @icost{AVX-AVX2, NEON, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float32<N, expr_fneg<float32<N,E>>> neg(const float32<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+/** Negates the values of a vector
														
 
															+
														
 
															+    @code
														
 
															+    r0 = -a0
														
 
															+    ...
														
 
															+    rN = -aN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 1-2}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2-3}
														
 
															+    @icost{AVX-AVX2, 1-2}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float64<N, expr_fneg<float64<N,E>>> neg(const float64<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_rcp_e.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_rcp_e.h
@@ -0,0 +1,50 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_RCP_E_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_RCP_E_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_rcp_e.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Computes approximate reciprocal.
														
 
															+
														
 
															+    Relative error is as follows:
														
 
															+     - 1/2 ULP for NULL and NEON
														
 
															+     - ~1/2730 for SSE2
														
 
															+     - 1/16384 for AVX512
														
 
															+     - 1/4096 for ALTIVEC
														
 
															+     - 1/256 for NEON_FLT_SP
														
 
															+
														
 
															+    @code
														
 
															+    r0 = approx(1.0f / a0)
														
 
															+    ...
														
 
															+    rN = approx(1.0f / aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float32<N,expr_empty> rcp_e(const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_rcp_e(a.eval());
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_rcp_rh.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_rcp_rh.h
@@ -0,0 +1,64 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_RCP_RH_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_RCP_RH_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_rcp_rh.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes one Newton-Raphson iteration for reciprocal. @a x is the current
														
 
															+    estimate, @a a are the values to estimate reciprocal for.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = x0 * (2 - x0*a0)
														
 
															+    ...
														
 
															+    rN = xN * (2 - xN*aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    Using this function, division can be implemented as follows:
														
 
															+    @code
														
 
															+    // a/b
														
 
															+    float32x4 x;
														
 
															+    x = rcp_e(b);
														
 
															+    x = rcp_rh(x, b);
														
 
															+    x = rcp_rh(x, b);
														
 
															+    return mul(a, x);
														
 
															+    @endcode
														
 
															+
														
 
															+    Precision can be controlled by selecting the number of @c rcp_rh steps.
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 3-4}
														
 
															+    @icost{NEON, 2}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{AVX-AVX2, 3-4}
														
 
															+    @icost{SSE2-SSE4.1, 6-7}
														
 
															+    @icost{NEON, 4}
														
 
															+    @icost{ALTIVEC, 4-5}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float32<N,expr_empty> rcp_rh(const float32<N,E>& x, const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_rcp_rh(x.eval(), a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_reduce_add.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_reduce_add.h
@@ -0,0 +1,44 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_REDUCE_ADD_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_REDUCE_ADD_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_reduce_add.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes the sum of the elements in the vector.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 + a1 + a2 + ...
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float reduce_add(const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+double reduce_add(const float64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_reduce_max.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_reduce_max.h
@@ -0,0 +1,44 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_REDUCE_MAX_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_REDUCE_MAX_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_reduce_max.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes the maximum of the elements in the vector.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, a1, a2, ...)
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float reduce_max(const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+double reduce_max(const float64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_reduce_min.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_reduce_min.h
@@ -0,0 +1,44 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_REDUCE_MIN_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_REDUCE_MIN_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_reduce_min.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes the minimum of the elements in the vector.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, a1, a2, ...)
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float reduce_min(const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_min(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+double reduce_min(const float64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_min(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_reduce_mul.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_reduce_mul.h
@@ -0,0 +1,44 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_REDUCE_MUL_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_REDUCE_MUL_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_reduce_mul.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes the product of the elements in the vector.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 * a1 * a2 * ...
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float reduce_mul(const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_mul(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+double reduce_mul(const float64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_mul(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_rsqrt_e.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_rsqrt_e.h
@@ -0,0 +1,50 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_RSQRT_E_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_RSQRT_E_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_rsqrt_e.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes approximate reciprocal square root.
														
 
															+
														
 
															+    Relative error is as follows:
														
 
															+     - 1/2 ULP for NULL and NEON
														
 
															+     - ~1/2730 for SSE2
														
 
															+     - 1/16384 for AVX512
														
 
															+     - 1/4096 for ALTIVEC
														
 
															+     - 1/256 for NEON_FLT_SP
														
 
															+
														
 
															+    @code
														
 
															+    r0 = approx(1 / sqrt(a0))
														
 
															+    ...
														
 
															+    rN = approx(1 / sqrt(aN))
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float32<N,expr_empty> rsqrt_e(const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_rsqrt_e(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_rsqrt_rh.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_rsqrt_rh.h
@@ -0,0 +1,53 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_RSQRT_RH_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_RSQRT_RH_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_rsqrt_rh.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes one Newton-Raphson iteration for inverse of square root. @a x is
														
 
															+    the current estimate, @a a are the values to estimate the inverse square
														
 
															+    root for.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = x0 * (3 - a0*x0*x0) * 0.5
														
 
															+    ...
														
 
															+    rN = xN * (3 - aN*xN*xN) * 0.5
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2, SSE3, SSSE3, SSE4.1, 5-7}
														
 
															+    @icost{NEON, 3}
														
 
															+    @icost{ALTIVEC, 4-6}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{AVX-AVX2, 7}
														
 
															+    @icost{SSE2, SSE3, SSSE3, SSE4.1, 10-12}
														
 
															+    @icost{NEON, 6}
														
 
															+    @icost{ALTIVEC, 8-10}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float32<N,expr_empty> rsqrt_rh(const float32<N,E>& x, const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_rsqrt_rh(x.eval(), a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_sign.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_sign.h
@@ -0,0 +1,71 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_SIGN_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_SIGN_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_sign.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Extracts sign bits from the values in float32x4 vector
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 & 0x80000000
														
 
															+    ...
														
 
															+    rN = aN & 0x80000000
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSE4.1, ALTIVEC, NEON, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, ALTIVEC, NEON, 2-3}
														
 
															+    @icost{AVX-AVX2, 1-2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float32<N,expr_empty> sign(const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_sign(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Extracts sign bits from the values in float64x2 vector.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 & 0x8000000000000000
														
 
															+    ...
														
 
															+    rN = aN & 0x8000000000000000
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 1-2}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2-3}
														
 
															+    @icost{AVX-AVX2, 1-2}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float64<N,expr_empty> sign(const float64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_sign(a.eval());
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_sqrt.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_sqrt.h
@@ -0,0 +1,70 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_SQRT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_SQRT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/f_sqrt.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes square root.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = sqrt(a0)
														
 
															+    ...
														
 
															+    rN = sqrt(aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, 5}
														
 
															+    @icost{ALTIVEC, 5-7}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @icost{NEON, 10}
														
 
															+    @icost{ALTIVEC, 10-12}
														
 
															+*/
														
 
															+template<unsigned N, class E1> SIMDPP_INL
														
 
															+float32<N,expr_empty> sqrt(const float32<N,E1>& a)
														
 
															+{
														
 
															+    return detail::insn::i_sqrt(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Computes square root.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = sqrt(a0)
														
 
															+    ...
														
 
															+    rN = sqrt(aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+*/
														
 
															+template<unsigned N, class E1> SIMDPP_INL
														
 
															+float64<N,expr_empty> sqrt(const float64<N,E1>& a)
														
 
															+{
														
 
															+    return detail::insn::i_sqrt(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_sub.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_sub.h
@@ -0,0 +1,74 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_SUB_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_SUB_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/f_sub.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+#include <simdpp/core/detail/get_expr_uint.h>
														
 
															+
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Subtracts the values of two vectors
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 - b0
														
 
															+    ...
														
 
															+    rN = aN - bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float32<N, expr_fsub<float32<N,E1>,
														
 
															+                     float32<N,E2>>> sub(const float32<N,E1>& a,
														
 
															+                                         const float32<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(sub, expr_fsub, float32, float32)
														
 
															+
														
 
															+/** Subtracts the values of two vectors
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 - b0
														
 
															+    ...
														
 
															+    rN = aN - bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+float64<N, expr_fsub<float64<N,E1>,
														
 
															+                     float64<N,E2>>> sub(const float64<N,E1>& a,
														
 
															+                                         const float64<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(sub, expr_fsub, float64, float64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/f_trunc.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/f_trunc.h
@@ -0,0 +1,53 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_F_TRUNC_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_F_TRUNC_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <cmath>
														
 
															+#include <simdpp/detail/insn/f_trunc.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Rounds the values of a vector towards zero
														
 
															+    @code
														
 
															+    r0 = trunc(a0)
														
 
															+    ...
														
 
															+    rN = trunc(aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2, SSE3, SSSE3, 7-9}
														
 
															+    @icost{NEON, 5-6}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2, SSE3, SSSE3, 14-16}
														
 
															+    @icost{NEON, 10-11}
														
 
															+    @icost{SSE4.1, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float32<N,expr_empty> trunc(const float32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_trunc(a.eval());
														
 
															+}
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+float64<N,expr_empty> trunc(const float64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_trunc(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/for_each.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/for_each.h
@@ -0,0 +1,41 @@
 
															+/*  Copyright (C) 2017  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_FOR_EACH_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_FOR_EACH_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/for_each.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Executes the given function on all elements of the vector.
														
 
															+
														
 
															+    Equivalent to:
														
 
															+    @code
														
 
															+    function(extract<0>(v));
														
 
															+    function(extract<1>(v));
														
 
															+    ...
														
 
															+    function(extract<N>(v));
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class V, class F> SIMDPP_INL
														
 
															+void for_each(const any_vec<N, V>& v, F function)
														
 
															+{
														
 
															+    detail::for_each(v.wrapped().eval(), function);
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_abs.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_abs.h
@@ -0,0 +1,117 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_ABS_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_ABS_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/i_abs.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Computes absolute value of 8-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = abs(a0)
														
 
															+    ...
														
 
															+    rN = abs(aN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSE3, 3}
														
 
															+    @icost{ALTIVEC, 1-3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE3, 6}
														
 
															+    @icost{SSSE3-AVX, NEON, 2}
														
 
															+    @icost{ALTIVEC, 2-4}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint8<N, expr_iabs<int8<N,E>>> abs(const int8<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+
														
 
															+/** Computes absolute value of 16-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = abs(a0)
														
 
															+    ...
														
 
															+    rN = abs(aN)
														
 
															+    @endcode
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSE3, 3}
														
 
															+    @icost{ALTIVEC, 1-3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE3, 6}
														
 
															+    @icost{SSSE3-AVX, NEON, 2}
														
 
															+    @icost{ALTIVEC, 2-5}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint16<N, expr_iabs<int16<N,E>>> abs(const int16<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+/** Computes absolute value of 32-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = abs(a0)
														
 
															+    ...
														
 
															+    rN = abs(aN)
														
 
															+    @endcode
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSE3, 3}
														
 
															+    @icost{ALTIVEC, 1-3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE3, 6}
														
 
															+    @icost{SSSE3-AVX, NEON, 2}
														
 
															+    @icost{ALTIVEC, 2-4}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint32<N, expr_iabs<int32<N,E>>> abs(const int32<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+/** Computes absolute value of 64-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = abs(a0)
														
 
															+    ...
														
 
															+    rN = abs(aN)
														
 
															+    @endcode
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX, 5}
														
 
															+    @icost{NEON, 6}
														
 
															+    @novec{ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 10}
														
 
															+    @icost{NEON, 12}
														
 
															+    @icost{AVX2, 4}
														
 
															+    @novec{ALTIVEC}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint64<N, expr_iabs<int64<N,E>>> abs(const int64<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_add.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_add.h
@@ -0,0 +1,117 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_ADD_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_ADD_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/i_add.h>
														
 
															+#include <simdpp/core/detail/get_expr_uint.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+#include <simdpp/core/detail/get_expr_uint.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Adds 8-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 + b0
														
 
															+    ...
														
 
															+    rN = aN + bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_iadd, V1, V2>::type
														
 
															+        add(const any_int8<N,V1>& a,
														
 
															+            const any_int8<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(add, expr_iadd, any_int8, int8)
														
 
															+
														
 
															+/** Adds 16-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 + b0
														
 
															+    ...
														
 
															+    rN = aN + bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_iadd, V1, V2>::type
														
 
															+        add(const any_int16<N,V1>& a,
														
 
															+            const any_int16<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(add, expr_iadd, any_int16, int16)
														
 
															+
														
 
															+/** Adds 32-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 + b0
														
 
															+    ...
														
 
															+    rN = aN + bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_iadd, V1, V2>::type
														
 
															+        add(const any_int32<N,V1>& a,
														
 
															+            const any_int32<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(add, expr_iadd, any_int32, int32)
														
 
															+
														
 
															+/** Adds 64-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 + b0
														
 
															+    ...
														
 
															+    rN = aN + bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{ALTIVEC, 5-6}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, 2}
														
 
															+    @icost{ALTIVEC, 10-11}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_iadd, V1, V2>::type
														
 
															+        add(const any_int64<N,V1>& a,
														
 
															+            const any_int64<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(add, expr_iadd, any_int64, int64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_add_sat.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_add_sat.h
@@ -0,0 +1,111 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_ADD_SAT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_ADD_SAT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/i_add_sat.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Adds and saturates signed 8-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = signed_saturate(a0 + b0)
														
 
															+    ...
														
 
															+    rN = signed_saturate(aN + bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int8<N, expr_iadd_sat<int8<N,E1>,
														
 
															+                      int8<N,E2>>> add_sat(const int8<N,E1>& a,
														
 
															+                                           const int8<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(add_sat, expr_iadd_sat, int8, int8)
														
 
															+
														
 
															+/** Adds and saturates signed 16-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = signed_saturate(a0 + b0)
														
 
															+    ...
														
 
															+    rN = signed_saturate(aN + bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int16<N, expr_iadd_sat<int16<N,E1>,
														
 
															+                       int16<N,E2>>> add_sat(const int16<N,E1>& a,
														
 
															+                                             const int16<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(add_sat, expr_iadd_sat, int16, int16)
														
 
															+
														
 
															+/** Adds and saturates unsigned 8-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = unsigned_saturate(a0 + b0)
														
 
															+    ...
														
 
															+    rN = unsigned_saturate(aN + bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint8<N, expr_iadd_sat<uint8<N,E1>,
														
 
															+                       uint8<N,E2>>> add_sat(const uint8<N,E1>& a,
														
 
															+                                             const uint8<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(add_sat, expr_iadd_sat, uint8, uint8)
														
 
															+
														
 
															+/** Adds and saturates unsigned 16-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = unsigned_saturate(a0 + b0)
														
 
															+    ...
														
 
															+    rN = unsigned_saturate(aN + bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint16<N, expr_iadd_sat<uint16<N,E1>,
														
 
															+                        uint16<N,E2>>> add_sat(const uint16<N,E1>& a,
														
 
															+                                               const uint16<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(add_sat, expr_iadd_sat, uint16, uint16)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_avg.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_avg.h
@@ -0,0 +1,162 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_AVG_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_AVG_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_avg.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Rounded average of two vectors of unsigned 8-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0 + 1) / 2
															+    ...
															+    rN = (aN + bN + 1) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg.
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+uint8<N,expr_empty> avg(const uint8<N,E1>& a, const uint8<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg, uint8, uint8)
														
 
															+
														
 
															+/** Rounded average of two vectors of signed 8-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0 + 1) / 2
															+    ...
															+    rN = (aN + bN + 1) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 4-5}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 8-9}
															+    @icost{AVX2, 4-5}
															+    @icost{NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+int8<N,expr_empty> avg(const int8<N,E1>& a, const int8<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg, int8, int8)
														
 
															+
														
 
															+/** Rounded average of two vectors of unsigned 16-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0 + 1) / 2
															+    ...
															+    rN = (aN + bN + 1) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg.
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+uint16<N,expr_empty> avg(const uint16<N,E1>& a, const uint16<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg, uint16, uint16)
														
 
															+
														
 
															+/** Rounded average of two vectors of signed 16-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0 + 1) / 2
															+    ...
															+    rN = (aN + bN + 1) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 4-5}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 8-9}
															+    @icost{AVX2, 4-5}
															+    @icost{NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+int16<N,expr_empty> avg(const int16<N,E1>& a, const int16<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg, int16, int16)
														
 
															+
														
 
															+/** Rounded average of two vectors of unsigned 32-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0 + 1) / 2
															+    ...
															+    rN = (aN + bN + 1) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 6-7}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 12-13}
															+    @icost{AVX2, 6-7}
															+    @icost{NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+uint32<N,expr_empty> avg(const uint32<N,E1>& a, const uint32<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg, uint32, uint32)
														
 
															+
														
 
															+/** Rounded average of two vectors of signed 32-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0 + 1) / 2
															+    ...
															+    rN = (aN + bN + 1) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 9-10}
															+    @icost{NEON, 1}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 18-19}
															+    @icost{AVX2, 9-10}
															+    @icost{NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+int32<N,expr_empty> avg(const int32<N,E1>& a, const int32<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg, int32, int32)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_avg_trunc.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_avg_trunc.h
@@ -0,0 +1,177 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_AVG_TRUNC_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_AVG_TRUNC_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_avg_trunc.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Truncated average of two vectors of unsigned 8-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0) / 2
															+    ...
															+    rN = (aN + bN) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg_trunc.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 4}
															+    @icost{NEON, 1}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 8}
															+    @icost{AVX2, 4}
															+    @icost{NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+uint8<N,expr_empty> avg_trunc(const uint8<N,E1>& a, const uint8<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg_trunc(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg_trunc, uint8, uint8)
														
 
															+
														
 
															+/** Truncated average of two vectors of signed 8-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0) / 2
															+    ...
															+    rN = (aN + bN) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg_trunc.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 7-8}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 14-15}
															+    @icost{AVX2, 7-8}
															+    @icost{NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+int8<N,expr_empty> avg_trunc(const int8<N,E1>& a, const int8<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg_trunc(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg_trunc, int8, int8)
														
 
															+
														
 
															+/** Truncated average of two vectors of unsigned 16-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0) / 2
															+    ...
															+    rN = (aN + bN) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg_trunc.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 4}
															+    @icost{NEON, 1}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 8}
															+    @icost{AVX2, 4}
															+    @icost{NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+uint16<N,expr_empty> avg_trunc(const uint16<N,E1>& a, const uint16<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg_trunc(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg_trunc, uint16, uint16)
														
 
															+
														
 
															+/** Truncated average of two vectors of signed 16-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0) / 2
															+    ...
															+    rN = (aN + bN) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg_trunc.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 7-8}
															+    @icost{NEON, 1}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 14-15}
															+    @icost{AVX2, 7-8}
															+    @icost{NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+int16<N,expr_empty> avg_trunc(const int16<N,E1>& a, const int16<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg_trunc(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg_trunc, int16, int16)
														
 
															+
														
 
															+/** Truncated average of two vectors of unsigned 32-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0) / 2
															+    ...
															+    rN = (aN + bN) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg_trunc.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 4}
															+    @icost{NEON, 1}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 8}
															+    @icost{AVX2, 4}
															+    @icost{NEON, ALTIVEC, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+uint32<N,expr_empty> avg_trunc(const uint32<N,E1>& a, const uint32<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg_trunc(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg_trunc, uint32, uint32)
														
 
															+
														
 
															+/** Truncated average of two vectors of signed 32-bit integers.
															+
															+    Element-wise result:
															+    @code
															+    r0 = (a0 + b0) / 2
															+    ...
															+    rN = (aN + bN) / 2
															+    @endcode
															+
															+    Both operands are evaluated and forwarded to detail::insn::i_avg_trunc.
															+
															+    @par 128-bit version:
															+    @icost{SSE2-AVX2, 7-8}
															+    @icost{ALTIVEC, 4}
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, 14-15}
															+    @icost{AVX2, 7-8}
															+    @icost{ALTIVEC, 8}
															+    @icost{NEON, 2}
															+*/
															+template<unsigned N, class E1, class E2> SIMDPP_INL
															+int32<N,expr_empty> avg_trunc(const int32<N,E1>& a, const int32<N,E2>& b)
															+{
															+    const auto lhs = a.eval();
															+    const auto rhs = b.eval();
															+    return detail::insn::i_avg_trunc(lhs, rhs);
															+}
															+
															+SIMDPP_SCALAR_ARG_IMPL_VEC(avg_trunc, int32, int32)
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_div_p.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_div_p.h
@@ -0,0 +1,131 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_DIV_P_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_DIV_P_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/core/bit_and.h>
														
 
															+#include <simdpp/core/bit_andnot.h>
														
 
															+#include <simdpp/core/bit_or.h>
														
 
															+#include <simdpp/core/cmp_lt.h>
														
 
															+#include <simdpp/core/i_sub.h>
														
 
															+#include <simdpp/detail/null/math.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+// FIXME: move to adv
														
 
															+/** Divides one 8-bit unsigned number by another. The precision of the operation
															+    is configurable: only P least significant bits of both numerator and
															+    denominator are considered.
															+
															+    @code
															+    r0 = num0 / den0
															+    ...
															+    rN = numN / denN
															+    @endcode
															+
															+    @tparam P number of quotient bits to compute; must not exceed 8 (checked
															+        with static_assert on the non-NULL path). With P == 0 the loop of
															+        the non-NULL path does not run and zero is returned.
															+    @param num vector of numerators
															+    @param den vector of denominators
															+    @return vector of quotients
															+
															+    @par 128-bit version:
															+    The operation costs at least 9 instructions per bit of precision.
															+
															+    @par 256-bit version:
															+    @icost{SSE2-AVX, NEON, 10}
															+    @icost{AVX2, 4}
															+*/
															+template<unsigned P> SIMDPP_INL
															+uint8x16 div_p(const uint8x16& num, const uint8x16& den)
															+{
															+#if SIMDPP_USE_NULL
															+    return detail::null::div_p<P>(num, den);
															+#else
															+    static_assert(P <= 8, "Precision too large");
															+    // Index of the highest numerator bit to process. Clamped so that P == 0
															+    // never evaluates 1 << (P-1): P is unsigned, so that would be a shift by
															+    // UINT_MAX, which is undefined.
															+    const unsigned top_bit = (P > 0) ? P - 1 : 0;
															+    uint8x16 r, q, bit_mask;
															+    r = q = make_zero();
															+    bit_mask = make_uint(1 << top_bit);
															+
															+    for (unsigned i = P; i > 0; i--) {
															+        unsigned bit = i-1;
															+        uint8x16 n_bit;
															+        // we'll never shift out any bits, so larger shift doesn't matter
															+        r = shift_l<1>((uint16x8)r);
															+
															+        // bring the current numerator bit down into the remainder
															+        n_bit = bit_and(num, bit_mask);
															+        n_bit = shift_r((uint16x8)n_bit, bit);
															+        r = bit_or(r, n_bit);
															+
															+        uint8x16 cmp, csub, cbit;
															+        cmp = cmp_lt(r, den);
															+
															+        // where r >= den: subtract den and set the quotient bit
															+        csub = bit_andnot(den, cmp);
															+        cbit = bit_andnot(bit_mask, cmp);
															+        r = sub(r, csub);
															+        q = bit_or(q, cbit);
															+
															+        bit_mask = shift_r<1>((uint16x8)bit_mask);
															+    }
															+    return q;
															+
															+    /*
															+    The actual algorithm is as follows:
															+    N - numerator, D - denominator, R - remainder, Q - quotient
															+    R = 0; Q = 0;
															+    for (unsigned i = P; i > 0; i--) {
															+        unsigned bit = i-1;
															+        R <<= 1;
															+        R |= (N >> bit) & 1;
															+        if (R >= D) {
															+            R = R - D;
															+            Q |= 1 << bit;
															+        }
															+    }*/
															+#endif
															+}
														
 
															+
														
 
															+/** Divides one 16-bit unsigned number by another, computing P quotient bits
															+    with the same bit-by-bit scheme as the 8-bit overload.
															+
															+    @code
															+    r0 = num0 / den0
															+    ...
															+    rN = numN / denN
															+    @endcode
															+
															+    @tparam P number of quotient bits to compute; must not exceed 16 (checked
															+        with static_assert on the non-NULL path). With P == 0 the loop of
															+        the non-NULL path does not run and zero is returned.
															+    @param num vector of numerators
															+    @param den vector of denominators
															+    @return vector of quotients
															+*/
															+template<unsigned P> SIMDPP_INL
															+uint16x8 div_p(const uint16x8& num, const uint16x8& den)
															+{
															+#if SIMDPP_USE_NULL
															+    return detail::null::div_p<P>(num, den);
															+#else
															+    static_assert(P <= 16, "Precision too large");
															+    // Index of the highest numerator bit to process. Clamped so that P == 0
															+    // never evaluates 1 << (P-1): P is unsigned, so that would be a shift by
															+    // UINT_MAX, which is undefined.
															+    const unsigned top_bit = (P > 0) ? P - 1 : 0;
															+    uint16x8 r, q, bit_mask;
															+
															+    r = q = make_zero();
															+    bit_mask = make_uint(1 << top_bit);
															+
															+    for (unsigned i = P; i > 0; i--) {
															+        unsigned bit = i-1; // TODO precision
															+        uint16x8 n_bit;
															+        r = shift_l<1>(r);
															+
															+        // bring the current numerator bit down into the remainder
															+        n_bit = bit_and(num, bit_mask);
															+        n_bit = shift_r(n_bit, bit);
															+        r = bit_or(r, n_bit);
															+
															+        uint16x8 cmp, csub, cbit;
															+        cmp = cmp_lt(r, den);
															+
															+        // where r >= den: subtract den and set the quotient bit
															+        csub = bit_andnot(den, cmp);
															+        cbit = bit_andnot(bit_mask, cmp);
															+        r = sub(r, csub);
															+        q = bit_or(q, cbit);
															+
															+        bit_mask = shift_r<1>(bit_mask);
															+    }
															+    return q;
															+#endif
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_max.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_max.h
@@ -0,0 +1,193 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_MAX_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_MAX_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_max.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes maximum of the signed 8-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 4}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 8}
														
 
															+    @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int8<N,expr_empty> max(const int8<N,E1>& a, const int8<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_max
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, int8, int8)
														
 
															+
														
 
															+/** Computes maximum of the unsigned 8-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint8<N,expr_empty> max(const uint8<N,E1>& a, const uint8<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_max
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint8, uint8)
														
 
															+
														
 
															+/** Computes maximum of the signed 16-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int16<N,expr_empty> max(const int16<N,E1>& a, const int16<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_max
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, int16, int16)
														
 
															+
														
 
															+/** Computes maximum of the unsigned 16-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 6-7}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 12-13}
														
 
															+    @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint16<N,expr_empty> max(const uint16<N,E1>& a, const uint16<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_max
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint16, uint16)
														
 
															+
														
 
															+/** Computes maximum of the signed 32-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 4}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 8}
														
 
															+    @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int32<N,expr_empty> max(const int32<N,E1>& a, const int32<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_max
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, int32, int32)
														
 
															+
														
 
															+/** Computes maximum of the unsigned 32-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 6-7}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 12-13}
														
 
															+    @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint32<N,expr_empty> max(const uint32<N,E1>& a, const uint32<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_max
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint32, uint32)
														
 
															+
														
 
															+/** Computes maximum of the signed 64-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    Supported since AVX2, NEON64. Not supported on ALTIVEC.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int64<N,expr_empty> max(const int64<N,E1>& a, const int64<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_max
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, int64, int64)
														
 
															+
														
 
															+/** Computes maximum of the unsigned 64-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, b0)
														
 
															+    ...
														
 
															+    rN = max(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    Supported since AVX2, NEON64. Not supported on ALTIVEC.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint64<N,expr_empty> max(const uint64<N,E1>& a, const uint64<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_max(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_max
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint64, uint64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_min.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_min.h
@@ -0,0 +1,194 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_MIN_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_MIN_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_min.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes minimum of the signed 8-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 4}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 8}
														
 
															+    @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int8<N,expr_empty> min(const int8<N,E1>& a, const int8<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_min
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, int8, int8)
														
 
															+
														
 
															+/** Computes minimum of the unsigned 8-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint8<N,expr_empty> min(const uint8<N,E1>& a, const uint8<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_min
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, uint8, uint8)
														
 
															+
														
 
															+/** Computes minimum of the signed 16-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int16<N,expr_empty> min(const int16<N,E1>& a, const int16<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_min
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, int16, int16)
														
 
															+
														
 
															+/** Computes minimum of the unsigned 16-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 6-7}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 12-13}
														
 
															+    @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint16<N,expr_empty> min(const uint16<N,E1>& a, const uint16<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_min
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, uint16, uint16)
														
 
															+
														
 
															+/** Computes minimum of the signed 32-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 4}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 8}
														
 
															+    @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int32<N,expr_empty> min(const int32<N,E1>& a, const int32<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_min
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, int32, int32)
														
 
															+
														
 
															+
														
 
															+/** Computes minimum of the unsigned 32-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 6-7}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 12-13}
														
 
															+    @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint32<N,expr_empty> min(const uint32<N,E1>& a, const uint32<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_min
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, uint32, uint32)
														
 
															+
														
 
															+/** Computes minimum of the signed 64-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    Supported since AVX2, NEON64. Not supported on ALTIVEC.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int64<N,expr_empty> min(const int64<N,E1>& a, const int64<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_min
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, int64, int64)
														
 
															+
														
 
															+/** Computes minimum of the unsigned 64-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = min(a0, b0)
														
 
															+    ...
														
 
															+    rN = min(aN, bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    Supported since AVX2, NEON64. Not supported on ALTIVEC.
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint64<N,expr_empty> min(const uint64<N,E1>& a, const uint64<N,E2>& b)
														
 
															+{
														
 
															+    return detail::insn::i_min(a.eval(), b.eval()); // evaluate both operands via eval(), then forward them to detail::insn::i_min
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_VEC(min, uint64, uint64)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_mul.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_mul.h
@@ -0,0 +1,129 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_MUL_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_MUL_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/i_mul.h>
														
 
															+#include <simdpp/core/detail/get_expr_uint.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+#include <simdpp/core/detail/get_expr_uint.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+// no 8 bit multiplications in SSE
														
 
															+/** Multiplies 16-bit values and returns the lower half of the result.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = low(a0 * b0)
														
 
															+    ...
														
 
															+    rN = low(aN * bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_mul_lo, V1, V2>::type
														
 
															+        mul_lo(const any_int16<N,V1>& a,
														
 
															+               const any_int16<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } }; // nested brace-init of the result from both wrapped operands; presumably a lazy expr_mul_lo node - TODO confirm
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(mul_lo, expr_mul_lo, any_int16, int16)
														
 
															+
														
 
															+/** Multiplies signed 16-bit values and returns the higher half of the result.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = high(a0 * b0)
														
 
															+    ...
														
 
															+    rN = high(aN * bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, ALTIVEC, 3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+    @icost{NEON, ALTIVEC, 6}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int16<N, expr_mul_hi<int16<N,E1>,
														
 
															+                     int16<N,E2>>> mul_hi(const int16<N,E1>& a,
														
 
															+                                          const int16<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } }; // nested brace-init of the expr_mul_hi-typed result directly from the two operands
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi, expr_mul_hi, int16, int16)
														
 
															+
														
 
															+/** Multiplies unsigned 16-bit values and returns the higher half of the result.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = high(a0 * b0)
														
 
															+    ...
														
 
															+    rN = high(aN * bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, ALTIVEC, 3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+    @icost{NEON, ALTIVEC, 6}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint16<N, expr_mul_hi<uint16<N,E1>,
														
 
															+                      uint16<N,E2>>> mul_hi(const uint16<N,E1>& a,
														
 
															+                                            const uint16<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } }; // nested brace-init of the expr_mul_hi-typed result directly from the two operands
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi, expr_mul_hi, uint16, uint16)
														
 
															+
														
 
															+
														
 
															+/** Multiplies 32-bit values and returns the lower half of the result.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = low(a0 * b0)
														
 
															+    ...
														
 
															+    rN = low(aN * bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-SSSE3, 6}
														
 
															+    @icost{ALTIVEC, 8}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSSE3, 12}
														
 
															+    @icost{SSE4.1, AVX, NEON, 2}
														
 
															+    @icost{ALTIVEC, 16}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_mul_lo, V1, V2>::type
														
 
															+        mul_lo(const any_int32<N,V1>& a,
														
 
															+               const any_int32<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } }; // nested brace-init of the result from both wrapped operands; presumably a lazy expr_mul_lo node - TODO confirm
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(mul_lo, expr_mul_lo, any_int32, int32)
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_mull.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_mull.h
@@ -0,0 +1,156 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_MULL_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_MULL_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/i_mull.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/*  Note: widening integer multiplication instructions are very different among
														
 
															+    instruction sets. The main difference is in which half of the elements are
														
 
															+    selected for multiplication. Trying to abstract this incurs definite
														
 
															+    overhead.
														
 
															+
														
 
															+     - SSE2-SSE4.1 and AVX2 provide only instructions with interfaces similar
														
 
															+        to mul_lo and mul_hi. The result vectors must be interleaved to obtain
														
 
															+        contiguous result values. Multiplying 2 vectors always incurs
														
 
															+        overhead of at least two interleaving instructions.
														
 
															+
														
 
															+     - AVX512 only provides 32-bit integer support. Widening multiplication
														
 
															+        can be done only by using PMULDQ, which takes odd elements and produces
														
 
															+        widened multiplication results. Multiplication of two whole vectors
														
 
															+        always incurs overhead of at least two shifts or interleaving
														
 
															+        instructions.
														
 
															+
														
 
															+     - NEON, NEONv2 provide instructions that take elements of either the lower
														
 
															+        or higher halves of two 128-bit vectors and multiply them. No
														
 
															+        additional overhead is incurred to obtain contiguous result values.
														
 
															+
														
 
															+     - ALTIVEC has multiply odd and multiply even instructions. No additional
														
 
															+        overhead is incurred to obtain contiguous result values.
														
 
															+
														
 
															+    The abstraction below uses the NEON model. No additional overhead is
														
 
															+    incurred on SSE/AVX and NEON. On ALTIVEC, a single additional permute
														
 
															+    instruction is needed for each vector multiplication on average.
														
 
															+*/
														
 
															+
														
 
															+/** Multiplies signed 16-bit values and expands the results to 32 bits.
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @code
														
 
															+    r0 = a0 * b0
														
 
															+    ...
														
 
															+    rN = aN * bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @icost{SSE2-AVX, ALTIVEC, 2-3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    @icost{SSE2-AVX, ALTIVEC, 4-6}
														
 
															+    @icost{AVX2, NEON, 2-3}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int32<N, expr_mull<int16<N,E1>,
														
 
															+                   int16<N,E2>>> mull(const int16<N,E1>& a, const int16<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(mull, expr_mull, int32, int16)
														
 
															+
														
 
															+/** Multiplies unsigned 16-bit values and expands the results to 32 bits.
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @code
														
 
															+    r0 = a0 * b0
														
 
															+    ...
														
 
															+    rN = aN * bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @icost{SSE2-AVX2, ALTIVEC, 2-3}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, ALTIVEC, 4-6}
														
 
															+    @icost{AVX2, 2-3}
														
 
															+    @icost{NEON, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint32<N, expr_mull<uint16<N,E1>,
														
 
															+                    uint16<N,E2>>> mull(const uint16<N,E1>& a, const uint16<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(mull, expr_mull, uint32, uint16)
														
 
															+
														
 
															+/** Multiplies signed 32-bit values and expands the results to 64 bits.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 * b0
														
 
															+    ...
														
 
															+    rN = aN * bN
														
 
															+    @endcode
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE4.1-AVX, 3}
														
 
															+    @unimp{SSE2-SSSE3, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE4.1-AVX, 6}
														
 
															+    @icost{AVX2, 3}
														
 
															+    @icost{NEON, 2}
														
 
															+    @unimp{SSE2-SSSE3, ALTIVEC}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int64<N, expr_mull<int32<N,E1>,
														
 
															+                   int32<N,E2>>> mull(const int32<N,E1>& a, const int32<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(mull, expr_mull, int64, int32)
														
 
															+
														
 
															+/** Multiplies unsigned 32-bit values in the lower halves of the vectors and
														
 
															+    expands the results to 64 bits.
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @code
														
 
															+    r0 = a0 * b0
														
 
															+    r1 = a1 * b1
														
 
															+    @endcode
														
 
															+    @icost{SSE2-AVX, 3}
														
 
															+    @unimp{ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 6}
														
 
															+    @icost{AVX2, 3}
														
 
															+    @icost{NEON, 2}
														
 
															+    @unimp{ALTIVEC}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint64<N, expr_mull<uint32<N,E1>,
														
 
															+                    uint32<N,E2>>> mull(const uint32<N,E1>& a, const uint32<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(mull, expr_mull, uint64, uint32)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_neg.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_neg.h
@@ -0,0 +1,97 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_NEG_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_NEG_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/i_neg.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Negates signed 8-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = -a0
														
 
															+    ...
														
 
															+    rN = -aN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int8<N, expr_ineg<int8<N,E>>> neg(const int8<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+/** Negates signed 16-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = -a0
														
 
															+    ...
														
 
															+    rN = -aN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int16<N, expr_ineg<int16<N,E>>> neg(const int16<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+/** Negates signed 32-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = -a0
														
 
															+    ...
														
 
															+    rN = -aN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int32<N, expr_ineg<int32<N,E>>> neg(const int32<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+/** Negates signed 64-bit values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = -a0
														
 
															+    ...
														
 
															+    rN = -aN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{ALTIVEC, 4-5}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, 2}
														
 
															+    @icost{ALTIVEC, 8-9}
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int64<N, expr_ineg<int64<N,E>>> neg(const int64<N,E>& a)
														
 
															+{
														
 
															+    return { { a } };
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_popcnt.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_popcnt.h
@@ -0,0 +1,82 @@
 
															+/*  Copyright (C) 2017  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_POPCNT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_POPCNT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_popcnt.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Computes the population count of elements in the vector.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = popcnt(a0)
														
 
															+    r1 = popcnt(a1)
														
 
															+    ...
														
 
															+    rN = popcnt(aN)
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int8<N,expr_empty> popcnt(const int8<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_popcnt(uint8<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint8<N,expr_empty> popcnt(const uint8<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_popcnt(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int16<N,expr_empty> popcnt(const int16<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_popcnt(uint16<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint16<N,expr_empty> popcnt(const uint16<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_popcnt(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int32<N,expr_empty> popcnt(const int32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_popcnt(uint32<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint32<N,expr_empty> popcnt(const uint32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_popcnt(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int64<N,expr_empty> popcnt(const int64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_popcnt(uint64<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint64<N,expr_empty> popcnt(const uint64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_popcnt(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_add.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_add.h
@@ -0,0 +1,82 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_REDUCE_ADD_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_REDUCE_ADD_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_reduce_add.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes the sum of the elements in the vector. The intermediate and
														
 
															+    final results have twice as many bits as the input elements in the 8-bit
														
 
															+    and 16-bit cases.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 + a1 + a2 + ...
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int16_t reduce_add(const int8<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint16_t reduce_add(const uint8<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int32_t reduce_add(const int16<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint32_t reduce_add(const uint16<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int32_t reduce_add(const int32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(uint32<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint32_t reduce_add(const uint32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int64_t reduce_add(const int64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(uint64<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint64_t reduce_add(const uint64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_add(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_and.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_and.h
@@ -0,0 +1,80 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_REDUCE_AND_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_REDUCE_AND_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_reduce_and.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes the bitwise AND of the elements in the vector
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 & a1 & a2 & ...
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int8_t reduce_and(const int8<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_and(uint8<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint8_t reduce_and(const uint8<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_and(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int16_t reduce_and(const int16<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_and(uint16<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint16_t reduce_and(const uint16<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_and(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int32_t reduce_and(const int32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_and(uint32<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint32_t reduce_and(const uint32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_and(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int64_t reduce_and(const int64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_and(uint64<N>(a.eval()));
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint64_t reduce_and(const uint64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_and(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_max.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_max.h
@@ -0,0 +1,80 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_REDUCE_MAX_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_REDUCE_MAX_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_reduce_max.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Computes the maximum of the elements in the vector
														
 
															+
														
 
															+    @code
														
 
															+    r0 = max(a0, a1, a2, ...)
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int8_t reduce_max(const int8<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint8_t reduce_max(const uint8<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int16_t reduce_max(const int16<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint16_t reduce_max(const uint16<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int32_t reduce_max(const int32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint32_t reduce_max(const uint32<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int64_t reduce_max(const int64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint64_t reduce_max(const uint64<N,E>& a)
														
 
															+{
														
 
															+    return detail::insn::i_reduce_max(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_min.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_min.h
@@ -0,0 +1,80 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_REDUCE_MIN_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_REDUCE_MIN_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_reduce_min.h>
														
 
															+
														
 
															+namespace simdpp {
															+namespace SIMDPP_ARCH_NAMESPACE {
															+
															+
															+/** Reduces a vector to the smallest of its elements.
															+
															+    @code
															+    r0 = min(a0, a1, a2, ...)
															+    @endcode
															+
															+    One overload is provided per integer element type (int8 through uint64).
															+    Each overload evaluates @a a and hands the resulting vector to
															+    detail::insn::i_reduce_min, returning a scalar of the element type.
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int8_t reduce_min(const int8<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_min(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint8_t reduce_min(const uint8<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_min(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+int16_t reduce_min(const int16<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_min(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint16_t reduce_min(const uint16<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_min(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+int32_t reduce_min(const int32<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_min(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_min(const uint32<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_min(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+int64_t reduce_min(const int64<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_min(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint64_t reduce_min(const uint64<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_min(lanes);
															+}
															+
															+} // namespace SIMDPP_ARCH_NAMESPACE
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_mul.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_mul.h
@@ -0,0 +1,58 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_REDUCE_MUL_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_REDUCE_MUL_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_reduce_mul.h>
														
 
															+
														
 
															+namespace simdpp {
															+namespace SIMDPP_ARCH_NAMESPACE {
															+
															+
															+/** Computes the product of the elements in the vector. For 16-bit elements
															+    the intermediate and final results are computed in 32-bit precision.
															+    The behavior is undefined in the case of an overflow.
															+
															+    @code
															+    r0 = a0 * a1 * a2 * ...
															+    @endcode
															+
															+    The signed 32-bit overload converts the evaluated vector to uint32<N>
															+    before calling detail::insn::i_reduce_mul.
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int32_t reduce_mul(const int16<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_mul(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_mul(const uint16<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_mul(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+int32_t reduce_mul(const int32<N,E>& a)
															+{
															+    // Multiply through the unsigned backend; the scalar result converts
															+    // back to int32_t on return.
															+    const uint32<N> lanes(a.eval());
															+    return detail::insn::i_reduce_mul(lanes);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_mul(const uint32<N,E>& a)
															+{
															+    const auto lanes = a.eval();
															+    return detail::insn::i_reduce_mul(lanes);
															+}
															+
															+} // namespace SIMDPP_ARCH_NAMESPACE
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_or.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_or.h
@@ -0,0 +1,80 @@
 
															+/*  Copyright (C) 2016  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_REDUCE_OR_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_REDUCE_OR_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_reduce_or.h>
														
 
															+
														
 
															+namespace simdpp {
															+namespace SIMDPP_ARCH_NAMESPACE {
															+
															+
															+/** Computes the bitwise OR of the elements in the vector
															+
															+    @code
															+    r0 = a0 | a1 | a2 | ...
															+    @endcode
															+
															+    Signed overloads convert the evaluated vector to the unsigned vector
															+    type of the same element width before calling
															+    detail::insn::i_reduce_or; unsigned overloads pass it through directly.
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int8_t reduce_or(const int8<N,E>& a)
															+{
															+    return detail::insn::i_reduce_or(uint8<N>(a.eval()));
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint8_t reduce_or(const uint8<N,E>& a)
															+{
															+    return detail::insn::i_reduce_or(a.eval());
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+int16_t reduce_or(const int16<N,E>& a)
															+{
															+    return detail::insn::i_reduce_or(uint16<N>(a.eval()));
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint16_t reduce_or(const uint16<N,E>& a)
															+{
															+    return detail::insn::i_reduce_or(a.eval());
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+int32_t reduce_or(const int32<N,E>& a)
															+{
															+    return detail::insn::i_reduce_or(uint32<N>(a.eval()));
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_or(const uint32<N,E>& a)
															+{
															+    return detail::insn::i_reduce_or(a.eval());
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+int64_t reduce_or(const int64<N,E>& a)
															+{
															+    return detail::insn::i_reduce_or(uint64<N>(a.eval()));
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint64_t reduce_or(const uint64<N,E>& a)
															+{
															+    return detail::insn::i_reduce_or(a.eval());
															+}
															+
															+} // namespace SIMDPP_ARCH_NAMESPACE
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_popcnt.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_reduce_popcnt.h
@@ -0,0 +1,80 @@
 
															+/*  Copyright (C) 2017  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_REDUCE_POPCNT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_REDUCE_POPCNT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/i_reduce_popcnt.h>
														
 
															+
														
 
															+namespace simdpp {
															+namespace SIMDPP_ARCH_NAMESPACE {
															+
															+
															+/** Counts the set bits across every element of the vector.
															+
															+    @code
															+    result = popcnt(a0) + popcnt(a1) + ...
															+    @endcode
															+
															+    Every overload except the uint32 one first converts the evaluated
															+    vector to a uint32 vector with the same total bit width (N/4, N/2, N or
															+    N*2 lanes depending on the source element size) and then calls
															+    detail::insn::i_reduce_popcnt on it. The conversion is presumably a
															+    bit-for-bit reinterpretation -- confirm against the vector types.
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_popcnt(const int8<N,E>& a)
															+{
															+    const uint32<N/4> words(a.eval());
															+    return detail::insn::i_reduce_popcnt(words);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_popcnt(const uint8<N,E>& a)
															+{
															+    const uint32<N/4> words(a.eval());
															+    return detail::insn::i_reduce_popcnt(words);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_popcnt(const int16<N,E>& a)
															+{
															+    const uint32<N/2> words(a.eval());
															+    return detail::insn::i_reduce_popcnt(words);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_popcnt(const uint16<N,E>& a)
															+{
															+    const uint32<N/2> words(a.eval());
															+    return detail::insn::i_reduce_popcnt(words);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_popcnt(const int32<N,E>& a)
															+{
															+    const uint32<N> words(a.eval());
															+    return detail::insn::i_reduce_popcnt(words);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_popcnt(const uint32<N,E>& a)
															+{
															+    // Already the backend's element type: no conversion step needed.
															+    const auto words = a.eval();
															+    return detail::insn::i_reduce_popcnt(words);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_popcnt(const int64<N,E>& a)
															+{
															+    const uint32<N*2> words(a.eval());
															+    return detail::insn::i_reduce_popcnt(words);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32_t reduce_popcnt(const uint64<N,E>& a)
															+{
															+    const uint32<N*2> words(a.eval());
															+    return detail::insn::i_reduce_popcnt(words);
															+}
															+
															+} // namespace SIMDPP_ARCH_NAMESPACE
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_shift_l.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_shift_l.h
@@ -0,0 +1,303 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_SHIFT_L_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_SHIFT_L_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/capabilities.h>
														
 
															+#include <simdpp/detail/insn/i_shift_l.h>
														
 
															+#include <simdpp/detail/insn/i_shift_l_v.h>
														
 
															+#include <simdpp/detail/not_implemented.h>
														
 
															+
														
 
															+namespace simdpp {
															+namespace SIMDPP_ARCH_NAMESPACE {
															+
															+// -----------------------------------------------------------------------------
															+// shift by scalar
															+
															+/** Shifts 8-bit values left by @a count bits while shifting in zeros.
															+
															+    @code
															+    r0 = a0 << count
															+    ...
															+    rN = aN << count
															+    @endcode
															+
															+    In every scalar-count overload pair, the signed overload converts the
															+    evaluated vector to the unsigned type of the same width before calling
															+    detail::insn::i_shift_l; the unsigned overload passes it through.
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int8<N,expr_empty> shift_l(const int8<N,E>& a, unsigned count)
															+{
															+    uint8<N> qa = a.eval();
															+    return detail::insn::i_shift_l(qa, count);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint8<N,expr_empty> shift_l(const uint8<N,E>& a, unsigned count)
															+{
															+    return detail::insn::i_shift_l(a.eval(), count);
															+}
															+
															+/** Shifts 16-bit values left by @a count bits while shifting in zeros.
															+
															+    @code
															+    r0 = a0 << count
															+    ...
															+    rN = aN << count
															+    @endcode
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int16<N,expr_empty> shift_l(const int16<N,E>& a, unsigned count)
															+{
															+    uint16<N> qa = a.eval();
															+    return detail::insn::i_shift_l(qa, count);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint16<N,expr_empty> shift_l(const uint16<N,E>& a, unsigned count)
															+{
															+    return detail::insn::i_shift_l(a.eval(), count);
															+}
															+
															+/** Shifts 32-bit values left by @a count bits while shifting in zeros.
															+
															+    @code
															+    r0 = a0 << count
															+    ...
															+    rN = aN << count
															+    @endcode
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int32<N,expr_empty> shift_l(const int32<N,E>& a, unsigned count)
															+{
															+    uint32<N> qa = a.eval();
															+    return detail::insn::i_shift_l(qa, count);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32<N,expr_empty> shift_l(const uint32<N,E>& a, unsigned count)
															+{
															+    return detail::insn::i_shift_l(a.eval(), count);
															+}
															+
															+/** Shifts 64-bit values left by @a count bits while shifting in zeros.
															+
															+    @code
															+    r0 = a0 << count
															+    ...
															+    rN = aN << count
															+    @endcode
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int64<N,expr_empty> shift_l(const int64<N,E>& a, unsigned count)
															+{
															+    uint64<N> qa = a.eval();
															+    return detail::insn::i_shift_l(qa, count);
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint64<N,expr_empty> shift_l(const uint64<N,E>& a, unsigned count)
															+{
															+    return detail::insn::i_shift_l(a.eval(), count);
															+}
															+
															+// -----------------------------------------------------------------------------
															+// shift by vector
															+
															+/** Shifts 8-bit values left by the number of bits in corresponding element
															+    in the given count vector. Zero bits are shifted in.
															+
															+    @code
															+    r0 = a0 << count0
															+    ...
															+    rN = aN << countN
															+    @endcode
															+
															+    Each vector-count overload is compiled to a call to
															+    detail::insn::i_shift_l_v only when its capability macro is non-zero;
															+    otherwise it expands to SIMDPP_NOT_IMPLEMENTED_TEMPLATE2.
															+
															+    NOTE(review): all six vector-count overloads in this file test a
															+    SIMDPP_HAS_*_SHIFT_R_BY_VECTOR macro (right shift) although they perform
															+    a left shift. Confirm against simdpp/capabilities.h whether the
															+    corresponding *_SHIFT_L_BY_VECTOR macros were intended.
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int8<N,expr_empty> shift_l(const int8<N,E>& a, const uint8<N,E>& count)
															+{
															+#if SIMDPP_HAS_INT8_SHIFT_R_BY_VECTOR
															+    uint8<N> qa = a.eval();
															+    return detail::insn::i_shift_l_v(qa, count.eval());
															+#else
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
															+#endif
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint8<N,expr_empty> shift_l(const uint8<N,E>& a, const uint8<N,E>& count)
															+{
															+#if SIMDPP_HAS_UINT8_SHIFT_R_BY_VECTOR
															+    return detail::insn::i_shift_l_v(a.eval(), count.eval());
															+#else
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
															+#endif
															+}
															+
															+/** Shifts 16-bit values left by the number of bits in corresponding element
															+    in the given count vector. Zero bits are shifted in.
															+
															+    @code
															+    r0 = a0 << count0
															+    ...
															+    rN = aN << countN
															+    @endcode
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int16<N,expr_empty> shift_l(const int16<N,E>& a, const uint16<N,E>& count)
															+{
															+#if SIMDPP_HAS_INT16_SHIFT_R_BY_VECTOR
															+    uint16<N> qa = a.eval();
															+    return detail::insn::i_shift_l_v(qa, count.eval());
															+#else
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
															+#endif
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint16<N,expr_empty> shift_l(const uint16<N,E>& a, const uint16<N,E>& count)
															+{
															+#if SIMDPP_HAS_UINT16_SHIFT_R_BY_VECTOR
															+    return detail::insn::i_shift_l_v(a.eval(), count.eval());
															+#else
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
															+#endif
															+}
															+
															+/** Shifts 32-bit values left by the number of bits in corresponding element
															+    in the given count vector. Zero bits are shifted in.
															+
															+    @code
															+    r0 = a0 << count0
															+    ...
															+    rN = aN << countN
															+    @endcode
															+*/
															+template<unsigned N, class E> SIMDPP_INL
															+int32<N,expr_empty> shift_l(const int32<N,E>& a, const uint32<N,E>& count)
															+{
															+#if SIMDPP_HAS_INT32_SHIFT_R_BY_VECTOR
															+    uint32<N> qa = a.eval();
															+    return detail::insn::i_shift_l_v(qa, count.eval());
															+#else
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
															+#endif
															+}
															+
															+template<unsigned N, class E> SIMDPP_INL
															+uint32<N,expr_empty> shift_l(const uint32<N,E>& a, const uint32<N,E>& count)
															+{
															+#if SIMDPP_HAS_UINT32_SHIFT_R_BY_VECTOR
															+    return detail::insn::i_shift_l_v(a.eval(), count.eval());
															+#else
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
															+#endif
															+}
															+
															+// -----------------------------------------------------------------------------
															+// shift by compile-time constant
															+
															+/** Shifts 8-bit values left by @a count bits while shifting in zeros.
															+
															+    @code
															+    r0 = a0 << count
															+    ...
															+    rN = aN << count
															+    @endcode
															+
															+    In the compile-time overloads @a count is a template argument. A
															+    static_assert rejects counts that are not smaller than the element
															+    width, and the call is dispatched through
															+    detail::insn::i_shift_l_wrapper, which is selected on whether
															+    @a count is zero.
															+*/
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
															+int8<N,expr_empty> shift_l(const int8<N,E>& a)
															+{
															+    static_assert(count < 8, "Shift out of bounds");
															+    uint8<N> qa = a.eval();
															+    return detail::insn::i_shift_l_wrapper<count == 0>::template run<count>(qa);
															+}
															+
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
															+uint8<N,expr_empty> shift_l(const uint8<N,E>& a)
															+{
															+    static_assert(count < 8, "Shift out of bounds");
															+    return detail::insn::i_shift_l_wrapper<count == 0>::template run<count>(a.eval());
															+}
															+
															+/** Shifts 16-bit values left by @a count bits while shifting in zeros.
															+
															+    @code
															+    r0 = a0 << count
															+    ...
															+    rN = aN << count
															+    @endcode
															+*/
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
															+int16<N,expr_empty> shift_l(const int16<N,E>& a)
															+{
															+    static_assert(count < 16, "Shift out of bounds");
															+    uint16<N> qa = a.eval();
															+    return detail::insn::i_shift_l_wrapper<count == 0>::template run<count>(qa);
															+}
															+
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
															+uint16<N,expr_empty> shift_l(const uint16<N,E>& a)
															+{
															+    static_assert(count < 16, "Shift out of bounds");
															+    return detail::insn::i_shift_l_wrapper<count == 0>::template run<count>(a.eval());
															+}
															+
															+/** Shifts 32-bit values left by @a count bits while shifting in zeros.
															+
															+    @code
															+    r0 = a0 << count
															+    ...
															+    rN = aN << count
															+    @endcode
															+*/
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
															+int32<N,expr_empty> shift_l(const int32<N,E>& a)
															+{
															+    static_assert(count < 32, "Shift out of bounds");
															+    uint32<N> qa = a.eval();
															+    return detail::insn::i_shift_l_wrapper<count == 0>::template run<count>(qa);
															+}
															+
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
															+uint32<N,expr_empty> shift_l(const uint32<N,E>& a)
															+{
															+    static_assert(count < 32, "Shift out of bounds");
															+    return detail::insn::i_shift_l_wrapper<count == 0>::template run<count>(a.eval());
															+}
															+
															+/** Shifts 64-bit values left by @a count bits while shifting in zeros.
															+
															+    @code
															+    r0 = a0 << count
															+    ...
															+    rN = aN << count
															+    @endcode
															+*/
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
															+int64<N,expr_empty> shift_l(const int64<N,E>& a)
															+{
															+    static_assert(count < 64, "Shift out of bounds");
															+    uint64<N> qa = a.eval();
															+    return detail::insn::i_shift_l_wrapper<count == 0>::template run<count>(qa);
															+}
															+
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
															+uint64<N,expr_empty> shift_l(const uint64<N,E>& a)
															+{
															+    static_assert(count < 64, "Shift out of bounds");
															+    return detail::insn::i_shift_l_wrapper<count == 0>::template run<count>(a.eval());
															+}
															+
															+} // namespace SIMDPP_ARCH_NAMESPACE
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_shift_r.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_shift_r.h
@@ -0,0 +1,398 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <povilas@radix.lt>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_SHIFT_R_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_SHIFT_R_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/capabilities.h>
														
 
															+#include <simdpp/detail/insn/i_shift_r.h>
														
 
															+#include <simdpp/detail/insn/i_shift_r_v.h>
														
 
															+#include <simdpp/detail/not_implemented.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+// -----------------------------------------------------------------------------
														
 
															+// shift by scalar
														
 
															+
														
 
															+/** Shifts signed 8-bit values right by @a count bits while shifting in the
														
 
															+    sign bit.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int8<N,expr_empty> shift_r(const int8<N,E>& a, unsigned count)
														
 
															+{
														
 
															+    return detail::insn::i_shift_r(a.eval(), count);
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 8-bit values right by @a count bits while shifting in zeros.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint8<N,expr_empty> shift_r(const uint8<N,E>& a, unsigned count)
														
 
															+{
														
 
															+    return detail::insn::i_shift_r(a.eval(), count);
														
 
															+}
														
 
															+
														
 
															+/** Shifts signed 16-bit values right by @a count bits while shifting in the
														
 
															+    sign bit.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int16<N,expr_empty> shift_r(const int16<N,E>& a, unsigned count)
														
 
															+{
														
 
															+    return detail::insn::i_shift_r(a.eval(), count);
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 16-bit values right by @a count bits while shifting in
														
 
															+    zeros.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint16<N,expr_empty> shift_r(const uint16<N,E>& a, unsigned count)
														
 
															+{
														
 
															+    return detail::insn::i_shift_r(a.eval(), count);
														
 
															+}
														
 
															+
														
 
															+/** Shifts signed 32-bit values right by @a count bits while shifting in the
														
 
															+    sign bit.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int32<N,expr_empty> shift_r(const int32<N,E>& a, unsigned count)
														
 
															+{
														
 
															+    return detail::insn::i_shift_r(a.eval(), count);
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 32-bit values right by @a count bits while shifting in
														
 
															+    zeros.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint32<N,expr_empty> shift_r(const uint32<N,E>& a, unsigned count)
														
 
															+{
														
 
															+    return detail::insn::i_shift_r(a.eval(), count);
														
 
															+}
														
 
															+
														
 
															+/** Shifts signed 64-bit values right by @a count bits while shifting in the
														
 
															+    sign bit.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int64<N,expr_empty> shift_r(const int64<N,E>& a, unsigned count)
														
 
															+{
														
 
															+    return detail::insn::i_shift_r(a.eval(), count);
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 64-bit values right by @a count bits while shifting in
														
 
															+    zeros.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint64<N,expr_empty> shift_r(const uint64<N,E>& a, unsigned count)
														
 
															+{
														
 
															+    return detail::insn::i_shift_r(a.eval(), count);
														
 
															+}
														
 
															+
														
 
															+// -----------------------------------------------------------------------------
														
 
															+// shift by vector
														
 
															+
														
 
															+/** Shifts signed 8-bit values right by the number of bits in corresponding
														
 
															+    element in the given count vector. Sign bits are shifted in.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count0
														
 
															+    ...
														
 
															+    rN = aN >> countN
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int8<N,expr_empty> shift_r(const int8<N,E>& a, const uint8<N,E>& count)
														
 
															+{
														
 
															+#if SIMDPP_HAS_INT8_SHIFT_R_BY_VECTOR
														
 
															+    return detail::insn::i_shift_r_v(a.eval(), count.eval());
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 8-bit values right by the number of bits in corresponding
														
 
															+    element in the given count vector. Zero bits are shifted in.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count0
														
 
															+    ...
														
 
															+    rN = aN >> countN
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint8<N,expr_empty> shift_r(const uint8<N,E>& a, const uint8<N,E>& count)
														
 
															+{
														
 
															+#if SIMDPP_HAS_UINT8_SHIFT_R_BY_VECTOR
														
 
															+    return detail::insn::i_shift_r_v(a.eval(), count.eval());
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+/** Shifts signed 16-bit values right by the number of bits in corresponding
														
 
															+    element in the given count vector. Sign bits are shifted in.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count0
														
 
															+    ...
														
 
															+    rN = aN >> countN
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int16<N,expr_empty> shift_r(const int16<N,E>& a, const uint16<N,E>& count)
														
 
															+{
														
 
															+#if SIMDPP_HAS_INT16_SHIFT_R_BY_VECTOR
														
 
															+    return detail::insn::i_shift_r_v(a.eval(), count.eval());
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 16-bit values right by the number of bits in corresponding
														
 
															+    element in the given count vector. Zero bits are shifted in.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count0
														
 
															+    ...
														
 
															+    rN = aN >> countN
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint16<N,expr_empty> shift_r(const uint16<N,E>& a, const uint16<N,E>& count)
														
 
															+{
														
 
															+#if SIMDPP_HAS_UINT16_SHIFT_R_BY_VECTOR
														
 
															+    return detail::insn::i_shift_r_v(a.eval(), count.eval());
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+/** Shifts signed 32-bit values right by the number of bits in corresponding
														
 
															+    element in the given count vector. Sign bits are shifted in.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count0
														
 
															+    ...
														
 
															+    rN = aN >> countN
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+int32<N,expr_empty> shift_r(const int32<N,E>& a, const uint32<N,E>& count)
														
 
															+{
														
 
															+#if SIMDPP_HAS_INT32_SHIFT_R_BY_VECTOR
														
 
															+    return detail::insn::i_shift_r_v(a.eval(), count.eval());
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 32-bit values right by the number of bits in corresponding
														
 
															+    element in the given count vector. Zero bits are shifted in.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count0
														
 
															+    ...
														
 
															+    rN = aN >> countN
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned N, class E> SIMDPP_INL
														
 
															+uint32<N,expr_empty> shift_r(const uint32<N,E>& a, const uint32<N,E>& count)
														
 
															+{
														
 
															+#if SIMDPP_HAS_UINT32_SHIFT_R_BY_VECTOR
														
 
															+    return detail::insn::i_shift_r_v(a.eval(), count.eval());
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(E, a, count);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+// -----------------------------------------------------------------------------
														
 
															+// shift by compile-time constant
														
 
															+
														
 
															+/** Shifts signed 8-bit values right by @a count bits while shifting in the
														
 
															+    sign bit.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
														
 
															+int8<N,expr_empty> shift_r(const int8<N,E>& a)
														
 
															+{
														
 
															+    static_assert(count < 8, "Shift out of bounds");
														
 
															+    return detail::insn::i_shift_r_wrapper<count == 0>::template run<count>(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 8-bit values right by @a count bits while shifting in
														
 
															+    zeros.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
														
 
															+uint8<N,expr_empty> shift_r(const uint8<N,E>& a)
														
 
															+{
														
 
															+    static_assert(count < 8, "Shift out of bounds");
														
 
															+    return detail::insn::i_shift_r_wrapper<count == 0>::template run<count>(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Shifts signed 16-bit values right by @a count bits while shifting in the
														
 
															+    sign bit.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
														
 
															+int16<N,expr_empty> shift_r(const int16<N,E>& a)
														
 
															+{
														
 
															+    static_assert(count < 16, "Shift out of bounds");
														
 
															+    return detail::insn::i_shift_r_wrapper<count == 0>::template run<count>(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 16-bit values right by @a count bits while shifting in
														
 
															+    zeros.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
														
 
															+uint16<N,expr_empty> shift_r(const uint16<N,E>& a)
														
 
															+{
														
 
															+    static_assert(count < 16, "Shift out of bounds");
														
 
															+    return detail::insn::i_shift_r_wrapper<count == 0>::template run<count>(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Shifts signed 32-bit values right by @a count bits while shifting in the
														
 
															+    sign bit.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
														
 
															+int32<N,expr_empty> shift_r(const int32<N,E>& a)
														
 
															+{
														
 
															+    static_assert(count < 32, "Shift out of bounds");
														
 
															+    return detail::insn::i_shift_r_wrapper<count == 0>::template run<count>(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 32-bit values right by @a count bits while shifting in
														
 
															+    zeros.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
														
 
															+uint32<N,expr_empty> shift_r(const uint32<N,E>& a)
														
 
															+{
														
 
															+    static_assert(count < 32, "Shift out of bounds");
														
 
															+    return detail::insn::i_shift_r_wrapper<count == 0>::template run<count>(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Shifts signed 64-bit values right by @a count bits while shifting in the
														
 
															+    sign bit.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
														
 
															+int64<N,expr_empty> shift_r(const int64<N,E>& a)
														
 
															+{
														
 
															+    static_assert(count < 64, "Shift out of bounds");
														
 
															+    return detail::insn::i_shift_r_wrapper<count == 0>::template run<count>(a.eval());
														
 
															+}
														
 
															+
														
 
															+/** Shifts unsigned 64-bit values right by @a count bits while shifting in
														
 
															+    zeros.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 >> count
														
 
															+    ...
														
 
															+    rN = aN >> count
														
 
															+    @endcode
														
 
															+*/
														
 
															+template<unsigned count, unsigned N, class E> SIMDPP_INL
														
 
															+uint64<N,expr_empty> shift_r(const uint64<N,E>& a)
														
 
															+{
														
 
															+    static_assert(count < 64, "Shift out of bounds");
														
 
															+    return detail::insn::i_shift_r_wrapper<count == 0>::template run<count>(a.eval());
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_sub.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_sub.h
@@ -0,0 +1,117 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <povilas@radix.lt>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_SUB_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_SUB_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/i_sub.h>
														
 
															+#include <simdpp/core/detail/get_expr_uint.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+#include <simdpp/core/detail/get_expr_uint.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Subtracts 8-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 - b0
														
 
															+    ...
														
 
															+    rN = aN - bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_isub, V1, V2>::type
														
 
															+        sub(const any_int8<N,V1>& a,
														
 
															+            const any_int8<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(sub, expr_isub, any_int8, int8)
														
 
															+
														
 
															+/** Subtracts 16-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 - b0
														
 
															+    ...
														
 
															+    rN = aN - bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_isub, V1, V2>::type
														
 
															+        sub(const any_int16<N,V1>& a,
														
 
															+            const any_int16<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(sub, expr_isub, any_int16, int16)
														
 
															+
														
 
															+/** Subtracts 32-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 - b0
														
 
															+    ...
														
 
															+    rN = aN - bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_isub, V1, V2>::type
														
 
															+        sub(const any_int32<N,V1>& a,
														
 
															+            const any_int32<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(sub, expr_isub, any_int32, int32)
														
 
															+
														
 
															+/** Subtracts 64-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a0 - b0
														
 
															+    ...
														
 
															+    rN = aN - bN
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{ALTIVEC, 5-6}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, 2}
														
 
															+    @icost{ALTIVEC, 10-11}
														
 
															+*/
														
 
															+template<unsigned N, class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr_uint<expr_isub, V1, V2>::type
														
 
															+        sub(const any_int64<N,V1>& a,
														
 
															+            const any_int64<N,V2>& b)
														
 
															+{
														
 
															+    return { { a.wrapped(), b.wrapped() } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(sub, expr_isub, any_int64, int64)
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/i_sub_sat.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/i_sub_sat.h
@@ -0,0 +1,110 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_I_SUBS_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_I_SUBS_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/expr/i_sub_sat.h>
														
 
															+#include <simdpp/core/detail/scalar_arg_impl.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Subtracts and saturates signed 8-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = saturated(a0 - b0)
														
 
															+    ...
														
 
															+    rN = saturated(aN - bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int8<N, expr_isub_sat<int8<N,E1>,
														
 
															+                      int8<N,E2>>> sub_sat(const int8<N,E1>& a,
														
 
															+                                           const int8<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(sub_sat, expr_isub_sat, int8, int8)
														
 
															+
														
 
															+/** Subtracts and saturates signed 16-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = saturated(a0 - b0)
														
 
															+    ...
														
 
															+    rN = saturated(aN - bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+int16<N, expr_isub_sat<int16<N,E1>,
														
 
															+                       int16<N,E2>>> sub_sat(const int16<N,E1>& a,
														
 
															+                                             const int16<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(sub_sat, expr_isub_sat, int16, int16)
														
 
															+
														
 
															+/** Subtracts and saturates unsigned 8-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = saturated(a0 - b0)
														
 
															+    ...
														
 
															+    rN = saturated(aN - bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint8<N, expr_isub_sat<uint8<N,E1>,
														
 
															+                       uint8<N,E2>>> sub_sat(const uint8<N,E1>& a,
														
 
															+                                             const uint8<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(sub_sat, expr_isub_sat, uint8, uint8)
														
 
															+
														
 
															+/** Subtracts and saturates unsigned 16-bit integer values.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = saturated(a0 - b0)
														
 
															+    ...
														
 
															+    rN = saturated(aN - bN)
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned N, class E1, class E2> SIMDPP_INL
														
 
															+uint16<N, expr_isub_sat<uint16<N,E1>,
														
 
															+                        uint16<N,E2>>> sub_sat(const uint16<N,E1>& a,
														
 
															+                                               const uint16<N,E2>& b)
														
 
															+{
														
 
															+    return { { a, b } };
														
 
															+}
														
 
															+
														
 
															+SIMDPP_SCALAR_ARG_IMPL_EXPR(sub_sat, expr_isub_sat, uint16, uint16)
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/insert.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/insert.h
@@ -0,0 +1,107 @@
 
															+/*  Copyright (C) 2011-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_INSERT_H
														
 
															+#define LIBSIMDPP_SIMD_INSERT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/insert.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Inserts an element into a vector at the position identified by @a id.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = (id == 0) ? x : a0
														
 
															+    ...
														
 
															+    rN = (id == N) ? x : aN
														
 
															+    @endcode
														
 
															+
														
 
															+    This function may have very high latency.
														
 
															+    It is expected that the argument comes from a general-purpose register.
														
 
															+*/
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+uint8<N> insert(const uint8<N>& a, uint8_t x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return detail::insn::i_insert<id>(a, x);
														
 
															+}
														
 
															+
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+int8<N> insert(const int8<N>& a, int8_t x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return (int8<N>) detail::insn::i_insert<id>((uint8<N>) a, (uint8_t)x);
														
 
															+}
														
 
															+
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+uint16<N> insert(const uint16<N>& a, uint16_t x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return detail::insn::i_insert<id>(a, x);
														
 
															+}
														
 
															+
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+int16<N> insert(const int16<N>& a, int16_t x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return (int16<N>) detail::insn::i_insert<id>((uint16<N>) a, (uint16_t)x);
														
 
															+}
														
 
															+
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+uint32<N> insert(const uint32<N>& a, uint32_t x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return detail::insn::i_insert<id>(a, x);
														
 
															+}
														
 
															+
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+int32<N> insert(const int32<N>& a, int32_t x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return (int32<N>) detail::insn::i_insert<id>((uint32<N>)a, (uint32_t)x);
														
 
															+}
														
 
															+
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+uint64<N> insert(const uint64<N>& a, uint64_t x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return detail::insn::i_insert<id>(a, x);
														
 
															+}
														
 
															+
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+int64<N> insert(const int64<N>& a, int64_t x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return (int64<N>) detail::insn::i_insert<id>((uint64<N>)a, (uint64_t)x);
														
 
															+}
														
 
															+
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+float32<N> insert(const float32<N>& a, float x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return detail::insn::i_insert<id>(a, x);
														
 
															+}
														
 
															+
														
 
															+template<unsigned id, unsigned N> SIMDPP_INL
														
 
															+float64<N> insert(const float64<N>& a, double x)
														
 
															+{
														
 
															+    static_assert(id < N, "index out of bounds");
														
 
															+    return detail::insn::i_insert<id>(a, x);
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/load.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/load.h
@@ -0,0 +1,62 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_LOAD_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_LOAD_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/load.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector
														
 
															+    from an aligned memory location.
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    @code
														
 
															+    a[0..127] = *(p)
														
 
															+    @endcode
														
 
															+    @a p must be aligned to 16 bytes.
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    @code
														
 
															+    a[0..255] = *(p)
														
 
															+    @endcode
														
 
															+    @a p must be aligned to 32 bytes.
														
 
															+
														
 
															+    @icost{SSE2-SSE4.1, NEON, ALTIVEC, 2}
														
 
															+    @icost{AVX (integer vectors), 2}
														
 
															+*/
														
 
															+// FIXME: return empty expression
														
 
															+template<class T>
														
 
															+SIMDPP_INL expr_vec_load load(const T* p)
														
 
															+{
														
 
															+    expr_vec_load r;
														
 
															+    r.a = reinterpret_cast<const char*>(p);
														
 
															+    return r;
														
 
															+}
														
 
															+
														
 
															+template<class V, class T> SIMDPP_INL
														
 
															+V load(const T* p)
														
 
															+{
														
 
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
														
 
															+                  "V must be a non-mask vector");
														
 
															+    return detail::insn::i_load_any<V>(reinterpret_cast<const char*>(p));
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/load_packed2.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/load_packed2.h
@@ -0,0 +1,49 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_LOAD_PACKED2_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_LOAD_PACKED2_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/load_packed2.h>
														
 
															+#include <simdpp/detail/traits.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Loads values packed in pairs, de-interleaves them and stores the result
														
 
															+    into two vectors.
														
 
															+
														
 
															+    @code
														
 
															+    a = [ *(p),   *(p+2), *(p+4), ... , *(p+M*2-2) ]
														
 
															+    b = [ *(p+1), *(p+3), *(p+5), ... , *(p+M*2-1) ]
														
 
															+    @endcode
														
 
															+
														
 
															+    Here M is the number of elements in the vector
														
 
															+
														
 
															+    @a p must be aligned to the vector size in bytes
														
 
															+*/
														
 
															+template<unsigned N, class V, class T> SIMDPP_INL
														
 
															+void load_packed2(any_vec<N,V>& a, any_vec<N,V>& b, const T* p)
														
 
															+{
														
 
															+    static_assert(!is_mask<V>::value, "Mask types can not be loaded");
														
 
															+    typename detail::get_expr_nosign<V>::type ra, rb;
														
 
															+    detail::insn::i_load_packed2(ra, rb, reinterpret_cast<const char*>(p));
														
 
															+    a.wrapped() = ra;
														
 
															+    b.wrapped() = rb;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/load_packed3.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/load_packed3.h
@@ -0,0 +1,52 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_LOAD_PACKED3_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_LOAD_PACKED3_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/load_packed3.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Loads values packed in triplets, de-interleaves them and stores the result
														
 
															+    into three vectors.
														
 
															+
														
 
															+    @code
														
 
															+    a = [ *(p),   *(p+3), *(p+6), ... , *(p+M*3-3) ]
														
 
															+    b = [ *(p+1), *(p+4), *(p+7), ... , *(p+M*3-2) ]
														
 
															+    c = [ *(p+2), *(p+5), *(p+8), ... , *(p+M*3-1) ]
														
 
															+    @endcode
														
 
															+
														
 
															+    Here M is the number of elements in the vector
														
 
															+
														
 
															+    @a p must be aligned to the vector size in bytes
														
 
															+*/
														
 
															+template<unsigned N, class V, class T> SIMDPP_INL
														
 
															+void load_packed3(any_vec<N,V>& a, any_vec<N,V>& b, any_vec<N,V>& c,
														
 
															+                  const T* p)
														
 
															+{
														
 
															+    static_assert(!is_mask<V>::value, "Mask types can not be loaded");
														
 
															+    typename detail::get_expr_nosign<V>::type ra, rb, rc;
														
 
															+    detail::insn::i_load_packed3(ra, rb, rc, reinterpret_cast<const char*>(p));
														
 
															+    a.wrapped() = ra;
														
 
															+    b.wrapped() = rb;
														
 
															+    c.wrapped() = rc;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/load_packed4.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/load_packed4.h
@@ -0,0 +1,55 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_LOAD_PACKED4_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_LOAD_PACKED4_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/load_packed4.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+
														
 
															+/** Loads values packed in quartets, de-interleaves them and stores the result
														
 
															+    into four vectors.
														
 
															+
														
 
															+    @code
														
 
															+    a = [ *(p),   *(p+4), *(p+8),  ... , *(p+M*4-4) ]
														
 
															+    b = [ *(p+1), *(p+5), *(p+9),  ... , *(p+M*4-3) ]
														
 
															+    c = [ *(p+2), *(p+6), *(p+10), ... , *(p+M*4-2) ]
														
 
															+    d = [ *(p+3), *(p+7), *(p+11), ... , *(p+M*4-1) ]
														
 
															+    @endcode
														
 
															+
														
 
															+    Here M is the number of elements in the vector
														
 
															+
														
 
															+    @a p must be aligned to the vector size in bytes
														
 
															+*/
														
 
															+template<unsigned N, class V, class T> SIMDPP_INL
														
 
															+void load_packed4(any_vec<N,V>& a, any_vec<N,V>& b,
														
 
															+                  any_vec<N,V>& c, any_vec<N,V>& d,
														
 
															+                  const T* p)
														
 
															+{
														
 
															+    static_assert(!is_mask<V>::value, "Mask types can not be loaded");
														
 
															+    typename detail::get_expr_nosign<V>::type ra, rb, rc, rd;
														
 
															+    detail::insn::i_load_packed4(ra, rb, rc, rd, reinterpret_cast<const char*>(p));
														
 
															+    a.wrapped() = ra;
														
 
															+    b.wrapped() = rb;
														
 
															+    c.wrapped() = rc;
														
 
															+    d.wrapped() = rd;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/load_splat.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/load_splat.h
@@ -0,0 +1,51 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_LOAD_SPLAT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_LOAD_SPLAT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/load_splat.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Loads a value from a memory location and broadcasts it to all elements of a
														
 
															+    vector.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = *p
														
 
															+    ...
														
 
															+    rN = *p
														
 
															+    @endcode
														
 
															+
														
 
															+    @a p must have the alignment of the element of the target vector.
														
 
															+*/
														
 
															+// FIXME: return empty expression
														
 
															+template<class T>
														
 
															+SIMDPP_INL expr_vec_load_splat load_splat(const T* p)
														
 
															+{
														
 
															+    return expr_vec_load_splat(reinterpret_cast<const char*>(p));
														
 
															+}
														
 
															+
														
 
															+template<class V, class T> SIMDPP_INL
														
 
															+V load_splat(const T* p)
														
 
															+{
														
 
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
														
 
															+                  "V must be a non-mask vector");
														
 
															+    return detail::insn::i_load_splat_any<V>(reinterpret_cast<const char*>(p));
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/load_u.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/load_u.h
@@ -0,0 +1,67 @@
 
															+/*  Copyright (C) 2013  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_LOAD_U_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_LOAD_U_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/load_u.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an
														
 
															+    unaligned memory location.
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    @code
														
 
															+    a[0..127] = *(p)
														
 
															+    @endcode
														
 
															+
														
 
															+    @a p must be aligned to the element size. If @a p is aligned to 16 bytes
														
 
															+    only the referenced 16 byte block is accessed. Otherwise, memory within the
														
 
															+    smallest 16-byte aligned 32-byte block may be accessed.
														
 
															+
														
 
															+    @icost{ALTIVEC, 4}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    @code
														
 
															+    a[0..255] = *(p)
														
 
															+    @endcode
														
 
															+    @a p does not need to be aligned to 32 bytes.
														
 
															+    @icost{SSE2-SSE4.1, NEON, 2}
														
 
															+    @icost{ALTIVEC, 6}
														
 
															+
														
 
															+    @a p must be aligned to the element size. If @a p is aligned to 32 bytes
														
 
															+    only the referenced 32 byte block is accessed. Otherwise, memory within the
														
 
															+    smallest 32-byte aligned 64-byte block may be accessed.
														
 
															+*/
														
 
															+// FIXME: return empty expression
														
 
															+template<class T>
														
 
															+SIMDPP_INL expr_vec_load_u load_u(const T* p)
														
 
															+{
														
 
															+    return { reinterpret_cast<const char*>(p) };
														
 
															+}
														
 
															+
														
 
															+template<class V, class T> SIMDPP_INL
														
 
															+V load_u(const T* p)
														
 
															+{
														
 
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
														
 
															+                  "V must be a non-mask vector");
														
 
															+    return detail::insn::i_load_u_any<V>(reinterpret_cast<const char*>(p));
														
 
															+}
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/make_float.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/make_float.h
@@ -0,0 +1,166 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_MAKE_FLOAT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_MAKE_FLOAT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/make_const.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Creates a vector from floating-point values known at compile-time.
														
 
															+    The result of this function may be assigned or converted to a vector of any
														
 
															+    type: standard conversions are used to convert the arguments. All
														
 
															+    conversions and other overhead are performed at compile-time thus even if the
														
 
															+    minimal optimization level is selected, the function results in a simple
														
 
															+    load from memory.
														
 
															+
														
 
															+    The function is not guaranteed to have adequate performance if the
														
 
															+    arguments are not known at compile-time.
														
 
															+
														
 
															+    If the vector has fewer elements than the number of the parameters this
														
 
															+    function accepts then the extra values are discarded.
														
 
															+
														
 
															+    @par 1 parameter version
														
 
															+    @code
														
 
															+        | 0  1  2  3  ... n  |
														
 
															+    r = [ v0 v0 v0 v0 ... v0 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 2 parameters version
														
 
															+    @code
														
 
															+        | 0  1  2  3  ... n  |
														
 
															+    r = [ v0 v1 v0 v1 ... v1 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 4 parameters version
														
 
															+    @code
														
 
															+        | 0  1  2  3  ... n  |
														
 
															+    r = [ v0 v1 v2 v3 ... v3 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 8 parameters version
														
 
															+    @code
														
 
															+        | 0  1  ..  7  8  ... n  |
														
 
															+    r = [ v0 v1 .. v7 v0  ... v7 ]
														
 
															+    @endcode
														
 
															+*/
														
 
															+// One-value overload: stores v0 in a one-element constant expression.
															+SIMDPP_INL expr_vec_make_const<double,1> make_float(double v0)
															+{
															+    expr_vec_make_const<double,1> expr;
															+    expr.a[0] = v0;
															+    return expr;
															+}
														
 
															+
														
 
															+// Two-value overload: stores v0, v1 in a two-element constant expression.
															+SIMDPP_INL expr_vec_make_const<double,2> make_float(double v0, double v1)
															+{
															+    expr_vec_make_const<double,2> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    return expr;
															+}
														
 
															+
														
 
															+// Four-value overload: stores v0..v3 in a four-element constant expression.
															+SIMDPP_INL expr_vec_make_const<double,4>
															+    make_float(double v0, double v1, double v2, double v3)
															+{
															+    expr_vec_make_const<double,4> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    expr.a[2] = v2;
															+    expr.a[3] = v3;
															+    return expr;
															+}
														
 
															+
														
 
															+// Eight-value overload: stores v0..v7 in an eight-element constant expression.
															+SIMDPP_INL expr_vec_make_const<double,8>
															+    make_float(double v0, double v1, double v2, double v3,
															+               double v4, double v5, double v6, double v7)
															+{
															+    expr_vec_make_const<double,8> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    expr.a[2] = v2;
															+    expr.a[3] = v3;
															+    expr.a[4] = v4;
															+    expr.a[5] = v5;
															+    expr.a[6] = v6;
															+    expr.a[7] = v7;
															+    return expr;
															+}
														
 
															+
														
 
															+// Sixteen-value overload: stores v0..v15 in a sixteen-element constant
															+// expression.
															+SIMDPP_INL expr_vec_make_const<double,16>
															+    make_float(double v0,  double v1,  double v2,  double v3,
															+               double v4,  double v5,  double v6,  double v7,
															+               double v8,  double v9,  double v10, double v11,
															+               double v12, double v13, double v14, double v15)
															+{
															+    expr_vec_make_const<double,16> expr;
															+    expr.a[0]  = v0;
															+    expr.a[1]  = v1;
															+    expr.a[2]  = v2;
															+    expr.a[3]  = v3;
															+    expr.a[4]  = v4;
															+    expr.a[5]  = v5;
															+    expr.a[6]  = v6;
															+    expr.a[7]  = v7;
															+    expr.a[8]  = v8;
															+    expr.a[9]  = v9;
															+    expr.a[10] = v10;
															+    expr.a[11] = v11;
															+    expr.a[12] = v12;
															+    expr.a[13] = v13;
															+    expr.a[14] = v14;
															+    expr.a[15] = v15;
															+    return expr;
															+}
														
 
															+
														
 
															+// Immediate one-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_float(double v0)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<double,1> expr;
															+    expr.a[0] = v0;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+// Immediate two-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_float(double v0, double v1)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<double,2> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+// Immediate four-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_float(double v0, double v1, double v2, double v3)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<double,4> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    expr.a[2] = v2;
															+    expr.a[3] = v3;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+// Immediate eight-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_float(double v0, double v1, double v2, double v3,
															+             double v4, double v5, double v6, double v7)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<double,8> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    expr.a[2] = v2;
															+    expr.a[3] = v3;
															+    expr.a[4] = v4;
															+    expr.a[5] = v5;
															+    expr.a[6] = v6;
															+    expr.a[7] = v7;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+// Immediate sixteen-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_float(double v0,  double v1,  double v2,  double v3,
															+             double v4,  double v5,  double v6,  double v7,
															+             double v8,  double v9,  double v10, double v11,
															+             double v12, double v13, double v14, double v15)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<double,16> expr;
															+    expr.a[0]  = v0;
															+    expr.a[1]  = v1;
															+    expr.a[2]  = v2;
															+    expr.a[3]  = v3;
															+    expr.a[4]  = v4;
															+    expr.a[5]  = v5;
															+    expr.a[6]  = v6;
															+    expr.a[7]  = v7;
															+    expr.a[8]  = v8;
															+    expr.a[9]  = v9;
															+    expr.a[10] = v10;
															+    expr.a[11] = v11;
															+    expr.a[12] = v12;
															+    expr.a[13] = v13;
															+    expr.a[14] = v14;
															+    expr.a[15] = v15;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/make_int.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/make_int.h
@@ -0,0 +1,171 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_MAKE_INT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_MAKE_INT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/make_const.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Creates a vector from signed integer values known at compile-time.
														
 
															+    The result of this function may be assigned or converted to a vector of any
														
 
															+    type: standard conversions are used to convert the arguments. All
														
 
															+    conversions and other overhead are performed at compile-time thus even if the
														
 
															+    minimal optimization level is selected, the function results in a simple
														
 
															+    load from memory.
														
 
															+
														
 
															+    The function is not guaranteed to have adequate performance if the
														
 
															+    arguments are not known at compile-time.
														
 
															+
														
 
															+    If the vector has fewer elements than the number of the parameters this
														
 
															+    function accepts then the extra values are discarded.
														
 
															+
														
 
															+    @par 1 parameter version
														
 
															+    @code
														
 
															+        | 0  1  2  3  ... n  |
														
 
															+    r = [ v0 v0 v0 v0 ... v0 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 2 parameters version
														
 
															+    @code
														
 
															+        | 0  1  2  3  ... n  |
														
 
															+    r = [ v0 v1 v0 v1 ... v1 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 4 parameters version
														
 
															+    @code
														
 
															+        | 0  1  2  3  ... n  |
														
 
															+    r = [ v0 v1 v2 v3 ... v3 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 8 parameters version
														
 
															+    @code
														
 
															+        | 0  1  ..  7  8  ... n  |
														
 
															+    r = [ v0 v1 .. v7 v0  ... v7 ]
														
 
															+    @endcode
														
 
															+*/
														
 
															+// FIXME: return empty expr
														
 
															+// One-value overload: stores v0 in a one-element constant expression.
															+SIMDPP_INL expr_vec_make_const<int64_t,1> make_int(int64_t v0)
															+{
															+    expr_vec_make_const<int64_t,1> expr;
															+    expr.a[0] = v0;
															+    return expr;
															+}
														
 
															+
														
 
															+// Two-value overload: stores v0, v1 in a two-element constant expression.
															+static SIMDPP_INL
															+expr_vec_make_const<int64_t,2> make_int(int64_t v0, int64_t v1)
															+{
															+    expr_vec_make_const<int64_t,2> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    return expr;
															+}
														
 
															+
														
 
															+// Four-value overload: stores v0..v3 in a four-element constant expression.
															+static SIMDPP_INL
															+expr_vec_make_const<int64_t,4>
															+    make_int(int64_t v0, int64_t v1, int64_t v2, int64_t v3)
															+{
															+    expr_vec_make_const<int64_t,4> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    expr.a[2] = v2;
															+    expr.a[3] = v3;
															+    return expr;
															+}
														
 
															+
														
 
															+// Eight-value overload: stores v0..v7 in an eight-element constant expression.
															+static SIMDPP_INL
															+expr_vec_make_const<int64_t,8>
															+    make_int(int64_t v0, int64_t v1, int64_t v2, int64_t v3,
															+             int64_t v4, int64_t v5, int64_t v6, int64_t v7)
															+{
															+    expr_vec_make_const<int64_t,8> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    expr.a[2] = v2;
															+    expr.a[3] = v3;
															+    expr.a[4] = v4;
															+    expr.a[5] = v5;
															+    expr.a[6] = v6;
															+    expr.a[7] = v7;
															+    return expr;
															+}
														
 
															+
														
 
															+// Sixteen-value overload: stores v0..v15 in a sixteen-element constant
															+// expression.
															+static SIMDPP_INL
															+expr_vec_make_const<int64_t,16>
															+    make_int(int64_t v0,  int64_t v1,  int64_t v2,  int64_t v3,
															+             int64_t v4,  int64_t v5,  int64_t v6,  int64_t v7,
															+             int64_t v8,  int64_t v9,  int64_t v10, int64_t v11,
															+             int64_t v12, int64_t v13, int64_t v14, int64_t v15)
															+{
															+    expr_vec_make_const<int64_t,16> expr;
															+    expr.a[0]  = v0;
															+    expr.a[1]  = v1;
															+    expr.a[2]  = v2;
															+    expr.a[3]  = v3;
															+    expr.a[4]  = v4;
															+    expr.a[5]  = v5;
															+    expr.a[6]  = v6;
															+    expr.a[7]  = v7;
															+    expr.a[8]  = v8;
															+    expr.a[9]  = v9;
															+    expr.a[10] = v10;
															+    expr.a[11] = v11;
															+    expr.a[12] = v12;
															+    expr.a[13] = v13;
															+    expr.a[14] = v14;
															+    expr.a[15] = v15;
															+    return expr;
															+}
														
 
															+
														
 
															+// Immediate one-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_int(int64_t v0)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<int64_t,1> expr;
															+    expr.a[0] = v0;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+// Immediate two-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_int(int64_t v0, int64_t v1)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<int64_t,2> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+// Immediate four-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_int(int64_t v0, int64_t v1, int64_t v2, int64_t v3)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<int64_t,4> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    expr.a[2] = v2;
															+    expr.a[3] = v3;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+// Immediate eight-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_int(int64_t v0, int64_t v1, int64_t v2, int64_t v3,
															+           int64_t v4, int64_t v5, int64_t v6, int64_t v7)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<int64_t,8> expr;
															+    expr.a[0] = v0;
															+    expr.a[1] = v1;
															+    expr.a[2] = v2;
															+    expr.a[3] = v3;
															+    expr.a[4] = v4;
															+    expr.a[5] = v5;
															+    expr.a[6] = v6;
															+    expr.a[7] = v7;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+// Immediate sixteen-value form: fills a constant expression and passes it to
															+// detail::insn::i_make_const_any to obtain a V.
															+template<class V> SIMDPP_INL
															+V make_int(int64_t v0,  int64_t v1,  int64_t v2,  int64_t v3,
															+           int64_t v4,  int64_t v5,  int64_t v6,  int64_t v7,
															+           int64_t v8,  int64_t v9,  int64_t v10, int64_t v11,
															+           int64_t v12, int64_t v13, int64_t v14, int64_t v15)
															+{
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
															+                  "V must be a non-mask vector");
															+    expr_vec_make_const<int64_t,16> expr;
															+    expr.a[0]  = v0;
															+    expr.a[1]  = v1;
															+    expr.a[2]  = v2;
															+    expr.a[3]  = v3;
															+    expr.a[4]  = v4;
															+    expr.a[5]  = v5;
															+    expr.a[6]  = v6;
															+    expr.a[7]  = v7;
															+    expr.a[8]  = v8;
															+    expr.a[9]  = v9;
															+    expr.a[10] = v10;
															+    expr.a[11] = v11;
															+    expr.a[12] = v12;
															+    expr.a[13] = v13;
															+    expr.a[14] = v14;
															+    expr.a[15] = v15;
															+    return detail::insn::i_make_const_any<V>(expr);
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/make_shuffle_bytes_mask.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/make_shuffle_bytes_mask.h
@@ -0,0 +1,559 @@
 
															+/*  Copyright (C) 2012-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_SHUFFLE_BYTES_MASK_H
														
 
															+#define LIBSIMDPP_SIMDPP_SHUFFLE_BYTES_MASK_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <cstdint>
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/core/make_uint.h>
														
 
															+#include <simdpp/detail/array.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+namespace detail {
														
 
															+
														
 
															+/// Compile-time check that selector @a s lies in the range [-1, 2*u).
															+/// s - selector, u - number of elements per group
															+template<int s, unsigned u> SIMDPP_INL
															+void assert_selector_range()
															+{
															+    // Compare in signed arithmetic. Written as `s < u*2`, the int selector
															+    // is converted to unsigned before the comparison, so the permitted
															+    // value -1 wraps to a huge number and is rejected.
															+    static_assert(-1 <= s && s < static_cast<int>(u) * 2,
															+                  "Selector out of range");
															+}
														
 
															+
														
 
															+// Two-selector form: each selector must lie in [-1, 2*u), checked at compile
															+// time.
															+template<int s0, int s1, int u> SIMDPP_INL
															+void assert_selector_range()
															+{
															+    static_assert(s0 >= -1 && s0 < 2*u, "Selector out of range");
															+    static_assert(s1 >= -1 && s1 < 2*u, "Selector out of range");
															+}
														
 
															+
														
 
															+// Four-selector form: each selector must lie in [-1, 2*u), checked at compile
															+// time.
															+template<int s0, int s1, int s2, int s3, int u> SIMDPP_INL
															+void assert_selector_range()
															+{
															+    static_assert(s0 >= -1 && s0 < 2*u, "Selector out of range");
															+    static_assert(s1 >= -1 && s1 < 2*u, "Selector out of range");
															+    static_assert(s2 >= -1 && s2 < 2*u, "Selector out of range");
															+    static_assert(s3 >= -1 && s3 < 2*u, "Selector out of range");
															+}
														
 
															+
														
 
															+/// Byte index for a selector over 1-byte elements.
															+/// s - selector, u - the number of elements per group
															+template<int s, int u>
															+struct get_shuffle_bytex1_16 {
															+    // Selector -1 maps to 0x80. Selectors below u map to themselves and
															+    // the remaining selectors map to (s-u) offset by 16.
															+    static const unsigned r0 = (s != -1) ? (s < u ? s : 16 + (s-u)) : 0x80;
															+};
														
 
															+
														
 
															+/// Byte indices for a selector over 2-byte elements.
															+/// s - selector, u - the number of elements per group
															+template<int s, int u>
															+struct get_shuffle_bytex2_16 {
															+    // r0 is the index of the element's first byte, or 0x80 for selector -1.
															+    static const unsigned r0 = (s != -1) ? (s < u ? 2*s : 16 + 2*(s-u)) : 0x80;
															+    // The following byte. Stays 0x80 for selector -1.
															+    static const unsigned r1 = (s != -1) ? r0+1 : 0x80;
															+};
														
 
															+
														
 
															+/// Byte indices for a selector over 4-byte elements.
															+/// s - selector, u - the number of elements per group
															+template<int s, int u>
															+struct get_shuffle_bytex4_16 {
															+    // r0 is the index of the element's first byte, or 0x80 for selector -1.
															+    static const unsigned r0 = (s != -1) ? (s < u ? 4*s : 16 + 4*(s-u)) : 0x80;
															+    // The following bytes. Each stays 0x80 for selector -1.
															+    static const unsigned r1 = (s != -1) ? r0+1 : 0x80;
															+    static const unsigned r2 = (s != -1) ? r0+2 : 0x80;
															+    static const unsigned r3 = (s != -1) ? r0+3 : 0x80;
															+};
														
 
															+
														
 
															+/// Byte indices for a selector over 8-byte elements.
															+/// s - selector, u - the number of elements per group
															+template<int s, int u>
															+struct get_shuffle_bytex8_16 {
															+    // r0 is the index of the element's first byte, or 0x80 for selector -1.
															+    static const unsigned r0 = (s != -1) ? (s < u ? 8*s : 16 + 8*(s-u)) : 0x80;
															+    // The following bytes. Each stays 0x80 for selector -1.
															+    static const unsigned r1 = (s != -1) ? r0+1 : 0x80;
															+    static const unsigned r2 = (s != -1) ? r0+2 : 0x80;
															+    static const unsigned r3 = (s != -1) ? r0+3 : 0x80;
															+    static const unsigned r4 = (s != -1) ? r0+4 : 0x80;
															+    static const unsigned r5 = (s != -1) ? r0+5 : 0x80;
															+    static const unsigned r6 = (s != -1) ? r0+6 : 0x80;
															+    static const unsigned r7 = (s != -1) ? r0+7 : 0x80;
															+};
														
 
															+
														
 
															+} // namespace detail
														
 
															+
														
 
															+/** Makes a mask to shuffle an int8x16 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    All elements within vectors are grouped into sets of two adjacent elements.
														
 
															+    Elements within each set of the resulting vector can be selected only from
														
 
															+    corresponding sets of the source vectors.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,1] select elements from the first vector.
														
 
															+     * Values [2,3] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 2 ? a[s0] : b[s0-2])
														
 
															+    r1 = (s1 == -1) ? 0 : (s1 < 2 ? a[s1] : b[s1-2])
														
 
															+    r2 = (s0 == -1) ? 0 : (s0 < 2 ? a[s0+2] : b[s0])
														
 
															+    r3 = (s1 == -1) ? 0 : (s1 < 2 ? a[s1+2] : b[s1])
														
 
															+    ...
														
 
															+    r14 = (s0 == -1) ? 0 : (s0 < 2 ? a[s0+14] : b[s0+12])
														
 
															+    r15 = (s1 == -1) ? 0 : (s1 < 2 ? a[s1+14] : b[s1+12])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, unsigned N> SIMDPP_INL
															+uint8<N> make_shuffle_bytes16_mask(uint8<N> &mask)
															+{
															+    // Reject selectors outside [-1, 4) at compile time.
															+    detail::assert_selector_range<s0,s1,2>();
															+
															+    // Byte indices for the first pair. Later pairs add the pair's offset.
															+    const unsigned even = detail::get_shuffle_bytex1_16<s0,2>::r0;
															+    const unsigned odd  = detail::get_shuffle_bytex1_16<s1,2>::r0;
															+
															+    // The result is written to the caller's vector and also returned.
															+    mask = make_uint(even,      odd,
															+                     even + 2,  odd + 2,
															+                     even + 4,  odd + 4,
															+                     even + 6,  odd + 6,
															+                     even + 8,  odd + 8,
															+                     even + 10, odd + 10,
															+                     even + 12, odd + 12,
															+                     even + 14, odd + 14);
															+    return mask;
															+}
														
 
															+
														
 
															+/** Makes a mask to shuffle an int8x16 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    All elements within vectors are grouped into sets of four adjacent
														
 
															+    elements. Elements within each set of the resulting vector can be selected
														
 
															+    only from corresponding sets of the source vectors.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,3] select elements from the first vector.
														
 
															+     * Values [4,7] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 4 ? a[s0] : b[s0-4])
														
 
															+    r1 = (s1 == -1) ? 0 : (s1 < 4 ? a[s1] : b[s1-4])
														
 
															+    r2 = (s2 == -1) ? 0 : (s2 < 4 ? a[s2] : b[s2-4])
														
 
															+    r3 = (s3 == -1) ? 0 : (s3 < 4 ? a[s3] : b[s3-4])
														
 
															+    ...
														
 
															+    r12 = (s0 == -1) ? 0 : (s0 < 4 ? a[s0+12] : b[s0+8])
														
 
															+    r13 = (s1 == -1) ? 0 : (s1 < 4 ? a[s1+12] : b[s1+8])
														
 
															+    r14 = (s2 == -1) ? 0 : (s2 < 4 ? a[s2+12] : b[s2+8])
														
 
															+    r15 = (s3 == -1) ? 0 : (s3 < 4 ? a[s3+12] : b[s3+8])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, int s2, int s3, unsigned N> SIMDPP_INL
															+uint8<N> make_shuffle_bytes16_mask(uint8<N> &mask)
															+{
															+    // Selector checking is delegated to detail::assert_selector_range,
															+    // parameterised with a group size of four.
															+    detail::assert_selector_range<s0,s1,s2,s3,4>();
															+
															+    // Mask entries for the first four-element group.
															+    const unsigned e0 = detail::get_shuffle_bytex1_16<s0,4>::r0;
															+    const unsigned e1 = detail::get_shuffle_bytex1_16<s1,4>::r0;
															+    const unsigned e2 = detail::get_shuffle_bytex1_16<s2,4>::r0;
															+    const unsigned e3 = detail::get_shuffle_bytex1_16<s3,4>::r0;
															+
															+    // The remaining three groups reuse the same entries, advanced by
															+    // four per group.
															+    mask = make_uint(e0,      e1,      e2,      e3,
															+                     e0 + 4,  e1 + 4,  e2 + 4,  e3 + 4,
															+                     e0 + 8,  e1 + 8,  e2 + 8,  e3 + 8,
															+                     e0 + 12, e1 + 12, e2 + 12, e3 + 12);
															+    return mask;
															+}
														
 
															+
														
 
															+/** Makes a mask to shuffle an int8x16 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    All elements within vectors are grouped into sets of eight adjacent
														
 
															+    elements. Elements within each set of the resulting vector can be selected
														
 
															+    only from corresponding sets of the source vectors.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,7] select elements from the first vector.
														
 
															+     * Values [8,15] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 8 ? a[s0] : b[s0-8])
														
 
															+    ...
														
 
															+    r7 = (s7 == -1) ? 0 : (s7 < 8 ? a[s7] : b[s7-8])
														
 
															+    r8 = (s0 == -1) ? 0 : (s0 < 8 ? a[s0+8] : b[s0])
														
 
															+    ...
														
 
															+    r15 = (s7 == -1) ? 0 : (s7 < 8 ? a[s7+8] : b[s7])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7, unsigned N> SIMDPP_INL
															+uint8<N> make_shuffle_bytes16_mask(uint8<N> &mask)
															+{
															+    // Selector checking is delegated to detail::assert_selector_range,
															+    // four selectors at a time, with a group size of eight.
															+    detail::assert_selector_range<s0,s1,s2,s3,8>();
															+    detail::assert_selector_range<s4,s5,s6,s7,8>();
															+
															+    // Mask entries for the first eight-element group.
															+    const unsigned e0 = detail::get_shuffle_bytex1_16<s0,8>::r0;
															+    const unsigned e1 = detail::get_shuffle_bytex1_16<s1,8>::r0;
															+    const unsigned e2 = detail::get_shuffle_bytex1_16<s2,8>::r0;
															+    const unsigned e3 = detail::get_shuffle_bytex1_16<s3,8>::r0;
															+    const unsigned e4 = detail::get_shuffle_bytex1_16<s4,8>::r0;
															+    const unsigned e5 = detail::get_shuffle_bytex1_16<s5,8>::r0;
															+    const unsigned e6 = detail::get_shuffle_bytex1_16<s6,8>::r0;
															+    const unsigned e7 = detail::get_shuffle_bytex1_16<s7,8>::r0;
															+
															+    // The second group reuses the same entries, advanced by eight.
															+    mask = make_uint(e0,     e1,     e2,     e3,
															+                     e4,     e5,     e6,     e7,
															+                     e0 + 8, e1 + 8, e2 + 8, e3 + 8,
															+                     e4 + 8, e5 + 8, e6 + 8, e7 + 8);
															+    return mask;
															+}
														
 
															+
														
 
															+/** Makes a mask to shuffle an int8x16 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,15] select elements from the first vector.
														
 
															+     * Values [16,31] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 16 ? a[s0] : b[s0-16])
														
 
															+    r1 = (s1 == -1) ? 0 : (s1 < 16 ? a[s1] : b[s1-16])
														
 
															+    ...
														
 
															+    r15 = (s15 == -1) ? 0 : (s15 < 16 ? a[s15] : b[s15-16])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7,
															+         int s8, int s9, int s10, int s11, int s12, int s13, int s14, int s15, unsigned N> SIMDPP_INL
															+uint8<N> make_shuffle_bytes16_mask(uint8<N> &mask)
															+{
															+    // Selector checking is delegated to detail::assert_selector_range,
															+    // four selectors at a time, with a group size of sixteen.
															+    detail::assert_selector_range<s0,s1,s2,s3,16>();
															+    detail::assert_selector_range<s4,s5,s6,s7,16>();
															+    detail::assert_selector_range<s8,s9,s10,s11,16>();
															+    detail::assert_selector_range<s12,s13,s14,s15,16>();
															+
															+    // One mask entry per selector; the single group spans all sixteen
															+    // entries, so nothing is repeated.
															+    const unsigned e0  = detail::get_shuffle_bytex1_16<s0,16>::r0;
															+    const unsigned e1  = detail::get_shuffle_bytex1_16<s1,16>::r0;
															+    const unsigned e2  = detail::get_shuffle_bytex1_16<s2,16>::r0;
															+    const unsigned e3  = detail::get_shuffle_bytex1_16<s3,16>::r0;
															+    const unsigned e4  = detail::get_shuffle_bytex1_16<s4,16>::r0;
															+    const unsigned e5  = detail::get_shuffle_bytex1_16<s5,16>::r0;
															+    const unsigned e6  = detail::get_shuffle_bytex1_16<s6,16>::r0;
															+    const unsigned e7  = detail::get_shuffle_bytex1_16<s7,16>::r0;
															+    const unsigned e8  = detail::get_shuffle_bytex1_16<s8,16>::r0;
															+    const unsigned e9  = detail::get_shuffle_bytex1_16<s9,16>::r0;
															+    const unsigned e10 = detail::get_shuffle_bytex1_16<s10,16>::r0;
															+    const unsigned e11 = detail::get_shuffle_bytex1_16<s11,16>::r0;
															+    const unsigned e12 = detail::get_shuffle_bytex1_16<s12,16>::r0;
															+    const unsigned e13 = detail::get_shuffle_bytex1_16<s13,16>::r0;
															+    const unsigned e14 = detail::get_shuffle_bytex1_16<s14,16>::r0;
															+    const unsigned e15 = detail::get_shuffle_bytex1_16<s15,16>::r0;
															+
															+    mask = make_uint(e0,  e1,  e2,  e3,
															+                     e4,  e5,  e6,  e7,
															+                     e8,  e9,  e10, e11,
															+                     e12, e13, e14, e15);
															+    return mask;
															+}
														
 
															+
														
 
															+/** Makes a mask to shuffle an int16x8 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    All elements within vectors are grouped into sets of two adjacent elements.
														
 
															+    Elements within each set of the resulting vector can be selected only from
														
 
															+    corresponding sets of the source vectors.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,1] select elements from the first vector.
														
 
															+     * Values [2,3] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 2 ? a[s0] : b[s0-2])
														
 
															+    r1 = (s1 == -1) ? 0 : (s1 < 2 ? a[s1] : b[s1-2])
														
 
															+    r2 = (s0 == -1) ? 0 : (s0 < 2 ? a[s0+2] : b[s0])
														
 
															+    r3 = (s1 == -1) ? 0 : (s1 < 2 ? a[s1+2] : b[s1])
														
 
															+    ...
														
 
															+    r6 = (s0 == -1) ? 0 : (s0 < 2 ? a[s0+6] : b[s0+4])
														
 
															+    r7 = (s1 == -1) ? 0 : (s1 < 2 ? a[s1+6] : b[s1+4])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, unsigned N> SIMDPP_INL
															+uint16<N> make_shuffle_bytes16_mask(uint16<N> &mask)
															+{
															+    // Selector checking is delegated to detail::assert_selector_range,
															+    // parameterised with a group size of two.
															+    detail::assert_selector_range<s0,s1,2>();
															+
															+    // Each selector contributes two consecutive mask entries (r0, r1).
															+    typedef typename detail::get_shuffle_bytex2_16<s0,2> sel0;
															+    typedef typename detail::get_shuffle_bytex2_16<s1,2> sel1;
															+
															+    // A two-element group occupies four mask entries, hence the step of
															+    // four between groups. The entries are converted to uint8<N*2>
															+    // before being stored into the 16-bit mask.
															+    mask = static_cast< uint8<N*2> >(
															+        make_uint(sel0::r0,      sel0::r1,      sel1::r0,      sel1::r1,
															+                  sel0::r0 + 4,  sel0::r1 + 4,  sel1::r0 + 4,  sel1::r1 + 4,
															+                  sel0::r0 + 8,  sel0::r1 + 8,  sel1::r0 + 8,  sel1::r1 + 8,
															+                  sel0::r0 + 12, sel0::r1 + 12, sel1::r0 + 12, sel1::r1 + 12));
															+    return mask;
															+}
														
 
															+
														
 
															+/** Makes a mask to shuffle an int16x8 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    All elements within vectors are grouped into sets of four adjacent
														
 
															+    elements. Elements within each set of the resulting vector can be selected
														
 
															+    only from corresponding sets of the source vectors.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,3] select elements from the first vector.
														
 
															+     * Values [4,7] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 4 ? a[s0] : b[s0-4])
														
 
															+    r1 = (s1 == -1) ? 0 : (s1 < 4 ? a[s1] : b[s1-4])
														
 
															+    r2 = (s2 == -1) ? 0 : (s2 < 4 ? a[s2] : b[s2-4])
														
 
															+    r3 = (s3 == -1) ? 0 : (s3 < 4 ? a[s3] : b[s3-4])
														
 
															+    r4 = (s0 == -1) ? 0 : (s0 < 4 ? a[s0+4] : b[s0])
															+    r5 = (s1 == -1) ? 0 : (s1 < 4 ? a[s1+4] : b[s1])
															+    r6 = (s2 == -1) ? 0 : (s2 < 4 ? a[s2+4] : b[s2])
															+    r7 = (s3 == -1) ? 0 : (s3 < 4 ? a[s3+4] : b[s3])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, int s2, int s3, unsigned N> SIMDPP_INL
															+uint16<N> make_shuffle_bytes16_mask(uint16<N> &mask)
															+{
															+    // Selector checking is delegated to detail::assert_selector_range,
															+    // parameterised with a group size of four.
															+    detail::assert_selector_range<s0,s1,s2,s3,4>();
															+
															+    // Each selector contributes two consecutive mask entries (r0, r1).
															+    typedef typename detail::get_shuffle_bytex2_16<s0,4> sel0;
															+    typedef typename detail::get_shuffle_bytex2_16<s1,4> sel1;
															+    typedef typename detail::get_shuffle_bytex2_16<s2,4> sel2;
															+    typedef typename detail::get_shuffle_bytex2_16<s3,4> sel3;
															+
															+    // A four-element group occupies eight mask entries, so the second
															+    // group repeats the first with every entry advanced by eight. The
															+    // entries are converted to uint8<N*2> before being stored.
															+    mask = static_cast< uint8<N*2> >(
															+        make_uint(sel0::r0,     sel0::r1,     sel1::r0,     sel1::r1,
															+                  sel2::r0,     sel2::r1,     sel3::r0,     sel3::r1,
															+                  sel0::r0 + 8, sel0::r1 + 8, sel1::r0 + 8, sel1::r1 + 8,
															+                  sel2::r0 + 8, sel2::r1 + 8, sel3::r0 + 8, sel3::r1 + 8));
															+    return mask;
															+}
														
 
															+
														
 
															+/** Makes a mask to shuffle an int16x8 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,7] select elements from the first vector.
														
 
															+     * Values [8,15] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 8 ? a[s0] : b[s0-8])
														
 
															+    ...
														
 
															+    r7 = (s7 == -1) ? 0 : (s7 < 8 ? a[s7] : b[s7-8])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7,
															+         unsigned N> SIMDPP_INL
															+uint16<N> make_shuffle_bytes16_mask(uint16<N> &mask)
															+{
															+    // Selector checking is delegated to detail::assert_selector_range,
															+    // four selectors at a time, with a group size of eight.
															+    detail::assert_selector_range<s0,s1,s2,s3,8>();
															+    detail::assert_selector_range<s4,s5,s6,s7,8>();
															+
															+    // Each selector contributes two consecutive mask entries (r0, r1).
															+    typedef typename detail::get_shuffle_bytex2_16<s0,8> sel0;
															+    typedef typename detail::get_shuffle_bytex2_16<s1,8> sel1;
															+    typedef typename detail::get_shuffle_bytex2_16<s2,8> sel2;
															+    typedef typename detail::get_shuffle_bytex2_16<s3,8> sel3;
															+    typedef typename detail::get_shuffle_bytex2_16<s4,8> sel4;
															+    typedef typename detail::get_shuffle_bytex2_16<s5,8> sel5;
															+    typedef typename detail::get_shuffle_bytex2_16<s6,8> sel6;
															+    typedef typename detail::get_shuffle_bytex2_16<s7,8> sel7;
															+
															+    // Eight selectors fill all sixteen mask entries; nothing repeats.
															+    // The entries are converted to uint8<N*2> before being stored.
															+    mask = static_cast< uint8<N*2> >(
															+        make_uint(sel0::r0, sel0::r1, sel1::r0, sel1::r1,
															+                  sel2::r0, sel2::r1, sel3::r0, sel3::r1,
															+                  sel4::r0, sel4::r1, sel5::r0, sel5::r1,
															+                  sel6::r0, sel6::r1, sel7::r0, sel7::r1));
															+    return mask;
															+}
														
 
															+
														
 
															+/** Makes a mask to shuffle an int32x4 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    All elements within vectors are grouped into sets of two adjacent elements.
														
 
															+    Elements within each set of the resulting vector can be selected only from
														
 
															+    corresponding sets of the source vectors.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,1] select elements from the first vector.
														
 
															+     * Values [2,3] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 2 ? a[s0] : b[s0-2])
														
 
															+    r1 = (s1 == -1) ? 0 : (s1 < 2 ? a[s1] : b[s1-2])
														
 
															+    r2 = (s0 == -1) ? 0 : (s0 < 2 ? a[s0+2] : b[s0])
														
 
															+    r3 = (s1 == -1) ? 0 : (s1 < 2 ? a[s1+2] : b[s1])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, unsigned N> SIMDPP_INL
															+uint32<N> make_shuffle_bytes16_mask(uint32<N> &mask)
															+{
															+    // Selector checking is delegated to detail::assert_selector_range,
															+    // parameterised with a group size of two.
															+    detail::assert_selector_range<s0,s1,2>();
															+
															+    // Each selector contributes four consecutive mask entries (r0..r3).
															+    typedef typename detail::get_shuffle_bytex4_16<s0,2> sel0;
															+    typedef typename detail::get_shuffle_bytex4_16<s1,2> sel1;
															+
															+    // A two-element group occupies eight mask entries, so the second
															+    // group repeats the first with every entry advanced by eight. The
															+    // entries are converted to uint8<N*4> before being stored.
															+    mask = static_cast< uint8<N*4> >(
															+        make_uint(sel0::r0,     sel0::r1,     sel0::r2,     sel0::r3,
															+                  sel1::r0,     sel1::r1,     sel1::r2,     sel1::r3,
															+                  sel0::r0 + 8, sel0::r1 + 8, sel0::r2 + 8, sel0::r3 + 8,
															+                  sel1::r0 + 8, sel1::r1 + 8, sel1::r2 + 8, sel1::r3 + 8));
															+    return mask;
															+}
														
 
															+
														
 
															+/** Makes a mask to shuffle an int32x4 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,3] select elements from the first vector.
														
 
															+     * Values [4,7] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 4 ? a[s0] : b[s0-4])
														
 
															+    r1 = (s1 == -1) ? 0 : (s1 < 4 ? a[s1] : b[s1-4])
														
 
															+    r2 = (s2 == -1) ? 0 : (s2 < 4 ? a[s2] : b[s2-4])
														
 
															+    r3 = (s3 == -1) ? 0 : (s3 < 4 ? a[s3] : b[s3-4])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, int s2, int s3, unsigned N> SIMDPP_INL
															+uint32<N> make_shuffle_bytes16_mask(uint32<N> &mask)
															+{
															+    // Selector checking is delegated to detail::assert_selector_range,
															+    // parameterised with a group size of four.
															+    detail::assert_selector_range<s0,s1,s2,s3,4>();
															+
															+    // Each selector contributes four consecutive mask entries (r0..r3).
															+    typedef typename detail::get_shuffle_bytex4_16<s0,4> sel0;
															+    typedef typename detail::get_shuffle_bytex4_16<s1,4> sel1;
															+    typedef typename detail::get_shuffle_bytex4_16<s2,4> sel2;
															+    typedef typename detail::get_shuffle_bytex4_16<s3,4> sel3;
															+
															+    // Four selectors fill all sixteen mask entries; nothing repeats.
															+    // The entries are converted to uint8<N*4> before being stored.
															+    mask = static_cast< uint8<N*4> >(
															+        make_uint(sel0::r0, sel0::r1, sel0::r2, sel0::r3,
															+                  sel1::r0, sel1::r1, sel1::r2, sel1::r3,
															+                  sel2::r0, sel2::r1, sel2::r2, sel2::r3,
															+                  sel3::r0, sel3::r1, sel3::r2, sel3::r3));
															+    return mask;
															+}
														
 
															+
														
 
															+/** Makes a mask to shuffle an int64x2 vector using @c permute_bytes16,
														
 
															+    @c shuffle_bytes16, @c permute_zbytes16 or @c shuffle_zbytes16 functions.
														
 
															+
														
 
															+    The template arguments define which elements to select from each element
														
 
															+    group:
														
 
															+     * Values [0,1] select elements from the first vector.
														
 
															+     * Values [2,3] select elements from the second vector. The mask can only be
														
 
															+       used in @c shuffle_bytes16 or @c shuffle_zbytes16
														
 
															+     * Value [-1] sets the corresponding element to zero. The mask can only be
														
 
															+       used in @c permute_zbytes16 or @c shuffle_zbytes16
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+
														
 
															+    The created mask will cause @c shuffle_bytes16 to perform as follows:
														
 
															+    @code
														
 
															+    r0 = (s0 == -1) ? 0 : (s0 < 2 ? a[s0] : b[s0-2])
														
 
															+    r1 = (s1 == -1) ? 0 : (s1 < 2 ? a[s1] : b[s1-2])
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+
														
 
															+    The vectors will be shuffled as if the 128-bit version was applied to the
														
 
															+    lower and higher halves of the vectors separately.
														
 
															+*/
														
 
															+template<int s0, int s1, unsigned N> SIMDPP_INL
															+uint64<N> make_shuffle_bytes16_mask(uint64<N> &mask)
															+{
															+    // Selector checking is delegated to detail::assert_selector_range,
															+    // parameterised with a group size of two.
															+    detail::assert_selector_range<s0,s1,2>();
															+
															+    // Each selector contributes eight consecutive mask entries (r0..r7).
															+    typedef typename detail::get_shuffle_bytex8_16<s0,2> sel0;
															+    typedef typename detail::get_shuffle_bytex8_16<s1,2> sel1;
															+
															+    // Two selectors fill all sixteen mask entries; nothing repeats.
															+    // The entries are converted to uint8<N*8> before being stored.
															+    mask = static_cast< uint8<N*8> >(
															+        make_uint(sel0::r0, sel0::r1, sel0::r2, sel0::r3,
															+                  sel0::r4, sel0::r5, sel0::r6, sel0::r7,
															+                  sel1::r0, sel1::r1, sel1::r2, sel1::r3,
															+                  sel1::r4, sel1::r5, sel1::r6, sel1::r7));
															+    return mask;
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/make_uint.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/make_uint.h
@@ -0,0 +1,199 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_MAKE_UINT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_MAKE_UINT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/make_const.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Creates a vector from unsigned integer values known at compile-time.
														
 
															+    The result of this function may be assigned or converted to a vector of any
														
 
															+    type: standard conversions are used to convert the arguments. All
														
 
															+    conversions and other overhead are performed at compile-time thus even if the
														
 
															+    minimal optimization level is selected, the function results in a simple
														
 
															+    load from memory.
														
 
															+
														
 
															+    The function is not guaranteed to have adequate performance if the
														
 
															+    arguments are not known at compile-time.
														
 
															+
														
 
															+    If the vector has fewer elements than the number of the parameters this
														
 
															+    function accepts then the extra values are discarded.
														
 
															+
														
 
															+    Note that per C++ rules negative values are sign-extended to fill entire
														
 
															+    element before being converted to unsigned type thus e.g. it's safe to use
														
 
															+    -1 to fill element with ones.
														
 
															+
														
 
															+    @par 1 parameter version
														
 
															+    @code
														
 
															+        | 0  1  2  3  ... n  |
														
 
															+    r = [ v0 v0 v0 v0 ... v0 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 2 parameters version
														
 
															+    @code
														
 
															+        | 0  1  2  3  ... n  |
														
 
															+    r = [ v0 v1 v0 v1 ... v1 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 4 parameters version
														
 
															+    @code
														
 
															+        | 0  1  2  3  ... n  |
														
 
															+    r = [ v0 v1 v2 v3 ... v3 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 8 parameters version
														
 
															+    @code
														
 
															+        | 0  1  ..  7  8  ... n  |
														
 
															+    r = [ v0 v1 .. v7 v0  ... v7 ]
														
 
															+    @endcode
														
 
															+*/
														
 
															+// FIXME: return empty expr
														
 
															+SIMDPP_INL expr_vec_make_const<uint64_t,1> make_uint(uint64_t v0)
														
 
															+{
														
 
															+    expr_vec_make_const<uint64_t,1> a;
														
 
															+    a.a[0] = v0;
														
 
															+    return a;
														
 
															+}
														
 
															+
														
 
															+static SIMDPP_INL
														
 
															+expr_vec_make_const<uint64_t,2> make_uint(uint64_t v0, uint64_t v1)
														
 
															+{
														
 
															+    expr_vec_make_const<uint64_t,2> a;
														
 
															+    a.a[0] = v0;  a.a[1] = v1;
														
 
															+    return a;
														
 
															+}
														
 
															+
														
 
															+static SIMDPP_INL
														
 
															+expr_vec_make_const<uint64_t,4>
														
 
															+    make_uint(uint64_t v0, uint64_t v1, uint64_t v2, uint64_t v3)
														
 
															+{
														
 
															+    expr_vec_make_const<uint64_t,4> a;
														
 
															+    a.a[0] = v0;  a.a[1] = v1;  a.a[2] = v2;  a.a[3] = v3;
														
 
															+    return a;
														
 
															+}
														
 
															+
														
 
															+static SIMDPP_INL
														
 
															+expr_vec_make_const<uint64_t,8>
														
 
															+    make_uint(uint64_t v0, uint64_t v1, uint64_t v2, uint64_t v3,
														
 
															+              uint64_t v4, uint64_t v5, uint64_t v6, uint64_t v7)
														
 
															+{
														
 
															+    expr_vec_make_const<uint64_t,8> a;
														
 
															+    a.a[0] = v0;  a.a[1] = v1;  a.a[2] = v2;  a.a[3] = v3;
														
 
															+    a.a[4] = v4;  a.a[5] = v5;  a.a[6] = v6;  a.a[7] = v7;
														
 
															+    return a;
														
 
															+}
														
 
															+
														
 
															+static SIMDPP_INL
														
 
															+expr_vec_make_const<uint64_t,16>
														
 
															+    make_uint(uint64_t v0,  uint64_t v1,  uint64_t v2,  uint64_t v3,
														
 
															+              uint64_t v4,  uint64_t v5,  uint64_t v6,  uint64_t v7,
														
 
															+              uint64_t v8,  uint64_t v9,  uint64_t v10, uint64_t v11,
														
 
															+              uint64_t v12, uint64_t v13, uint64_t v14, uint64_t v15)
														
 
															+{
														
 
															+    expr_vec_make_const<uint64_t,16> a;
														
 
															+    a.a[0] = v0;    a.a[1] = v1;    a.a[2] = v2;    a.a[3] = v3;
														
 
															+    a.a[4] = v4;    a.a[5] = v5;    a.a[6] = v6;    a.a[7] = v7;
														
 
															+    a.a[8] = v8;    a.a[9] = v9;    a.a[10] = v10;  a.a[11] = v11;
														
 
															+    a.a[12] = v12;  a.a[13] = v13;  a.a[14] = v14;  a.a[15] = v15;
														
 
															+    return a;
														
 
															+}
														
 
															+
														
 
															+template<class V> SIMDPP_INL
														
 
															+V make_uint(uint64_t v0)
														
 
															+{
														
 
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
														
 
															+                  "V must be a non-mask vector");
														
 
															+    expr_vec_make_const<uint64_t,1> a;
														
 
															+    a.a[0] = v0;
														
 
															+    return detail::insn::i_make_const_any<V>(a);
														
 
															+}
														
 
															+
														
 
															+template<class V> SIMDPP_INL
														
 
															+V make_uint(uint64_t v0, uint64_t v1)
														
 
															+{
														
 
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
														
 
															+                  "V must be a non-mask vector");
														
 
															+    expr_vec_make_const<uint64_t,2> a;
														
 
															+    a.a[0] = v0;  a.a[1] = v1;
														
 
															+    return detail::insn::i_make_const_any<V>(a);
														
 
															+}
														
 
															+
														
 
															+template<class V> SIMDPP_INL
														
 
															+V make_uint(uint64_t v0, uint64_t v1, uint64_t v2, uint64_t v3)
														
 
															+{
														
 
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
														
 
															+                  "V must be a non-mask vector");
														
 
															+    expr_vec_make_const<uint64_t,4> a;
														
 
															+    a.a[0] = v0;  a.a[1] = v1;  a.a[2] = v2;  a.a[3] = v3;
														
 
															+    return detail::insn::i_make_const_any<V>(a);
														
 
															+}
														
 
															+
														
 
															+template<class V> SIMDPP_INL
														
 
															+V make_uint(uint64_t v0, uint64_t v1, uint64_t v2, uint64_t v3,
														
 
															+            uint64_t v4, uint64_t v5, uint64_t v6, uint64_t v7)
														
 
															+{
														
 
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
														
 
															+                  "V must be a non-mask vector");
														
 
															+    expr_vec_make_const<uint64_t,8> a;
														
 
															+    a.a[0] = v0;  a.a[1] = v1;  a.a[2] = v2;  a.a[3] = v3;
														
 
															+    a.a[4] = v4;  a.a[5] = v5;  a.a[6] = v6;  a.a[7] = v7;
														
 
															+    return detail::insn::i_make_const_any<V>(a);
														
 
															+}
														
 
															+
														
 
															+template<class V> SIMDPP_INL
														
 
															+V make_uint(uint64_t v0,  uint64_t v1,  uint64_t v2,  uint64_t v3,
														
 
															+            uint64_t v4,  uint64_t v5,  uint64_t v6,  uint64_t v7,
														
 
															+            uint64_t v8,  uint64_t v9,  uint64_t v10, uint64_t v11,
														
 
															+            uint64_t v12, uint64_t v13, uint64_t v14, uint64_t v15)
														
 
															+{
														
 
															+    static_assert(is_vector<V>::value && !is_mask<V>::value,
														
 
															+                  "V must be a non-mask vector");
														
 
															+    expr_vec_make_const<uint64_t,16> a;
														
 
															+    a.a[0] = v0;    a.a[1] = v1;    a.a[2] = v2;    a.a[3] = v3;
														
 
															+    a.a[4] = v4;    a.a[5] = v5;    a.a[6] = v6;    a.a[7] = v7;
														
 
															+    a.a[8] = v8;    a.a[9] = v9;    a.a[10] = v10;  a.a[11] = v11;
														
 
															+    a.a[12] = v12;  a.a[13] = v13;  a.a[14] = v14;  a.a[15] = v15;
														
 
															+    return detail::insn::i_make_const_any<V>(a);
														
 
															+}
														
 
															+
														
 
															+/// Creates a vector initialized to zero
														
 
															+SIMDPP_INL expr_vec_make_const<uint64_t,1> make_zero()
														
 
															+{
														
 
															+    return make_uint(0);
														
 
															+}
														
 
															+
														
 
															+template<class V> SIMDPP_INL
														
 
															+V make_zero()
														
 
															+{
														
 
															+    return make_uint<V>(0);
														
 
															+}
														
 
															+
														
 
															+/// Creates a vector initialized to ones
														
 
															+SIMDPP_INL expr_vec_make_ones make_ones()
														
 
															+{
														
 
															+    return expr_vec_make_ones();
														
 
															+}
														
 
															+
														
 
															+template<class V> SIMDPP_INL
														
 
															+V make_ones()
														
 
															+{
														
 
															+    return (V) make_ones();
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/move_l.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/move_l.h
@@ -0,0 +1,139 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_MOVE_L_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_MOVE_L_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/move_l.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Moves the 8-bit elements in a vector to the left by @a shift positions.
														
 
															+
														
 
															+    @code
														
 
															+    shift:  pos:| 0   1    .  14  15  |
														
 
															+     0      r = [ a0  a1   .  a14 a15 ]
														
 
															+     1      r = [ a1  a2   .  a15  0  ]
														
 
															+     2      r = [ a2  a3   .   0   0  ]
														
 
															+      ...    ..   .. ..   ...  ..  .. ..
														
 
															+     14     r = [ a14 a15  .   0   0  ]
														
 
															+     15     r = [ a15  0   .   0   0  ]
														
 
															+     16     r = [  0   0   .   0   0  ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    The lower and higher 128-bit halves are processed as if 128-bit instruction
														
 
															+    was applied to each of them separately.
														
 
															+
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned shift, unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_nomask<V>::empty
														
 
															+        move16_l(const any_vec8<N,V>& a)
														
 
															+{
														
 
															+    static_assert(shift <= 16, "Shift out of bounds");
														
 
															+    typename detail::get_expr_nomask_nosign<V>::type ra;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    return detail::insn::i_move16_l_wrapper<shift>::run(ra);
														
 
															+}
														
 
															+
														
 
															+/** Moves the 16-bit elements in a vector to the left by @a shift positions.
														
 
															+
														
 
															+    @code
														
 
															+    shift:  pos:| 0  1   . 6  7  |
														
 
															+     0      r = [ a0 a1  . a6 a7 ]
														
 
															+     1      r = [ a1 a2  . a7  0 ]
														
 
															+     2      r = [ a2 a3  .  0  0 ]
														
 
															+      ...    ..   .. .. ... .. ..
														
 
															+     6      r = [ a6 a7  .  0  0 ]
														
 
															+     7      r = [ a7  0  .  0  0 ]
														
 
															+     8      r = [  0  0  .  0  0 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    The lower and higher 128-bit halves are processed as if 128-bit instruction
														
 
															+    was applied to each of them separately.
														
 
															+
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned shift, unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_nomask<V>::empty
														
 
															+        move8_l(const any_vec16<N,V>& a)
														
 
															+{
														
 
															+    static_assert(shift <= 8, "Shift out of bounds");
														
 
															+
														
 
															+    typename detail::get_expr_nomask_nosign<V>::type ra;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    return detail::insn::i_move8_l_wrapper<shift>::run(ra);
														
 
															+}
														
 
															+
														
 
															+/** Moves the 32-bit elements in a vector to the left by @a shift positions.
														
 
															+
														
 
															+    @code
														
 
															+    shift:  pos:| 0  1  2  3  |
														
 
															+     0      r = [ a0 a1 a2 a3 ]
														
 
															+     1      r = [ a1 a2 a3  0 ]
														
 
															+     2      r = [ a2 a3  0  0 ]
														
 
															+     3      r = [ a3  0  0  0 ]
														
 
															+     4      r = [  0  0  0  0 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    The lower and higher 128-bit halves are processed as if 128-bit instruction
														
 
															+    was applied to each of them separately.
														
 
															+
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned shift, unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_nomask<V>::empty
														
 
															+        move4_l(const any_vec32<N,V>& a)
														
 
															+{
														
 
															+    static_assert(shift <= 4, "Shift out of bounds");
														
 
															+
														
 
															+    typename detail::get_expr_nomask_nosign<V>::type ra;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    return detail::insn::i_move4_l_wrapper<shift>::run(ra);
														
 
															+}
														
 
															+
														
 
															+
														
 
															+/** Moves the 64-bit elements in a vector to the left by @a shift positions.
														
 
															+
														
 
															+    @code
														
 
															+    shift:  pos:| 0  1  |
														
 
															+     0      r = [ a0 a1 ]
														
 
															+     1      r = [ a1  0 ]
														
 
															+     2      r = [  0  0 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    The lower and higher 128-bit halves are processed as if 128-bit instruction
														
 
															+    was applied to each of them separately.
														
 
															+
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned shift, unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_nomask<V>::empty
														
 
															+        move2_l(const any_vec64<N,V>& a)
														
 
															+{
														
 
															+    static_assert(shift <= 2, "Shift out of bounds");
														
 
															+
														
 
															+    typename detail::get_expr_nomask_nosign<V>::type ra;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    return detail::insn::i_move2_l_wrapper<shift>::run(ra);
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/move_r.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/move_r.h
@@ -0,0 +1,141 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_MOVE_R_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_MOVE_R_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/move_r.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Moves the 8-bit elements in a vector to the right by @a shift positions.
														
 
															+
														
 
															+    @code
														
 
															+    shift:  pos:| 0   1    .  14  15  |
														
 
															+     0      r = [ a0  a1   .  a14 a15 ]
														
 
															+     1      r = [  0  a0   .  a13 a14 ]
														
 
															+     2      r = [  0   0   .  a12 a13 ]
														
 
															+      ...    ..   .. ..   ...  ..  .. ..
														
 
															+     14     r = [  0   0   .  a0  a1  ]
														
 
															+     15     r = [  0   0   .   0  a0  ]
														
 
															+     16     r = [  0   0   .   0   0  ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    The lower and higher 128-bit halves are processed as if 128-bit instruction
														
 
															+    was applied to each of them separately.
														
 
															+
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned shift, unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_nomask<V>::empty
														
 
															+        move16_r(const any_vec8<N,V>& a)
														
 
															+{
														
 
															+    static_assert(shift <= 16, "Shift out of bounds");
														
 
															+
														
 
															+    typename detail::get_expr_nomask_nosign<V>::type ra;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    return detail::insn::i_move16_r_wrapper<shift>::run(ra);
														
 
															+}
														
 
															+
														
 
															+/** Moves the 16-bit elements in a vector to the right by @a shift positions.
														
 
															+
														
 
															+    @code
														
 
															+    shift:  pos:| 0  1   . 6  7  |
														
 
															+     0      r = [ a0 a1  . a6 a7 ]
														
 
															+     1      r = [  0 a0  . a5 a6 ]
														
 
															+     2      r = [  0  0  . a4 a5 ]
														
 
															+      ...    ..   .. .. ... .. ..
														
 
															+     6      r = [  0  0  . a0 a1 ]
														
 
															+     7      r = [  0  0  .  0 a0 ]
														
 
															+     8      r = [  0  0  .  0  0 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    The lower and higher 128-bit halves are processed as if 128-bit instruction
														
 
															+    was applied to each of them separately.
														
 
															+
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned shift, unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_nomask<V>::empty
														
 
															+        move8_r(const any_vec16<N,V>& a)
														
 
															+{
														
 
															+    static_assert(shift <= 8, "Shift out of bounds");
														
 
															+
														
 
															+    typename detail::get_expr_nomask_nosign<V>::type ra;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    return detail::insn::i_move8_r_wrapper<shift>::run(ra);
														
 
															+}
														
 
															+
														
 
															+/** Moves the 32-bit elements in a vector to the right by @a shift positions.
														
 
															+
														
 
															+    @code
														
 
															+    shift:  pos:| 0  1  2  3  |
														
 
															+     0      r = [ a0 a1 a2 a3 ]
														
 
															+     1      r = [  0 a0 a1 a2 ]
														
 
															+     2      r = [  0  0 a0 a1 ]
														
 
															+     3      r = [  0  0  0 a0 ]
														
 
															+     4      r = [  0  0  0  0 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    The lower and higher 128-bit halves are processed as if 128-bit instruction
														
 
															+    was applied to each of them separately.
														
 
															+
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned shift, unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_nomask<V>::empty
														
 
															+        move4_r(const any_vec32<N,V>& a)
														
 
															+{
														
 
															+    static_assert(shift <= 4, "Shift out of bounds");
														
 
															+
														
 
															+    typename detail::get_expr_nomask_nosign<V>::type ra;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    return detail::insn::i_move4_r_wrapper<shift>::run(ra);
														
 
															+}
														
 
															+
														
 
															+
														
 
															+/** Moves the 64-bit elements in a vector to the right by @a shift positions.
														
 
															+
														
 
															+    @code
														
 
															+    shift:  pos:| 0  1  |
														
 
															+     0      r = [ a0 a1 ]
														
 
															+     1      r = [  0 a0 ]
														
 
															+     2      r = [  0  0 ]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    The lower and higher 128-bit halves are processed as if 128-bit instruction
														
 
															+    was applied to each of them separately.
														
 
															+
														
 
															+    @icost{SSE2-AVX, NEON, ALTIVEC, 2}
														
 
															+*/
														
 
															+template<unsigned shift, unsigned N, class V> SIMDPP_INL
														
 
															+typename detail::get_expr_nomask<V>::empty
														
 
															+        move2_r(const any_vec64<N,V>& a)
														
 
															+{
														
 
															+    static_assert(shift <= 2, "Shift out of bounds");
														
 
															+
														
 
															+    typename detail::get_expr_nomask_nosign<V>::type ra;
														
 
															+    ra = a.wrapped().eval();
														
 
															+    return detail::insn::i_move2_r_wrapper<shift>::run(ra);
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/permute2.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/permute2.h
@@ -0,0 +1,135 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_PERMUTE2_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_PERMUTE2_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/permute2.h>
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Permutes the 16-bit values within sets of two consecutive elements of the
														
 
															+    vector. The selector values must be in range [0; 1].
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a[s0]
														
 
															+    r1 = a[s1]
														
 
															+    r2 = a[s0+2]
														
 
															+    r3 = a[s1+2]
														
 
															+    r4 = a[s0+4]
														
 
															+    r5 = a[s1+4]
														
 
															+    ...
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 2}
														
 
															+    @icost{NEON, ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 4}
														
 
															+    @icost{AVX2, 2}
														
 
															+    @icost{NEON, 2-4}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr_nomask<V>::empty
															+        permute2(const any_vec16<N,V>& a)
															+{
															+    // Both selectors must be 0 or 1; anything else fails to compile.
															+    static_assert(s0 < 2 && s1 < 2, "Selector out of range");
															+
															+    // Evaluate the argument into a concrete vector, then forward it to
															+    // detail::insn::i_permute2 with the same selectors.
															+    using eval_type = typename detail::get_expr_nomask<V>::type;
															+    eval_type evaluated;
															+    evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute2<s0,s1>(evaluated);
															+}
														
 
															+
														
 
															+/** Permutes the values of each set of two consecutive 32-bit values. The
														
 
															+    selector values must be in range [0; 1].
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a[s0]
														
 
															+    r1 = a[s1]
														
 
															+    r2 = a[s0+2]
														
 
															+    r3 = a[s1+2]
														
 
															+    256-bit version:
														
 
															+    r4 = a[s0+4]
														
 
															+    r5 = a[s1+4]
														
 
															+    r6 = a[s0+6]
														
 
															+    r7 = a[s1+6]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par integer
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, 2-4}
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+    @icost{NEON, 4-8}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+
														
 
															+    @par floating-point
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, 2-4}
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+    @icost{NEON, 4-8}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr_nomask<V>::empty
															+        permute2(const any_vec32<N,V>& a)
															+{
															+    // Both selectors must be 0 or 1; anything else fails to compile.
															+    static_assert(s0 < 2 && s1 < 2, "Selector out of range");
															+
															+    // Evaluate the argument into a concrete vector, then forward it to
															+    // detail::insn::i_permute2 with the same selectors.
															+    using eval_type = typename detail::get_expr_nomask<V>::type;
															+    eval_type evaluated;
															+    evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute2<s0,s1>(evaluated);
															+}
														
 
															+
														
 
															+/** Permutes the values of each set of two consecutive 64-bit values. The
														
 
															+    selector values must be in range [0; 1].
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a[s0]
														
 
															+    r1 = a[s1]
														
 
															+
														
 
															+    256-bit version:
														
 
															+    r2 = a[s0+2]
														
 
															+    r3 = a[s1+2]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, 1-2}
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+    @icost{NEON, 2-4}
														
 
															+    @icost{ALTIVEC, 2-4}
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr_nomask<V>::empty
															+        permute2(const any_vec64<N,V>& a)
															+{
															+    // Both selectors must be 0 or 1; anything else fails to compile.
															+    static_assert(s0 < 2 && s1 < 2, "Selector out of range");
															+
															+    // Evaluate the argument into a concrete vector, then forward it to
															+    // detail::insn::i_permute2 with the same selectors.
															+    using eval_type = typename detail::get_expr_nomask<V>::type;
															+    eval_type evaluated;
															+    evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute2<s0,s1>(evaluated);
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/permute4.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/permute4.h
@@ -0,0 +1,142 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_PERMUTE4_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_PERMUTE4_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/permute4.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Permutes the 16-bit values within each 4 consecutive values of the vector.
														
 
															+    The selector values must be in range [0; 3].
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a[s0]
														
 
															+    ...
														
 
															+    r3 = a[s3]
														
 
															+    r4 = a[s0+4]
														
 
															+    ...
														
 
															+    r7 = a[s3+4]
														
 
															+
														
 
															+    256-bit version:
														
 
															+
														
 
															+    r8 = a[s0+8]
														
 
															+    ...
														
 
															+    r11 = a[s3+8]
														
 
															+    r12 = a[s0+12]
														
 
															+    ...
														
 
															+    r15 = a[s3+12]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par 128-bit version:
														
 
															+    @icost{SSE2-AVX2, 2}
														
 
															+    @icost{NEON, 1-5}
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 4}
														
 
															+    @icost{AVX2, 2}
														
 
															+    @icost{NEON, 2-10}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned s2, unsigned s3,
															+         unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr_nomask<V>::empty
															+        permute4(const any_vec16<N,V>& a)
															+{
															+    // All four selectors must lie in [0; 3]; anything else fails to compile.
															+    static_assert(s0 < 4 && s1 < 4 && s2 < 4 && s3 < 4, "Selector out of range");
															+
															+    // Evaluate the argument into a concrete vector, then forward it to
															+    // detail::insn::i_permute4 with the same selectors.
															+    using eval_type = typename detail::get_expr_nomask<V>::type;
															+    eval_type evaluated;
															+    evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute4<s0,s1,s2,s3>(evaluated);
															+}
														
 
															+
														
 
															+/** Permutes the values of each set of four consecutive 32-bit values. The
														
 
															+    selector values must be in range [0; 3].
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a[s0]
														
 
															+    ...
														
 
															+    r3 = a[s3]
														
 
															+
														
 
															+    256-bit version:
														
 
															+    r4 = a[s0+4]
														
 
															+    ...
														
 
															+    r7 = a[s3+4]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par integer
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, 1-4}
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+    @icost{NEON, 2-8}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+
														
 
															+    @par floating-point
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, 1-4}
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @icost{NEON, 2-8}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned s2, unsigned s3,
															+         unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr_nomask<V>::empty
															+        permute4(const any_vec32<N,V>& a)
															+{
															+    // All four selectors must lie in [0; 3]; anything else fails to compile.
															+    static_assert(s0 < 4 && s1 < 4 && s2 < 4 && s3 < 4, "Selector out of range");
															+
															+    // Evaluate the argument into a concrete vector, then forward it to
															+    // detail::insn::i_permute4 with the same selectors.
															+    using eval_type = typename detail::get_expr_nomask<V>::type;
															+    eval_type evaluated;
															+    evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute4<s0,s1,s2,s3>(evaluated);
															+}
														
 
															+
														
 
															+/** Permutes the values of each set of four consecutive 64-bit values. The
														
 
															+    selector values must be in range [0; 3].
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a[s0]
														
 
															+    r1 = a[s1]
														
 
															+    r2 = a[s2]
														
 
															+    r3 = a[s3]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par integer
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+
														
 
															+    @par floating-point
														
 
															+    @icost{SSE2-AVX, 1-2}
														
 
															+    @icost{NEON, 1-4}
														
 
															+    @icost{ALTIVEC, 1-4}
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned s2, unsigned s3,
															+         unsigned N, class V> SIMDPP_INL
															+typename detail::get_expr_nomask<V>::empty
															+        permute4(const any_vec64<N,V>& a)
															+{
															+    // All four selectors must lie in [0; 3]; anything else fails to compile.
															+    static_assert(s0 < 4 && s1 < 4 && s2 < 4 && s3 < 4, "Selector out of range");
															+
															+    // Evaluate the argument into a concrete vector, then forward it to
															+    // detail::insn::i_permute4 with the same selectors.
															+    using eval_type = typename detail::get_expr_nomask<V>::type;
															+    eval_type evaluated;
															+    evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute4<s0,s1,s2,s3>(evaluated);
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/permute_bytes16.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/permute_bytes16.h
@@ -0,0 +1,62 @@
 
															+/*  Copyright (C) 2013-2017  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_PERMUTE_BYTES16_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_PERMUTE_BYTES16_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+#include <simdpp/detail/insn/permute_bytes16.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Selects bytes from a vector according to a mask. Each byte within the
														
 
															+    mask defines which element to select:
														
 
															+     * Bits 7-4 must be zero or the behavior is undefined
														
 
															+     * Bits 3-0 define the element within the given vector.
														
 
															+*/
														
 
															+template<unsigned N, class V1, class E2> SIMDPP_INL
															+typename detail::get_expr_nomask<V1>::empty
															+    permute_bytes16(const any_vec8<N,V1>& a, const uint8<N,E2>& mask)
															+{
															+    // Evaluate both operands and forward them to
															+    // detail::insn::i_permute_bytes16.
															+    using eval_type = typename detail::get_expr_nomask<V1>::type;
															+    eval_type evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute_bytes16(evaluated, mask.eval());
															+}
														
 
															+
														
 
															+template<unsigned N, class V1, class E2> SIMDPP_INL
															+typename detail::get_expr_nomask<V1>::empty
															+    permute_bytes16(const any_vec16<N,V1>& a, const uint16<N,E2>& mask)
															+{
															+    // Evaluate both operands and forward them to
															+    // detail::insn::i_permute_bytes16.
															+    using eval_type = typename detail::get_expr_nomask<V1>::type;
															+    eval_type evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute_bytes16(evaluated, mask.eval());
															+}
														
 
															+
														
 
															+template<unsigned N, class V1, class E2> SIMDPP_INL
															+typename detail::get_expr_nomask<V1>::empty
															+    permute_bytes16(const any_vec32<N,V1>& a, const uint32<N,E2>& mask)
															+{
															+    // Evaluate both operands and forward them to
															+    // detail::insn::i_permute_bytes16.
															+    using eval_type = typename detail::get_expr_nomask<V1>::type;
															+    eval_type evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute_bytes16(evaluated, mask.eval());
															+}
														
 
															+
														
 
															+template<unsigned N, class V1, class E2> SIMDPP_INL
															+typename detail::get_expr_nomask<V1>::empty
															+    permute_bytes16(const any_vec64<N,V1>& a, const uint64<N,E2>& mask)
															+{
															+    // Evaluate both operands and forward them to
															+    // detail::insn::i_permute_bytes16.
															+    using eval_type = typename detail::get_expr_nomask<V1>::type;
															+    eval_type evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute_bytes16(evaluated, mask.eval());
															+}
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/permute_zbytes16.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/permute_zbytes16.h
@@ -0,0 +1,64 @@
 
															+/*  Copyright (C) 2013-2017  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_PERMUTE_ZBYTES16_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_PERMUTE_ZBYTES16_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/get_expr.h>
														
 
															+#include <simdpp/detail/insn/permute_zbytes16.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Selects bytes from a vector according to a mask, optionally selecting zero.
														
 
															+    Each byte within the mask defines which element to select:
														
 
															+     * Bit 7 results in the result byte being zeroed, if set.
														
 
															+     * Bits 6-4 must be zero or the behavior is undefined
														
 
															+     * Bits 3-0 define the element within the given vector.
														
 
															+*/
														
 
															+template<unsigned N, class V1, class E2> SIMDPP_INL
															+typename detail::get_expr_nomask<V1>::empty
															+    permute_zbytes16(const any_vec8<N,V1>& a, const uint8<N,E2>& mask)
															+{
															+    // Evaluate both operands and forward them to
															+    // detail::insn::i_permute_zbytes16.
															+    using eval_type = typename detail::get_expr_nomask<V1>::type;
															+    eval_type evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute_zbytes16(evaluated, mask.eval());
															+}
														
 
															+
														
 
															+template<unsigned N, class V1, class E2> SIMDPP_INL
															+typename detail::get_expr_nomask<V1>::empty
															+    permute_zbytes16(const any_vec16<N,V1>& a, const uint16<N,E2>& mask)
															+{
															+    // Evaluate both operands and forward them to
															+    // detail::insn::i_permute_zbytes16.
															+    using eval_type = typename detail::get_expr_nomask<V1>::type;
															+    eval_type evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute_zbytes16(evaluated, mask.eval());
															+}
														
 
															+
														
 
															+template<unsigned N, class V1, class E2> SIMDPP_INL
															+typename detail::get_expr_nomask<V1>::empty
															+    permute_zbytes16(const any_vec32<N,V1>& a, const uint32<N,E2>& mask)
															+{
															+    // Evaluate both operands and forward them to
															+    // detail::insn::i_permute_zbytes16.
															+    using eval_type = typename detail::get_expr_nomask<V1>::type;
															+    eval_type evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute_zbytes16(evaluated, mask.eval());
															+}
														
 
															+
														
 
															+template<unsigned N, class V1, class E2> SIMDPP_INL
															+typename detail::get_expr_nomask<V1>::empty
															+    permute_zbytes16(const any_vec64<N,V1>& a, const uint64<N,E2>& mask)
															+{
															+    // Evaluate both operands and forward them to
															+    // detail::insn::i_permute_zbytes16.
															+    using eval_type = typename detail::get_expr_nomask<V1>::type;
															+    eval_type evaluated = a.wrapped().eval();
															+    return detail::insn::i_permute_zbytes16(evaluated, mask.eval());
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/set_splat.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/set_splat.h
@@ -0,0 +1,54 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_SET_SPLAT_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_SET_SPLAT_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/set_splat.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Loads a value from a register and broadcasts it to all elements of a vector.
														
 
															+    The argument value is converted to the element of the resulting vector using
														
 
															+    standard conversions.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a
														
 
															+    ...
														
 
															+    rN = a
														
 
															+    @endcode
														
 
															+*/
														
 
															+// FIXME: return empty expression
														
 
															+// Untyped overloads: each one wraps the scalar argument in an
															+// expr_vec_set_splat whose element type is named in the return type.
															+SIMDPP_INL expr_vec_set_splat<uint32_t> splat(unsigned x)
															+{
															+    return { x };
															+}
															+
															+SIMDPP_INL expr_vec_set_splat<uint64_t> splat(unsigned long x)
															+{
															+    return { x };
															+}
															+
															+SIMDPP_INL expr_vec_set_splat<uint64_t> splat(unsigned long long x)
															+{
															+    return { x };
															+}
															+
															+SIMDPP_INL expr_vec_set_splat<int32_t> splat(int x)
															+{
															+    return { x };
															+}
															+
															+SIMDPP_INL expr_vec_set_splat<int64_t> splat(long x)
															+{
															+    return { x };
															+}
															+
															+SIMDPP_INL expr_vec_set_splat<int64_t> splat(long long x)
															+{
															+    return { x };
															+}
															+
															+SIMDPP_INL expr_vec_set_splat<float> splat(float x)
															+{
															+    return { x };
															+}
															+
															+SIMDPP_INL expr_vec_set_splat<double> splat(double x)
															+{
															+    return { x };
															+}
														
 
															+
														
 
															+// Typed overloads: the caller names the result vector type V and the
															+// scalar is forwarded to detail::splat_impl<V>.
															+template<class V> SIMDPP_INL
															+V splat(unsigned x)
															+{
															+    return detail::splat_impl<V>(x);
															+}
															+
															+template<class V> SIMDPP_INL
															+V splat(unsigned long x)
															+{
															+    return detail::splat_impl<V>(x);
															+}
															+
															+template<class V> SIMDPP_INL
															+V splat(unsigned long long x)
															+{
															+    return detail::splat_impl<V>(x);
															+}
															+
															+template<class V> SIMDPP_INL
															+V splat(int x)
															+{
															+    return detail::splat_impl<V>(x);
															+}
															+
															+template<class V> SIMDPP_INL
															+V splat(long x)
															+{
															+    return detail::splat_impl<V>(x);
															+}
															+
															+template<class V> SIMDPP_INL
															+V splat(long long x)
															+{
															+    return detail::splat_impl<V>(x);
															+}
															+
															+template<class V> SIMDPP_INL
															+V splat(float x)
															+{
															+    return detail::splat_impl<V>(x);
															+}
															+
															+template<class V> SIMDPP_INL
															+V splat(double x)
															+{
															+    return detail::splat_impl<V>(x);
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/shuffle1.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/shuffle1.h
@@ -0,0 +1,65 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_SHUFFLV1_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_SHUFFLV1_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/shuffle2x2.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Selects 64-bit values from two vectors. The first value in
														
 
															+    each pair of values must come from @a a, the second - from @a b. The
														
 
															+    selector values must be in range [0; 1].
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a[s0]
														
 
															+    r1 = b[s1]
														
 
															+
														
 
															+    256-bit version:
														
 
															+    r2 = a[s0+2]
														
 
															+    r3 = b[s1+2]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par floating-point
														
 
															+    @par 128-bit version:
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @novec{NEON, ALTIVEC}
														
 
															+
														
 
															+    @par integer
														
 
															+    @par 128-bit version:
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+    @icost{NEON, 1-2}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned N, class V1, class V2> SIMDPP_INL
															+typename detail::get_expr2_nomask<V1, V2>::empty
															+        shuffle1(const any_vec64<N,V1>& a, const any_vec64<N,V2>& b)
															+{
															+    // Both selectors must be 0 or 1; anything else fails to compile.
															+    static_assert(s0 < 2 && s1 < 2, "Selector out of range");
															+
															+    // Evaluate both operands to the common vector type.
															+    using eval_type = typename detail::get_expr2_nomask<V1, V2>::type;
															+    eval_type first = a.wrapped().eval();
															+    eval_type second = b.wrapped().eval();
															+
															+    // The second selector is passed to i_shuffle2x2 offset by 2.
															+    return detail::insn::i_shuffle2x2<s0,s1+2>(first, second);
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/shuffle2.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/shuffle2.h
@@ -0,0 +1,122 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_SHUFFLE2_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_SHUFFLE2_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/shuffle4x2.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Selects 32-bit values from two vectors. The first two values
														
 
															+    in each four consecutive values must come from @a a, the last two - from @a
														
 
															+    b. The selector values must be in range [0; 3].
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a[sa0]
														
 
															+    r1 = a[sa1]
														
 
															+    r2 = b[sb0]
														
 
															+    r3 = b[sb1]
														
 
															+
														
 
															+    256-bit version:
														
 
															+    r4 = a[sa0+4]
														
 
															+    r5 = a[sa1+4]
														
 
															+    r6 = b[sb0+4]
														
 
															+    r7 = b[sb1+4]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par floating-point
														
 
															+    @par 128-bit version:
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+    @icost{NEON, 1-4}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @icost{NEON, 2-8}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+
														
 
															+    @par integer
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, 1-4}
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+    @icost{NEON, 2-8}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+*/
														
 
															+template<unsigned sa0, unsigned sa1, unsigned sb0, unsigned sb1, unsigned N,
														
 
															+         class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr2_nomask<V1, V2>::empty
														
 
															+    shuffle2(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b)
														
 
															+{
														
 
															+    static_assert(sa0 < 4 && sa1 < 4 && sb0 < 4 && sb1 < 4, "Selector out of range");
														
 
															+    typename detail::get_expr2_nomask<V1,V2,void>::type a0 = a.wrapped().eval(),
														
 
															+                                                        b0 = b.wrapped().eval();
														
 
															+    return detail::insn::i_shuffle4x2<sa0,sa1,sb0+4,sb1+4>(a0, b0);
														
 
															+}
														
 
															+
														
 
															+/** Selects 32-bit values from two vectors. The first two values in each four
														
 
															+    consecutive values must come from @a a, the last two - from @a b. The
														
 
															+    selector values must be in range [0; 3] and apply to both @a a and @a b.
														
 
															+
														
 
															+    @code
														
 
															+    r0 = a[s0]
														
 
															+    r1 = a[s1]
														
 
															+    r2 = b[s0]
														
 
															+    r3 = b[s1]
														
 
															+
														
 
															+    256-bit version:
														
 
															+    r4 = a[s0+4]
														
 
															+    r5 = a[s1+4]
														
 
															+    r6 = b[s0+4]
														
 
															+    r7 = b[s1+4]
														
 
															+    @endcode
														
 
															+
														
 
															+    @par floating-point
														
 
															+    @par 128-bit version:
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+    @icost{NEON, 2-4}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-SSE4.1, 2}
														
 
															+    @icost{NEON, 4-8}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+
														
 
															+    @par integer
														
 
															+    @par 128-bit version:
														
 
															+    @icost{NEON, 2-4}
														
 
															+    @icost{ALTIVEC, 1-2}
														
 
															+
														
 
															+    @par 256-bit version:
														
 
															+    @icost{SSE2-AVX, 2}
														
 
															+    @icost{NEON, 4-8}
														
 
															+    @icost{ALTIVEC, 2-3}
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned N,
														
 
															+         class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr2_nomask<V1, V2>::empty
														
 
															+    shuffle2(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b)
														
 
															+{
														
 
															+    static_assert(s0 < 4 && s1 < 4, "Selector out of range");
														
 
															+    typename detail::get_expr2_nomask<V1,V2,void>::type a0 = a.wrapped().eval(),
														
 
															+                                                        b0 = b.wrapped().eval();
														
 
															+    return detail::insn::i_shuffle4x2<s0,s1,s0+4,s1+4>(a0, b0);
														
 
															+}
														
 
															+
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/shuffle2x2.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/shuffle2x2.h
@@ -0,0 +1,75 @@
 
															+/*  Copyright (C) 2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_SHUFFLE2x2_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_SHUFFLE2x2_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/shuffle4x2.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Selects 32-bit values from two vectors.
														
 
															+    The selector values must be in range [0; 3].
														
 
															+
														
 
															+    @code
														
 
															+    For each 64-bit segment:
														
 
															+    ab = [ a..b ]   // elements of a followed by elements of b
														
 
															+    r0 = ab[s0]
														
 
															+    r1 = ab[s1]
														
 
															+    @endcode
														
 
															+
														
 
															+    Implemented only when SIMDPP_USE_NULL, SIMDPP_USE_SSE2 or SIMDPP_USE_NEON is
														
 
															+    enabled; other configurations go through SIMDPP_NOT_IMPLEMENTED_TEMPLATE2.
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned N,
														
 
															+         class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr2_nomask<V1, V2>::empty
														
 
															+    shuffle2x2(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b)
														
 
															+{
														
 
															+#if SIMDPP_USE_NULL || SIMDPP_USE_SSE2 || SIMDPP_USE_NEON
														
 
															+    static_assert(s0 < 4 && s1 < 4, "Selector out of range");
														
 
															+    typename detail::get_expr2_nomask_nosign<V1,V2,void>::type a0 = a.wrapped().eval(),
														
 
															+                                                               b0 = b.wrapped().eval();
														
 
															+    return detail::insn::i_shuffle2x2<s0,s1>(a0, b0);
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(V1, a, b);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+/** Selects 64-bit values from two vectors.
														
 
															+    The selector values must be in range [0; 3].
														
 
															+
														
 
															+    @code
														
 
															+    For each 128-bit segment:
														
 
															+    ab = [ a..b ]   // elements of a followed by elements of b
														
 
															+    r0 = ab[s0]
														
 
															+    r1 = ab[s1]
														
 
															+    @endcode
														
 
															+
														
 
															+    Implemented only when SIMDPP_USE_NULL, SIMDPP_USE_SSE2 or SIMDPP_USE_NEON is
														
 
															+    enabled; other configurations go through SIMDPP_NOT_IMPLEMENTED_TEMPLATE2.
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned N,
														
 
															+         class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr2_nomask<V1, V2>::empty
														
 
															+    shuffle2x2(const any_vec64<N,V1>& a, const any_vec64<N,V2>& b)
														
 
															+{
														
 
															+#if SIMDPP_USE_NULL || SIMDPP_USE_SSE2 || SIMDPP_USE_NEON
														
 
															+    static_assert(s0 < 4 && s1 < 4, "Selector out of range");
														
 
															+    typename detail::get_expr2_nomask_nosign<V1,V2,void>::type a0 = a.wrapped().eval(),
														
 
															+                                                               b0 = b.wrapped().eval();
														
 
															+    return detail::insn::i_shuffle2x2<s0,s1>(a0, b0);
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(V1, a, b);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+
														
--- a/Source/BansheeUtility/ThirdParty/simdpp/core/shuffle4x2.h
+++ b/Source/BansheeUtility/ThirdParty/simdpp/core/shuffle4x2.h
@@ -0,0 +1,79 @@
 
															+/*  Copyright (C) 2013-2014  Povilas Kanapickas <[email protected]>
														
 
															+
														
 
															+    Distributed under the Boost Software License, Version 1.0.
														
 
															+        (See accompanying file LICENSE_1_0.txt or copy at
														
 
															+            http://www.boost.org/LICENSE_1_0.txt)
														
 
															+*/
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMDPP_CORE_SHUFFLE4x2_H
														
 
															+#define LIBSIMDPP_SIMDPP_CORE_SHUFFLE4x2_H
														
 
															+
														
 
															+#ifndef LIBSIMDPP_SIMD_H
														
 
															+    #error "This file must be included through simd.h"
														
 
															+#endif
														
 
															+
														
 
															+#include <simdpp/types.h>
														
 
															+#include <simdpp/detail/insn/shuffle4x2.h>
														
 
															+
														
 
															+namespace simdpp {
														
 
															+namespace SIMDPP_ARCH_NAMESPACE {
														
 
															+
														
 
															+/** Selects 32-bit values from two vectors.
														
 
															+    The selector values must be in range [0; 7].
														
 
															+
														
 
															+    @code
														
 
															+    For each 128-bit segment:
														
 
															+    ab = [ a..b ]   // elements of a followed by elements of b
														
 
															+    r0 = ab[s0]
														
 
															+    r1 = ab[s1]
														
 
															+    r2 = ab[s2]
														
 
															+    r3 = ab[s3]
														
 
															+    @endcode
														
 
															+
														
 
															+    Implemented only when SIMDPP_USE_NULL, SIMDPP_USE_SSE2 or SIMDPP_USE_NEON is
														
 
															+    enabled; other configurations go through SIMDPP_NOT_IMPLEMENTED_TEMPLATE2.
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned s2, unsigned s3, unsigned N,
														
 
															+         class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr2_nomask<V1, V2>::empty
														
 
															+    shuffle4x2(const any_vec32<N,V1>& a, const any_vec32<N,V2>& b)
														
 
															+{
														
 
															+#if SIMDPP_USE_NULL || SIMDPP_USE_SSE2 || SIMDPP_USE_NEON
														
 
															+    static_assert(s0 < 8 && s1 < 8 && s2 < 8 && s3 < 8, "Selector out of range");
														
 
															+    typename detail::get_expr2_nomask_nosign<V1,V2,void>::type a0 = a.wrapped().eval(),
														
 
															+                                                               b0 = b.wrapped().eval();
														
 
															+    return detail::insn::i_shuffle4x2<s0,s1,s2,s3>(a0, b0);
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(V1, a, b);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+/** Selects 64-bit values from two vectors.
														
 
															+    The selector values must be in range [0; 7].
														
 
															+
														
 
															+    @code
														
 
															+    For each 256-bit segment:
														
 
															+    ab = [ a..b ]   // elements of a followed by elements of b
														
 
															+    r0 = ab[s0]
														
 
															+    r1 = ab[s1]
														
 
															+    r2 = ab[s2]
														
 
															+    r3 = ab[s3]
														
 
															+    @endcode
														
 
															+
														
 
															+    Implemented only when SIMDPP_USE_NULL, SIMDPP_USE_SSE2 or SIMDPP_USE_NEON is
														
 
															+    enabled; other configurations go through SIMDPP_NOT_IMPLEMENTED_TEMPLATE2.
														
 
															+*/
														
 
															+template<unsigned s0, unsigned s1, unsigned s2, unsigned s3, unsigned N,
														
 
															+         class V1, class V2> SIMDPP_INL
														
 
															+typename detail::get_expr2_nomask<V1, V2>::empty
														
 
															+    shuffle4x2(const any_vec64<N,V1>& a, const any_vec64<N,V2>& b)
														
 
															+{
														
 
															+#if SIMDPP_USE_NULL || SIMDPP_USE_SSE2 || SIMDPP_USE_NEON
														
 
															+    static_assert(s0 < 8 && s1 < 8 && s2 < 8 && s3 < 8, "Selector out of range");
														
 
															+    typename detail::get_expr2_nomask_nosign<V1,V2,void>::type a0 = a.wrapped().eval(),
														
 
															+                                                               b0 = b.wrapped().eval();
														
 
															+    return detail::insn::i_shuffle4x2<s0,s1,s2,s3>(a0, b0);
														
 
															+#else
														
 
															+    return SIMDPP_NOT_IMPLEMENTED_TEMPLATE2(V1, a, b);
														
 
															+#endif
														
 
															+}
														
 
															+
														
 
															+} // namespace SIMDPP_ARCH_NAMESPACE
														
 
															+} // namespace simdpp
														
 
															+
														
 
															+#endif
														
 
															+