1 年之前 · 839ffcb07c
--- a/Jolt/Geometry/RayAABox8.h
+++ b/Jolt/Geometry/RayAABox8.h
@@ -1,76 +0,0 @@
 
				-// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
			
 
				-// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
			
 
				-// SPDX-License-Identifier: MIT
			
 
				-
			
 
				-#pragma once
			
 
				-
			
 
				-#include <Jolt/Math/Vec8.h>
			
 
				-#include <Jolt/Geometry/RayAABox.h>
			
 
				-
			
 
				-JPH_NAMESPACE_BEGIN
			
 
				-
			
 
				-/// Intersect 8 AABBs with ray, returns minimal distance along ray or FLT_MAX if no hit
			
 
				-/// Note: Can return negative value if ray starts in box
			
 
				-JPH_INLINE Vec8 RayAABox8(Vec3Arg inOrigin, const RayInvDirection &inInvDirection, Vec8Arg inBoundsMinX, Vec8Arg inBoundsMinY, Vec8Arg inBoundsMinZ, Vec8Arg inBoundsMaxX, Vec8Arg inBoundsMaxY, Vec8Arg inBoundsMaxZ)
			
 
				-{
			
 
				-	// Constants
			
 
				-	Vec8 flt_min = Vec8::sReplicate(-FLT_MAX);
			
 
				-	Vec8 flt_max = Vec8::sReplicate(FLT_MAX);
			
 
				-
			
 
				-	// Origin
			
 
				-	Vec8 originx = Vec8::sSplatX(Vec4(inOrigin));
			
 
				-	Vec8 originy = Vec8::sSplatY(Vec4(inOrigin));
			
 
				-	Vec8 originz = Vec8::sSplatZ(Vec4(inOrigin));
			
 
				-
			
 
				-	// Parallel
			
 
				-	UVec8 parallelx = UVec8::sSplatX(inInvDirection.mIsParallel);
			
 
				-	UVec8 parallely = UVec8::sSplatY(inInvDirection.mIsParallel);
			
 
				-	UVec8 parallelz = UVec8::sSplatZ(inInvDirection.mIsParallel);
			
 
				-
			
 
				-	// Inverse direction
			
 
				-	Vec8 invdirx = Vec8::sSplatX(Vec4(inInvDirection.mInvDirection));
			
 
				-	Vec8 invdiry = Vec8::sSplatY(Vec4(inInvDirection.mInvDirection));
			
 
				-	Vec8 invdirz = Vec8::sSplatZ(Vec4(inInvDirection.mInvDirection));
			
 
				-
			
 
				-	// Test against all three axii simultaneously.
			
 
				-	Vec8 t1x = (inBoundsMinX - originx) * invdirx;
			
 
				-	Vec8 t1y = (inBoundsMinY - originy) * invdiry;
			
 
				-	Vec8 t1z = (inBoundsMinZ - originz) * invdirz;
			
 
				-	Vec8 t2x = (inBoundsMaxX - originx) * invdirx;
			
 
				-	Vec8 t2y = (inBoundsMaxY - originy) * invdiry;
			
 
				-	Vec8 t2z = (inBoundsMaxZ - originz) * invdirz;
			
 
				-
			
 
				-	// Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't
			
 
				-	// use the results from any directions parallel to the slab.
			
 
				-	Vec8 t_minx = Vec8::sSelect(Vec8::sMin(t1x, t2x), flt_min, parallelx);
			
 
				-	Vec8 t_miny = Vec8::sSelect(Vec8::sMin(t1y, t2y), flt_min, parallely);
			
 
				-	Vec8 t_minz = Vec8::sSelect(Vec8::sMin(t1z, t2z), flt_min, parallelz);
			
 
				-	Vec8 t_maxx = Vec8::sSelect(Vec8::sMax(t1x, t2x), flt_max, parallelx);
			
 
				-	Vec8 t_maxy = Vec8::sSelect(Vec8::sMax(t1y, t2y), flt_max, parallely);
			
 
				-	Vec8 t_maxz = Vec8::sSelect(Vec8::sMax(t1z, t2z), flt_max, parallelz);
			
 
				-
			
 
				-	// t_min.xyz = maximum(t_min.x, t_min.y, t_min.z);
			
 
				-	Vec8 t_min = Vec8::sMax(Vec8::sMax(t_minx, t_miny), t_minz);
			
 
				-
			
 
				-	// t_max.xyz = minimum(t_max.x, t_max.y, t_max.z);
			
 
				-	Vec8 t_max = Vec8::sMin(Vec8::sMin(t_maxx, t_maxy), t_maxz);
			
 
				-
			
 
				-	// if (t_min > t_max) return FLT_MAX;
			
 
				-	UVec8 no_intersection = Vec8::sGreater(t_min, t_max);
			
 
				-
			
 
				-	// if (t_max < 0.0f) return FLT_MAX;
			
 
				-	no_intersection = UVec8::sOr(no_intersection, Vec8::sLess(t_max, Vec8::sZero()));
			
 
				-
			
 
				-	// if bounds are invalid return FLOAT_MAX;
			
 
				-	UVec8 bounds_invalid = UVec8::sOr(UVec8::sOr(Vec8::sGreater(inBoundsMinX, inBoundsMaxX), Vec8::sGreater(inBoundsMinY, inBoundsMaxY)), Vec8::sGreater(inBoundsMinZ, inBoundsMaxZ));
			
 
				-	no_intersection = UVec8::sOr(no_intersection, bounds_invalid);
			
 
				-
			
 
				-	// if (inInvDirection.mIsParallel && !(Min <= inOrigin && inOrigin <= Max)) return FLT_MAX; else return t_min;
			
 
				-	UVec8 no_parallel_overlapx = UVec8::sAnd(parallelx, UVec8::sOr(Vec8::sLess(originx, inBoundsMinX), Vec8::sGreater(originx, inBoundsMaxX)));
			
 
				-	UVec8 no_parallel_overlapy = UVec8::sAnd(parallely, UVec8::sOr(Vec8::sLess(originy, inBoundsMinY), Vec8::sGreater(originy, inBoundsMaxY)));
			
 
				-	UVec8 no_parallel_overlapz = UVec8::sAnd(parallelz, UVec8::sOr(Vec8::sLess(originz, inBoundsMinZ), Vec8::sGreater(originz, inBoundsMaxZ)));
			
 
				-	no_intersection = UVec8::sOr(no_intersection, UVec8::sOr(UVec8::sOr(no_parallel_overlapx, no_parallel_overlapy), no_parallel_overlapz));
			
 
				-	return Vec8::sSelect(t_min, flt_max, no_intersection);
			
 
				-}
			
 
				-
			
 
				-JPH_NAMESPACE_END
			
--- a/Jolt/Geometry/RayTriangle8.h
+++ b/Jolt/Geometry/RayTriangle8.h
@@ -1,91 +0,0 @@
 
				-// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
			
 
				-// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
			
 
				-// SPDX-License-Identifier: MIT
			
 
				-
			
 
				-#pragma once
			
 
				-
			
 
				-#include <Jolt/Math/Vec8.h>
			
 
				-
			
 
				-JPH_NAMESPACE_BEGIN
			
 
				-
			
 
				-/// Intersect ray with 8 triangles in SOA format, returns 8 vector of closest points or FLT_MAX if no hit
			
 
				-JPH_INLINE Vec8 RayTriangle8(Vec3Arg inOrigin, Vec3Arg inDirection, Vec8Arg inV0X, Vec8Arg inV0Y, Vec8Arg inV0Z, Vec8Arg inV1X, Vec8Arg inV1Y, Vec8Arg inV1Z, Vec8Arg inV2X, Vec8Arg inV2Y, Vec8Arg inV2Z)
			
 
				-{
			
 
				-	// Epsilon
			
 
				-	Vec8 epsilon = Vec8::sReplicate(1.0e-12f);
			
 
				-
			
 
				-	// Zero & one
			
 
				-	Vec8 zero = Vec8::sZero();
			
 
				-	Vec8 one = Vec8::sReplicate(1.0f);
			
 
				-
			
 
				-	// Find vectors for two edges sharing inV0
			
 
				-	Vec8 e1x = inV1X - inV0X;
			
 
				-	Vec8 e1y = inV1Y - inV0Y;
			
 
				-	Vec8 e1z = inV1Z - inV0Z;
			
 
				-	Vec8 e2x = inV2X - inV0X;
			
 
				-	Vec8 e2y = inV2Y - inV0Y;
			
 
				-	Vec8 e2z = inV2Z - inV0Z;
			
 
				-
			
 
				-	// Get direction vector components
			
 
				-	Vec8 dx = Vec8::sSplatX(Vec4(inDirection));
			
 
				-	Vec8 dy = Vec8::sSplatY(Vec4(inDirection));
			
 
				-	Vec8 dz = Vec8::sSplatZ(Vec4(inDirection));
			
 
				-
			
 
				-	// Begin calculating determinant - also used to calculate u parameter
			
 
				-	Vec8 px = dy * e2z - dz * e2y;
			
 
				-	Vec8 py = dz * e2x - dx * e2z;
			
 
				-	Vec8 pz = dx * e2y - dy * e2x;
			
 
				-
			
 
				-	// if determinant is near zero, ray lies in plane of triangle
			
 
				-	Vec8 det = e1x * px + e1y * py + e1z * pz;
			
 
				-
			
 
				-	// Check which determinants are near zero
			
 
				-	UVec8 det_near_zero = Vec8::sLess(det.Abs(), epsilon);
			
 
				-
			
 
				-	// Set components of the determinant to 1 that are near zero to avoid dividing by zero
			
 
				-	det = Vec8::sSelect(det, Vec8::sReplicate(1.0f), det_near_zero);
			
 
				-
			
 
				-	// Calculate distance from inV0 to ray origin
			
 
				-	Vec8 sx = Vec8::sSplatX(Vec4(inOrigin)) - inV0X;
			
 
				-	Vec8 sy = Vec8::sSplatY(Vec4(inOrigin)) - inV0Y;
			
 
				-	Vec8 sz = Vec8::sSplatZ(Vec4(inOrigin)) - inV0Z;
			
 
				-
			
 
				-	// Calculate u parameter and flip sign if determinant was negative
			
 
				-	Vec8 u = (sx * px + sy * py + sz * pz) / det;
			
 
				-
			
 
				-	// Prepare to test v parameter
			
 
				-	Vec8 qx = sy * e1z - sz * e1y;
			
 
				-	Vec8 qy = sz * e1x - sx * e1z;
			
 
				-	Vec8 qz = sx * e1y - sy * e1x;
			
 
				-
			
 
				-	// Calculate v parameter and flip sign if determinant was negative
			
 
				-	Vec8 v = (dx * qx + dy * qy + dz * qz) / det;
			
 
				-
			
 
				-	// Get intersection point and flip sign if determinant was negative
			
 
				-	Vec8 t = (e2x * qx + e2y * qy + e2z * qz) / det;
			
 
				-
			
 
				-	// Check if there is an intersection
			
 
				-	UVec8 no_intersection =
			
 
				-		UVec8::sOr
			
 
				-		(
			
 
				-			UVec8::sOr
			
 
				-			(
			
 
				-				UVec8::sOr
			
 
				-				(
			
 
				-					det_near_zero,
			
 
				-					Vec8::sLess(u, zero)
			
 
				-				),
			
 
				-				UVec8::sOr
			
 
				-				(
			
 
				-					Vec8::sLess(v, zero),
			
 
				-					Vec8::sGreater(u + v, one)
			
 
				-				)
			
 
				-			),
			
 
				-			Vec8::sLess(t, zero)
			
 
				-		);
			
 
				-
			
 
				-	// Select intersection point or FLT_MAX based on if there is an intersection or not
			
 
				-	return Vec8::sSelect(t, Vec8::sReplicate(FLT_MAX), no_intersection);
			
 
				-}
			
 
				-
			
 
				-JPH_NAMESPACE_END
			
--- a/Jolt/Jolt.cmake
+++ b/Jolt/Jolt.cmake
@@ -96,12 +96,10 @@ set(JOLT_PHYSICS_SRC_FILES
 
				 	${JOLT_PHYSICS_ROOT}/Geometry/OrientedBox.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Geometry/Plane.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Geometry/RayAABox.h
			
 
				-	${JOLT_PHYSICS_ROOT}/Geometry/RayAABox8.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Geometry/RayCapsule.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Geometry/RayCylinder.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Geometry/RaySphere.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Geometry/RayTriangle.h
			
 
				-	${JOLT_PHYSICS_ROOT}/Geometry/RayTriangle8.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Geometry/Sphere.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Geometry/Triangle.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Jolt.cmake
			
@@ -131,15 +129,11 @@ set(JOLT_PHYSICS_SRC_FILES
 
				 	${JOLT_PHYSICS_ROOT}/Math/Trigonometry.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Math/UVec4.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Math/UVec4.inl
			
 
				-	${JOLT_PHYSICS_ROOT}/Math/UVec8.h
			
 
				-	${JOLT_PHYSICS_ROOT}/Math/UVec8.inl
			
 
				 	${JOLT_PHYSICS_ROOT}/Math/Vec3.cpp
			
 
				 	${JOLT_PHYSICS_ROOT}/Math/Vec3.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Math/Vec3.inl
			
 
				 	${JOLT_PHYSICS_ROOT}/Math/Vec4.h
			
 
				 	${JOLT_PHYSICS_ROOT}/Math/Vec4.inl
			
 
				-	${JOLT_PHYSICS_ROOT}/Math/Vec8.h
			
 
				-	${JOLT_PHYSICS_ROOT}/Math/Vec8.inl
			
 
				 	${JOLT_PHYSICS_ROOT}/Math/Vector.h
			
 
				 	${JOLT_PHYSICS_ROOT}/ObjectStream/SerializableObject.cpp
			
 
				 	${JOLT_PHYSICS_ROOT}/ObjectStream/SerializableObject.h
			
--- a/Jolt/Math/MathTypes.h
+++ b/Jolt/Math/MathTypes.h
@@ -10,8 +10,6 @@ class Vec3;
 
				 class DVec3;
			
 
				 class Vec4;
			
 
				 class UVec4;
			
 
				-class Vec8;
			
 
				-class UVec8;
			
 
				 class Quat;
			
 
				 class Mat44;
			
 
				 class DMat44;
			
@@ -25,8 +23,6 @@ using Vec3Arg = const Vec3;
 
				 #endif
			
 
				 using Vec4Arg = const Vec4;
			
 
				 using UVec4Arg = const UVec4;
			
 
				-using Vec8Arg = const Vec8;
			
 
				-using UVec8Arg = const UVec8;
			
 
				 using QuatArg = const Quat;
			
 
				 using Mat44Arg = const Mat44 &;
			
 
				 using DMat44Arg = const DMat44 &;
			
--- a/Jolt/Math/UVec8.h
+++ b/Jolt/Math/UVec8.h
@@ -1,100 +0,0 @@
 
				-// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
			
 
				-// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
			
 
				-// SPDX-License-Identifier: MIT
			
 
				-
			
 
				-#pragma once
			
 
				-
			
 
				-#include <Jolt/Math/Vec8.h>
			
 
				-
			
 
				-JPH_NAMESPACE_BEGIN
			
 
				-
			
 
				-class [[nodiscard]] UVec8
			
 
				-{
			
 
				-public:
			
 
				-	JPH_OVERRIDE_NEW_DELETE
			
 
				-
			
 
				-								UVec8() = default; ///< Intentionally not initialized for performance reasons
			
 
				-								UVec8(const UVec8 &inRHS) = default;
			
 
				-	JPH_INLINE					UVec8(__m256i inRHS) : mValue(inRHS)				{ }
			
 
				-
			
 
				-	/// Set 256 bit vector from 2 128 bit vectors
			
 
				-	JPH_INLINE					UVec8(UVec4Arg inLo, UVec4Arg inHi);
			
 
				-
			
 
				-	/// Comparison
			
 
				-	JPH_INLINE bool				operator == (UVec8Arg inV2) const;
			
 
				-	JPH_INLINE bool				operator != (UVec8Arg inV2) const					{ return !(*this == inV2); }
			
 
				-
			
 
				-	/// Replicate int across all components
			
 
				-	static JPH_INLINE UVec8		sReplicate(uint32 inV);
			
 
				-
			
 
				-	/// Replicate the X component of inV to all components
			
 
				-	static JPH_INLINE UVec8		sSplatX(UVec4Arg inV);
			
 
				-
			
 
				-	/// Replicate the Y component of inV to all components
			
 
				-	static JPH_INLINE UVec8		sSplatY(UVec4Arg inV);
			
 
				-
			
 
				-	/// Replicate the Z component of inV to all components
			
 
				-	static JPH_INLINE UVec8		sSplatZ(UVec4Arg inV);
			
 
				-
			
 
				-	/// Equals (component wise)
			
 
				-	static JPH_INLINE UVec8		sEquals(UVec8Arg inV1, UVec8Arg inV2);
			
 
				-
			
 
				-	/// Component wise select, returns inV1 when highest bit of inControl = 0 and inV2 when highest bit of inControl = 1
			
 
				-	static JPH_INLINE UVec8		sSelect(UVec8Arg inV1, UVec8Arg inV2, UVec8Arg inControl);
			
 
				-
			
 
				-	/// Logical or
			
 
				-	static JPH_INLINE UVec8		sOr(UVec8Arg inV1, UVec8Arg inV2);
			
 
				-
			
 
				-	/// Logical xor
			
 
				-	static JPH_INLINE UVec8		sXor(UVec8Arg inV1, UVec8Arg inV2);
			
 
				-
			
 
				-	/// Logical and
			
 
				-	static JPH_INLINE UVec8		sAnd(UVec8Arg inV1, UVec8Arg inV2);
			
 
				-
			
 
				-	/// Get float component by index
			
 
				-	JPH_INLINE uint32			operator [] (uint inCoordinate) const				{ JPH_ASSERT(inCoordinate < 8); return mU32[inCoordinate]; }
			
 
				-	JPH_INLINE uint32 &			operator [] (uint inCoordinate)						{ JPH_ASSERT(inCoordinate < 8); return mU32[inCoordinate]; }
			
 
				-
			
 
				-	/// 256 bit variant of Vec::Swizzle (no cross 128 bit lane swizzle)
			
 
				-	template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
			
 
				-	JPH_INLINE UVec8			Swizzle() const;
			
 
				-
			
 
				-	/// Test if any of the components are true (true is when highest bit of component is set)
			
 
				-	JPH_INLINE bool				TestAnyTrue() const;
			
 
				-
			
 
				-	/// Test if all components are true (true is when highest bit of component is set)
			
 
				-	JPH_INLINE bool				TestAllTrue() const;
			
 
				-
			
 
				-	/// Fetch the lower 128 bit from a 256 bit variable
			
 
				-	JPH_INLINE UVec4			LowerVec4() const;
			
 
				-
			
 
				-	/// Fetch the higher 128 bit from a 256 bit variable
			
 
				-	JPH_INLINE UVec4			UpperVec4() const;
			
 
				-
			
 
				-	/// Converts int to float
			
 
				-	JPH_INLINE Vec8				ToFloat() const;
			
 
				-
			
 
				-	/// Shift all components by Count bits to the left (filling with zeros from the left)
			
 
				-	template <const uint Count>
			
 
				-	JPH_INLINE UVec8			LogicalShiftLeft() const;
			
 
				-
			
 
				-	/// Shift all components by Count bits to the right (filling with zeros from the right)
			
 
				-	template <const uint Count>
			
 
				-	JPH_INLINE UVec8			LogicalShiftRight() const;
			
 
				-
			
 
				-	/// Shift all components by Count bits to the right (shifting in the value of the highest bit)
			
 
				-	template <const uint Count>
			
 
				-	JPH_INLINE UVec8			ArithmeticShiftRight() const;
			
 
				-
			
 
				-	union
			
 
				-	{
			
 
				-		__m256i					mValue;
			
 
				-		uint32					mU32[8];
			
 
				-	};
			
 
				-};
			
 
				-
			
 
				-static_assert(is_trivial<UVec8>(), "Is supposed to be a trivial type!");
			
 
				-
			
 
				-JPH_NAMESPACE_END
			
 
				-
			
 
				-#include "UVec8.inl"
			
--- a/Jolt/Math/UVec8.inl
+++ b/Jolt/Math/UVec8.inl
@@ -1,138 +0,0 @@
 
				-// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
			
 
				-// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
			
 
				-// SPDX-License-Identifier: MIT
			
 
				-
			
 
				-JPH_NAMESPACE_BEGIN
			
 
				-
			
 
				-UVec8::UVec8(UVec4Arg inLo, UVec4Arg inHi) :
			
 
				-	mValue(_mm256_insertf128_si256(_mm256_castsi128_si256(inLo.mValue), inHi.mValue, 1))
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-bool UVec8::operator == (UVec8Arg inV2) const
			
 
				-{
			
 
				-	return sEquals(*this, inV2).TestAllTrue();
			
 
				-}
			
 
				-
			
 
				-UVec8 UVec8::sReplicate(uint32 inV)
			
 
				-{
			
 
				-	return _mm256_set1_epi32(int(inV));
			
 
				-}
			
 
				-
			
 
				-UVec8 UVec8::sSplatX(UVec4Arg inV)
			
 
				-{
			
 
				-	return _mm256_set1_epi32(inV.GetX());
			
 
				-}
			
 
				-
			
 
				-UVec8 UVec8::sSplatY(UVec4Arg inV)
			
 
				-{
			
 
				-	return _mm256_set1_epi32(inV.GetY());
			
 
				-}
			
 
				-
			
 
				-UVec8 UVec8::sSplatZ(UVec4Arg inV)
			
 
				-{
			
 
				-	return _mm256_set1_epi32(inV.GetZ());
			
 
				-}
			
 
				-
			
 
				-UVec8 UVec8::sEquals(UVec8Arg inV1, UVec8Arg inV2)
			
 
				-{
			
 
				-#ifdef JPH_USE_AVX2
			
 
				-	return _mm256_cmpeq_epi32(inV1.mValue, inV2.mValue);
			
 
				-#else
			
 
				-	return UVec8(UVec4::sEquals(inV1.LowerVec4(), inV2.LowerVec4()), UVec4::sEquals(inV1.UpperVec4(), inV2.UpperVec4()));
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-UVec8 UVec8::sSelect(UVec8Arg inV1, UVec8Arg inV2, UVec8Arg inControl)
			
 
				-{
			
 
				-	return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue), _mm256_castsi256_ps(inControl.mValue)));
			
 
				-}
			
 
				-
			
 
				-UVec8 UVec8::sOr(UVec8Arg inV1, UVec8Arg inV2)
			
 
				-{
			
 
				-	return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue)));
			
 
				-}
			
 
				-
			
 
				-UVec8 UVec8::sXor(UVec8Arg inV1, UVec8Arg inV2)
			
 
				-{
			
 
				-	return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue)));
			
 
				-}
			
 
				-
			
 
				-UVec8 UVec8::sAnd(UVec8Arg inV1, UVec8Arg inV2)
			
 
				-{
			
 
				-	return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue)));
			
 
				-}
			
 
				-
			
 
				-template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
			
 
				-UVec8 UVec8::Swizzle() const
			
 
				-{
			
 
				-	static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
			
 
				-	static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
			
 
				-	static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
			
 
				-	static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
			
 
				-
			
 
				-	return _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(mValue), _mm256_castsi256_ps(mValue), _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX)));
			
 
				-}
			
 
				-
			
 
				-bool UVec8::TestAnyTrue() const
			
 
				-{
			
 
				-	return _mm256_movemask_ps(_mm256_castsi256_ps(mValue)) != 0;
			
 
				-}
			
 
				-
			
 
				-bool UVec8::TestAllTrue() const
			
 
				-{
			
 
				-	return _mm256_movemask_ps(_mm256_castsi256_ps(mValue)) == 0xff;
			
 
				-}
			
 
				-
			
 
				-UVec4 UVec8::LowerVec4() const
			
 
				-{
			
 
				-	return _mm256_castsi256_si128(mValue);
			
 
				-}
			
 
				-
			
 
				-UVec4 UVec8::UpperVec4() const
			
 
				-{
			
 
				-	return _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(mValue), 1));
			
 
				-}
			
 
				-
			
 
				-Vec8 UVec8::ToFloat() const
			
 
				-{
			
 
				-	return _mm256_cvtepi32_ps(mValue);
			
 
				-}
			
 
				-
			
 
				-template <const uint Count>
			
 
				-UVec8 UVec8::LogicalShiftLeft() const
			
 
				-{
			
 
				-	static_assert(Count <= 31, "Invalid shift");
			
 
				-
			
 
				-#ifdef JPH_USE_AVX2
			
 
				-	return _mm256_slli_epi32(mValue, Count);
			
 
				-#else
			
 
				-	return UVec8(LowerVec4().LogicalShiftLeft<Count>(), UpperVec4().LogicalShiftLeft<Count>());
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-template <const uint Count>
			
 
				-UVec8 UVec8::LogicalShiftRight() const
			
 
				-{
			
 
				-	static_assert(Count <= 31, "Invalid shift");
			
 
				-
			
 
				-#ifdef JPH_USE_AVX2
			
 
				-	return _mm256_srli_epi32(mValue, Count);
			
 
				-#else
			
 
				-	return UVec8(LowerVec4().LogicalShiftRight<Count>(), UpperVec4().LogicalShiftRight<Count>());
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-template <const uint Count>
			
 
				-UVec8 UVec8::ArithmeticShiftRight() const
			
 
				-{
			
 
				-	static_assert(Count <= 31, "Invalid shift");
			
 
				-
			
 
				-#ifdef JPH_USE_AVX2
			
 
				-	return _mm256_srai_epi32(mValue, Count);
			
 
				-#else
			
 
				-	return UVec8(LowerVec4().ArithmeticShiftRight<Count>(), UpperVec4().ArithmeticShiftRight<Count>());
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-JPH_NAMESPACE_END
			
--- a/Jolt/Math/Vec8.h
+++ b/Jolt/Math/Vec8.h
@@ -1,112 +0,0 @@
 
				-// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
			
 
				-// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
			
 
				-// SPDX-License-Identifier: MIT
			
 
				-
			
 
				-#pragma once
			
 
				-
			
 
				-#include <Jolt/Math/MathTypes.h>
			
 
				-
			
 
				-JPH_NAMESPACE_BEGIN
			
 
				-
			
 
				-class [[nodiscard]] Vec8
			
 
				-{
			
 
				-public:
			
 
				-	JPH_OVERRIDE_NEW_DELETE
			
 
				-
			
 
				-	/// Constructor
			
 
				-								Vec8() = default; ///< Intentionally not initialized for performance reasons
			
 
				-								Vec8(const Vec8 &inRHS) = default;
			
 
				-	JPH_INLINE					Vec8(__m256 inRHS) : mValue(inRHS)				{ }
			
 
				-
			
 
				-	/// Set 256 bit vector from 2 128 bit vectors
			
 
				-	JPH_INLINE					Vec8(Vec4Arg inLo, Vec4Arg inHi);
			
 
				-
			
 
				-	/// Vector with all zeros
			
 
				-	static JPH_INLINE Vec8		sZero();
			
 
				-
			
 
				-	/// Replicate across all components
			
 
				-	static JPH_INLINE Vec8		sReplicate(float inV);
			
 
				-
			
 
				-	/// Replicate the X component of inV to all components
			
 
				-	static JPH_INLINE Vec8		sSplatX(Vec4Arg inV);
			
 
				-
			
 
				-	/// Replicate the Y component of inV to all components
			
 
				-	static JPH_INLINE Vec8		sSplatY(Vec4Arg inV);
			
 
				-
			
 
				-	/// Replicate the Z component of inV to all components
			
 
				-	static JPH_INLINE Vec8		sSplatZ(Vec4Arg inV);
			
 
				-
			
 
				-	/// Calculates inMul1 * inMul2 + inAdd
			
 
				-	static JPH_INLINE Vec8		sFusedMultiplyAdd(Vec8Arg inMul1, Vec8Arg inMul2, Vec8Arg inAdd);
			
 
				-
			
 
				-	/// Component wise select, returns inV1 when highest bit of inControl = 0 and inV2 when highest bit of inControl = 1
			
 
				-	static JPH_INLINE Vec8		sSelect(Vec8Arg inV1, Vec8Arg inV2, UVec8Arg inControl);
			
 
				-
			
 
				-	/// Component wise min
			
 
				-	static JPH_INLINE Vec8		sMin(Vec8Arg inV1, Vec8Arg inV2);
			
 
				-
			
 
				-	/// Component wise max
			
 
				-	static JPH_INLINE Vec8		sMax(Vec8Arg inV1, Vec8Arg inV2);
			
 
				-
			
 
				-	/// Less than
			
 
				-	static JPH_INLINE UVec8		sLess(Vec8Arg inV1, Vec8Arg inV2);
			
 
				-
			
 
				-	/// Greater than
			
 
				-	static JPH_INLINE UVec8		sGreater(Vec8Arg inV1, Vec8Arg inV2);
			
 
				-
			
 
				-	/// Load from memory
			
 
				-	static JPH_INLINE Vec8		sLoadFloat8(const float *inV);
			
 
				-
			
 
				-	/// Load 8 floats from memory, 32 bytes aligned
			
 
				-	static JPH_INLINE Vec8		sLoadFloat8Aligned(const float *inV);
			
 
				-
			
 
				-	/// Get float component by index
			
 
				-	JPH_INLINE float			operator [] (uint inCoordinate) const			{ JPH_ASSERT(inCoordinate < 8); return mF32[inCoordinate]; }
			
 
				-	JPH_INLINE float &			operator [] (uint inCoordinate)					{ JPH_ASSERT(inCoordinate < 8); return mF32[inCoordinate]; }
			
 
				-
			
 
				-	/// Multiply two float vectors
			
 
				-	JPH_INLINE Vec8				operator * (Vec8Arg inV2) const;
			
 
				-
			
 
				-	/// Multiply vector by float
			
 
				-	JPH_INLINE Vec8				operator * (float inV2) const;
			
 
				-
			
 
				-	/// Add two float vectors
			
 
				-	JPH_INLINE Vec8				operator + (Vec8Arg inV2) const;
			
 
				-
			
 
				-	/// Subtract two float vectors
			
 
				-	JPH_INLINE Vec8				operator - (Vec8Arg inV2) const;
			
 
				-
			
 
				-	/// Divide
			
 
				-	JPH_INLINE Vec8				operator / (Vec8Arg inV2) const;
			
 
				-
			
 
				-	/// Reciprocal vector
			
 
				-	JPH_INLINE Vec8				Reciprocal() const;
			
 
				-
			
 
				-	/// 256 bit variant of Vec::Swizzle (no cross 128 bit lane swizzle)
			
 
				-	template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
			
 
				-	JPH_INLINE Vec8				Swizzle() const;
			
 
				-
			
 
				-	/// Get absolute value of all components
			
 
				-	JPH_INLINE Vec8				Abs() const;
			
 
				-
			
 
				-	/// Fetch the lower 128 bit from a 256 bit variable
			
 
				-	JPH_INLINE Vec4				LowerVec4() const;
			
 
				-
			
 
				-	/// Fetch the higher 128 bit from a 256 bit variable
			
 
				-	JPH_INLINE Vec4				UpperVec4() const;
			
 
				-
			
 
				-	/// Get the minimum value of the 8 floats
			
 
				-	JPH_INLINE float			ReduceMin() const;
			
 
				-
			
 
				-	union
			
 
				-	{
			
 
				-		__m256					mValue;
			
 
				-		float					mF32[8];
			
 
				-	};
			
 
				-};
			
 
				-
			
 
				-static_assert(is_trivial<Vec8>(), "Is supposed to be a trivial type!");
			
 
				-
			
 
				-JPH_NAMESPACE_END
			
 
				-
			
 
				-#include "Vec8.inl"
			
--- a/Jolt/Math/Vec8.inl
+++ b/Jolt/Math/Vec8.inl
@@ -1,148 +0,0 @@
 
				-// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
			
 
				-// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
			
 
				-// SPDX-License-Identifier: MIT
			
 
				-
			
 
				-#include <Jolt/Math/UVec8.h>
			
 
				-
			
 
				-JPH_NAMESPACE_BEGIN
			
 
				-
			
 
				-Vec8::Vec8(Vec4Arg inLo, Vec4Arg inHi) :
			
 
				-	mValue(_mm256_insertf128_ps(_mm256_castps128_ps256(inLo.mValue), inHi.mValue, 1))
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sZero()
			
 
				-{
			
 
				-	return _mm256_setzero_ps();
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sReplicate(float inV)
			
 
				-{
			
 
				-	return _mm256_set1_ps(inV);
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sSplatX(Vec4Arg inV)
			
 
				-{
			
 
				-	return _mm256_set1_ps(inV.GetX());
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sSplatY(Vec4Arg inV)
			
 
				-{
			
 
				-	return _mm256_set1_ps(inV.GetY());
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sSplatZ(Vec4Arg inV)
			
 
				-{
			
 
				-	return _mm256_set1_ps(inV.GetZ());
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sFusedMultiplyAdd(Vec8Arg inMul1, Vec8Arg inMul2, Vec8Arg inAdd)
			
 
				-{
			
 
				-#ifdef JPH_USE_FMADD
			
 
				-	return _mm256_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
			
 
				-#else
			
 
				-	return _mm256_add_ps(_mm256_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sSelect(Vec8Arg inV1, Vec8Arg inV2, UVec8Arg inControl)
			
 
				-{
			
 
				-	return _mm256_blendv_ps(inV1.mValue, inV2.mValue, _mm256_castsi256_ps(inControl.mValue));
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sMin(Vec8Arg inV1, Vec8Arg inV2)
			
 
				-{
			
 
				-	return _mm256_min_ps(inV1.mValue, inV2.mValue);
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sMax(Vec8Arg inV1, Vec8Arg inV2)
			
 
				-{
			
 
				-	return _mm256_max_ps(inV1.mValue, inV2.mValue);
			
 
				-}
			
 
				-
			
 
				-UVec8 Vec8::sLess(Vec8Arg inV1, Vec8Arg inV2)
			
 
				-{
			
 
				-	return _mm256_castps_si256(_mm256_cmp_ps(inV1.mValue, inV2.mValue, _CMP_LT_OQ));
			
 
				-}
			
 
				-
			
 
				-UVec8 Vec8::sGreater(Vec8Arg inV1, Vec8Arg inV2)
			
 
				-{
			
 
				-	return _mm256_castps_si256(_mm256_cmp_ps(inV1.mValue, inV2.mValue, _CMP_GT_OQ));
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sLoadFloat8(const float *inV)
			
 
				-{
			
 
				-	return _mm256_loadu_ps(inV);
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::sLoadFloat8Aligned(const float *inV)
			
 
				-{
			
 
				-	return _mm256_load_ps(inV);
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::operator * (Vec8Arg inV2) const
			
 
				-{
			
 
				-	return _mm256_mul_ps(mValue, inV2.mValue);
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::operator * (float inV2) const
			
 
				-{
			
 
				-	return _mm256_mul_ps(mValue, _mm256_set1_ps(inV2));
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::operator + (Vec8Arg inV2) const
			
 
				-{
			
 
				-	return _mm256_add_ps(mValue, inV2.mValue);
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::operator - (Vec8Arg inV2) const
			
 
				-{
			
 
				-	return _mm256_sub_ps(mValue, inV2.mValue);
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::operator / (Vec8Arg inV2) const
			
 
				-{
			
 
				-	return _mm256_div_ps(mValue, inV2.mValue);
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::Reciprocal() const
			
 
				-{
			
 
				-	return Vec8::sReplicate(1.0f) / mValue;
			
 
				-}
			
 
				-
			
 
				-template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
			
 
				-Vec8 Vec8::Swizzle() const
			
 
				-{
			
 
				-	static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
			
 
				-	static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
			
 
				-	static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
			
 
				-	static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
			
 
				-
			
 
				-	return _mm256_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
			
 
				-}
			
 
				-
			
 
				-Vec8 Vec8::Abs() const
			
 
				-{
			
 
				-#if defined(JPH_USE_AVX512)
			
 
				-	return _mm256_range_ps(mValue, mValue, 0b1000);
			
 
				-#else
			
 
				-	return _mm256_max_ps(_mm256_sub_ps(_mm256_setzero_ps(), mValue), mValue);
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-Vec4 Vec8::LowerVec4() const
			
 
				-{
			
 
				-	return _mm256_castps256_ps128(mValue);
			
 
				-}
			
 
				-
			
 
				-Vec4 Vec8::UpperVec4() const
			
 
				-{
			
 
				-	return _mm256_extractf128_ps(mValue, 1);
			
 
				-}
			
 
				-
			
 
				-float Vec8::ReduceMin() const
			
 
				-{
			
 
				-	return Vec4::sMin(LowerVec4(), UpperVec4()).ReduceMin();
			
 
				-}
			
 
				-
			
 
				-JPH_NAMESPACE_END