Browse Source

Created optimized version of cast sphere vs triangle (#21)

* Moved check if there are any non-active edges higher up to avoid a matrix multiply
* Removed some code duplication in tree walks
* MeshShape: Deduplicated triangle walking code
* Incremented max jobs to fix failing unit test
* Added sphere vs triangle edge and vertex unit tests
jrouwe 3 years ago
parent
commit
f6fd4c8f74

+ 2 - 2
Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h

@@ -207,7 +207,7 @@ public:
 		}
 
 		/// Constructor
-		inline explicit				DecodingContext(const Header *inHeader) :
+		JPH_INLINE explicit			DecodingContext(const Header *inHeader) :
 			mRootBoundsMin(Vec3::sLoadFloat3Unsafe(inHeader->mRootBoundsMin)),
 			mRootBoundsMax(Vec3::sLoadFloat3Unsafe(inHeader->mRootBoundsMax))
 		{
@@ -217,7 +217,7 @@ public:
 
 		/// Walk the node tree calling the Visitor::VisitNodes for each node encountered and Visitor::VisitTriangles for each triangle encountered
 		template <class TriangleContext, class Visitor>
-		inline void					WalkTree(const uint8 *inBufferStart, const TriangleContext &inTriangleContext, Visitor &ioVisitor)
+		JPH_INLINE void				WalkTree(const uint8 *inBufferStart, const TriangleContext &inTriangleContext, Visitor &ioVisitor)
 		{
 			do
 			{

+ 7 - 0
Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h

@@ -417,6 +417,13 @@ public:
 			return first_block[inTriangleIndex >> 2].mFlags[inTriangleIndex & 0b11];
 		}
 
+		/// Convenience function: unpacks the triangles into outTriangles and their flags into outTriangleFlags
+		JPH_INLINE void				Unpack(Vec3Arg inBoundsMin, Vec3Arg inBoundsMax, const void *inTriangleStart, uint32 inNumTriangles, Vec3 *outTriangles, uint8 *outTriangleFlags) const
+		{
+			Unpack(inBoundsMin, inBoundsMax, inTriangleStart, inNumTriangles, outTriangles);
+			sGetFlags(inTriangleStart, inNumTriangles, outTriangleFlags);
+		}
+
 	private:
 		Vec4						mOffsetX;
 		Vec4						mOffsetY;

+ 3 - 0
Jolt/Jolt.cmake

@@ -182,6 +182,8 @@ set(JOLT_PHYSICS_SRC_FILES
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/BroadPhase/QuadTree.h
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/CastConvexVsTriangles.cpp
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/CastConvexVsTriangles.h
+	${JOLT_PHYSICS_ROOT}/Physics/Collision/CastSphereVsTriangles.cpp
+	${JOLT_PHYSICS_ROOT}/Physics/Collision/CastSphereVsTriangles.h
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/CastResult.h
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/CollectFacesMode.h
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/CollideConvexVsTriangles.cpp
@@ -256,6 +258,7 @@ set(JOLT_PHYSICS_SRC_FILES
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/Shape/TriangleShape.h
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/ShapeCast.h
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/ShapeFilter.h
+	${JOLT_PHYSICS_ROOT}/Physics/Collision/SortReverseAndStore.h
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/TransformedShape.cpp
 	${JOLT_PHYSICS_ROOT}/Physics/Collision/TransformedShape.h
 	${JOLT_PHYSICS_ROOT}/Physics/Constraints/ConeConstraint.cpp

+ 2 - 3
Jolt/Physics/Collision/ActiveEdges.h

@@ -39,9 +39,8 @@ namespace ActiveEdges
 	/// @return Returns inNormal if an active edge was hit, otherwise returns inTriangleNormal
 	inline static Vec3					FixNormal(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, Vec3Arg inTriangleNormal, uint8 inActiveEdges, Vec3Arg inPoint, Vec3Arg inNormal, Vec3Arg inMovementDirection)
 	{
-		// Check: All of the edges are active, we have the correct normal already
-		if (inActiveEdges == 0b111)
-			return inNormal;
+		// Check: All of the edges are active, we have the correct normal already. No need to call this function!
+		JPH_ASSERT(inActiveEdges != 0b111);
 
 		// If inNormal would affect movement less than inTriangleNormal use inNormal
 		// This is done since it is really hard to make a distinction between sliding over a horizontal triangulated grid and hitting an edge (in this case you want to use the triangle normal)

+ 28 - 62
Jolt/Physics/Collision/BroadPhase/QuadTree.cpp

@@ -6,6 +6,7 @@
 #include <Physics/Collision/RayCast.h>
 #include <Physics/Collision/AABoxCast.h>
 #include <Physics/Collision/CastResult.h>
+#include <Physics/Collision/SortReverseAndStore.h>
 #include <Physics/Body/BodyPair.h>
 #include <Physics/PhysicsLock.h>
 #include <Geometry/AABox4.h>
@@ -986,34 +987,31 @@ JPH_INLINE void QuadTree::WalkTree(const ObjectLayerFilter &inObjectLayerFilter,
 		{
 			JPH_IF_TRACK_BROADPHASE_STATS(++nodes_visited;)
 
-			// Process normal node
-			const Node &node = mAllocator->Get(child_node_id.GetNodeIndex());
-			JPH_ASSERT(IsAligned(&node, JPH_CACHE_LINE_SIZE));
-
-			// Load bounds of 4 children
-			Vec4 bounds_minx = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMinX);
-			Vec4 bounds_miny = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMinY);
-			Vec4 bounds_minz = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMinZ);
-			Vec4 bounds_maxx = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMaxX);
-			Vec4 bounds_maxy = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMaxY);
-			Vec4 bounds_maxz = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMaxZ);
-
-			// Load ids for 4 children
-			UVec4 child_ids = UVec4::sLoadInt4Aligned((const uint32 *)&node.mChildNodeID[0]);
-
-			// Check which sub nodes to visit
-			int num_results = ioVisitor.VisitNodes(bounds_minx, bounds_miny, bounds_minz, bounds_maxx, bounds_maxy, bounds_maxz, child_ids, top);
-			if (num_results > 0)
+			// Check if stack can hold more nodes
+			if (top + 4 < cStackSize)
 			{
-				// Push them onto the stack
-				if (top + 4 < cStackSize)
-				{
-					child_ids.StoreInt4((uint32 *)&node_stack[top]);
-					top += num_results;
-				}
-				else
-					JPH_ASSERT(false, "Stack full!");
+				// Process normal node
+				const Node &node = mAllocator->Get(child_node_id.GetNodeIndex());
+				JPH_ASSERT(IsAligned(&node, JPH_CACHE_LINE_SIZE));
+
+				// Load bounds of 4 children
+				Vec4 bounds_minx = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMinX);
+				Vec4 bounds_miny = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMinY);
+				Vec4 bounds_minz = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMinZ);
+				Vec4 bounds_maxx = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMaxX);
+				Vec4 bounds_maxy = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMaxY);
+				Vec4 bounds_maxz = Vec4::sLoadFloat4Aligned((const Float4 *)&node.mBoundsMaxZ);
+
+				// Load ids for 4 children
+				UVec4 child_ids = UVec4::sLoadInt4Aligned((const uint32 *)&node.mChildNodeID[0]);
+
+				// Check which sub nodes to visit
+				int num_results = ioVisitor.VisitNodes(bounds_minx, bounds_miny, bounds_minz, bounds_maxx, bounds_maxy, bounds_maxz, child_ids, top);
+				child_ids.StoreInt4((uint32 *)&node_stack[top]);
+				top += num_results;
 			}
+			else
+				JPH_ASSERT(false, "Stack full!");
 		}
 
 		// Fetch next node until we find one that the visitor wants to see
@@ -1073,24 +1071,8 @@ void QuadTree::CastRay(const RayCast &inRay, RayCastBodyCollector &ioCollector,
 			// Test the ray against 4 bounding boxes
 			Vec4 fraction = RayAABox4(mOrigin, mInvDirection, inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ);
 
-			// Count how many results are hitting
-			UVec4 hitting = Vec4::sLess(fraction, Vec4::sReplicate(mCollector.GetEarlyOutFraction()));
-			int num_results = hitting.CountTrues();
-			if (num_results > 0)
-			{
-				// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-				Vec4::sSort4Reverse(fraction, ioChildNodeIDs);
-
-				// Shift the results so that only the hitting ones remain
-				ioChildNodeIDs = ioChildNodeIDs.ShiftComponents4Minus(num_results);
-				fraction = fraction.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-
-				// Push them onto the stack
-				if (inStackTop + 4 < cStackSize)
-					fraction.StoreFloat4((Float4 *)&mFractionStack[inStackTop]);
-			}
-
-			return num_results;
+			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
+			return SortReverseAndStore(fraction, mCollector.GetEarlyOutFraction(), ioChildNodeIDs, &mFractionStack[inStackTop]);
 		}
 
 		/// Visit a body, returns false if the algorithm should terminate because no hits can be generated anymore
@@ -1388,24 +1370,8 @@ void QuadTree::CastAABox(const AABoxCast &inBox, CastShapeBodyCollector &ioColle
 			// Test 4 children
 			Vec4 fraction = RayAABox4(mOrigin, mInvDirection, inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ);
 
-			// Count how many results are hitting
-			UVec4 hitting = Vec4::sLess(fraction, Vec4::sReplicate(mCollector.GetEarlyOutFraction()));
-			int num_results = hitting.CountTrues();
-			if (num_results > 0)
-			{
-				// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-				Vec4::sSort4Reverse(fraction, ioChildNodeIDs);
-
-				// Shift the results so that only the hitting ones remain
-				ioChildNodeIDs = ioChildNodeIDs.ShiftComponents4Minus(num_results);
-				fraction = fraction.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-
-				// Push them onto the stack
-				if (inStackTop + 4 < cStackSize)
-					fraction.StoreFloat4((Float4 *)&mFractionStack[inStackTop]);
-			}
-
-			return num_results;
+			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
+			return SortReverseAndStore(fraction, mCollector.GetEarlyOutFraction(), ioChildNodeIDs, &mFractionStack[inStackTop]);
 		}
 
 		/// Visit a body, returns false if the algorithm should terminate because no hits can be generated anymore

+ 1 - 1
Jolt/Physics/Collision/CastConvexVsTriangles.cpp

@@ -67,7 +67,7 @@ void CastConvexVsTriangles::Cast(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, uint8
 	if (epa.CastShape(mShapeCast.mCenterOfMassStart, mShapeCast.mDirection, mShapeCastSettings.mCollisionTolerance, mShapeCastSettings.mPenetrationTolerance, *mSupport, triangle, mSupport->GetConvexRadius(), 0.0f, mShapeCastSettings.mReturnDeepestPoint, fraction, contact_point_a, contact_point_b, contact_normal))
 	{
 		// Check if we have enabled active edge detection
-		if (mShapeCastSettings.mActiveEdgeMode == EActiveEdgeMode::CollideOnlyWithActive)
+		if (mShapeCastSettings.mActiveEdgeMode == EActiveEdgeMode::CollideOnlyWithActive && inActiveEdges != 0b111)
 		{
 			// Convert the active edge velocity hint to local space
 			Vec3 active_edge_movement_direction = mCenterOfMassTransform2.Multiply3x3Transposed(mShapeCastSettings.mActiveEdgeMovementDirection);

+ 216 - 0
Jolt/Physics/Collision/CastSphereVsTriangles.cpp

@@ -0,0 +1,216 @@
+// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#include <Jolt.h>
+
+#include <Physics/Collision/CastSphereVsTriangles.h>
+#include <Physics/Collision/TransformedShape.h>
+#include <Physics/Collision/Shape/ScaleHelpers.h>
+#include <Physics/Collision/Shape/SphereShape.h>
+#include <Physics/Collision/ActiveEdges.h>
+#include <Physics/Collision/NarrowPhaseStats.h>
+#include <Geometry/ClosestPoint.h>
+#include <Geometry/RaySphere.h>
+#include <Core/Profiler.h>
+
+namespace JPH {
+
+CastSphereVsTriangles::CastSphereVsTriangles(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Vec3 &inScale, const ShapeFilter &inShapeFilter, const Mat44 &inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, CastShapeCollector &ioCollector) :
+	mStart(inShapeCast.mCenterOfMassStart.GetTranslation()),
+	mDirection(inShapeCast.mDirection),
+	mShapeCastSettings(inShapeCastSettings),
+	mShapeFilter(inShapeFilter), 
+	mCenterOfMassTransform2(inCenterOfMassTransform2),
+	mScale(inScale),
+	mSubShapeIDCreator1(inSubShapeIDCreator1),
+	mCollector(ioCollector) 
+{ 
+	// This helper only handles spheres: verify the sub type and downcast the shape that is being cast
+	JPH_ASSERT(inShapeCast.mShape->GetSubType() == EShapeSubType::Sphere);
+	const SphereShape *sphere = static_cast<const SphereShape *>(inShapeCast.mShape);
+
+	// Scale the radius by the absolute value of the X component of the cast shape's scale
+	mRadius = sphere->GetRadius() * abs(inShapeCast.mScale.GetX());
+
+	// Determine if inScale turns the shape we cast against inside out, the sign is used to flip the triangle normal in Cast
+	mScaleSign = ScaleHelpers::IsInsideOut(inScale)? -1.0f : 1.0f;
+}
+
+void CastSphereVsTriangles::AddHit(bool inBackFacing, const SubShapeID &inSubShapeID2, float inFraction, Vec3Arg inContactPointA, Vec3Arg inContactPointB, Vec3Arg inContactNormal)
+{
+	// The contact points are relative to mStart, make them absolute again and convert everything to world space
+	Vec3 contact_point_a = mCenterOfMassTransform2 * (mStart + inContactPointA);
+	Vec3 contact_point_b = mCenterOfMassTransform2 * (mStart + inContactPointB);
+	Vec3 contact_normal_world = mCenterOfMassTransform2.Multiply3x3(inContactNormal);
+	
+	// It's a hit, store the sub shape IDs
+	ShapeCastResult result(inFraction, contact_point_a, contact_point_b, contact_normal_world, inBackFacing, mSubShapeIDCreator1.GetID(), inSubShapeID2, TransformedShape::sGetBodyID(mCollector.GetContext()));
+
+	JPH_IF_TRACK_NARROWPHASE_STATS(TrackNarrowPhaseCollector track;)
+	mCollector.AddHit(result);
+}
+
+void CastSphereVsTriangles::AddHitWithActiveEdgeDetection(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, bool inBackFacing, Vec3Arg inTriangleNormal, uint8 inActiveEdges, const SubShapeID &inSubShapeID2, float inFraction, Vec3Arg inContactPointA, Vec3Arg inContactPointB, Vec3Arg inContactNormal)
+{
+	// Only fix the normal when active edge detection is enabled and not all edges are active (FixNormal asserts inActiveEdges != 0b111)
+	Vec3 contact_normal = inContactNormal;
+	if (mShapeCastSettings.mActiveEdgeMode == EActiveEdgeMode::CollideOnlyWithActive && inActiveEdges != 0b111)
+	{
+		// Convert the active edge velocity hint to local space
+		Vec3 active_edge_movement_direction = mCenterOfMassTransform2.Multiply3x3Transposed(mShapeCastSettings.mActiveEdgeMovementDirection);
+
+		// Update the contact normal to account for active edges
+		// Note that we flip the triangle normal as the penetration axis is pointing towards the triangle instead of away
+		contact_normal = ActiveEdges::FixNormal(inV0, inV1, inV2, inBackFacing? inTriangleNormal : -inTriangleNormal, inActiveEdges, inContactPointB, inContactNormal, active_edge_movement_direction);
+	}
+
+	AddHit(inBackFacing, inSubShapeID2, inFraction, inContactPointA, inContactPointB, contact_normal);
+}
+
+// This is a simplified version of the ray cylinder test from: Real Time Collision Detection - Christer Ericson
+// Chapter 5.3.7, page 194-197. Some conditions have been removed as we're not interested in hitting the caps of the cylinder.
+// Note that the ray origin is assumed to be the origin here. Returns the fraction in [0, 1] where the side of the cylinder is hit or FLT_MAX when there is no hit.
+float CastSphereVsTriangles::RayCylinder(Vec3Arg inRayDirection, Vec3Arg inCylinderA, Vec3Arg inCylinderB, float inRadius) const
+{
+	// Calculate cylinder axis
+	Vec3 axis = inCylinderB - inCylinderA;
+
+	// Make ray start relative to cylinder side A (moving cylinder A to the origin)
+	Vec3 start = -inCylinderA;
+
+	// Test if segment is fully on the A side of the cylinder
+	float start_dot_axis = start.Dot(axis);
+	float direction_dot_axis = inRayDirection.Dot(axis);
+	float end_dot_axis = start_dot_axis + direction_dot_axis;
+	if (start_dot_axis < 0.0f && end_dot_axis < 0.0f) 
+		return FLT_MAX;
+
+	// Test if segment is fully on the B side of the cylinder
+	float axis_len_sq = axis.LengthSq();
+	if (start_dot_axis > axis_len_sq && end_dot_axis > axis_len_sq) 
+		return FLT_MAX;
+
+	// Calculate a, b and c, the factors for quadratic equation
+	// We're basically solving the ray: x = start + direction * t
+	// The closest point to x on the segment A B is: w = (x . axis) * axis / (axis . axis)
+	// The distance between x and w should be radius: (x - w) . (x - w) = radius^2
+	// Solving this gives the following:
+	float a = axis_len_sq * inRayDirection.LengthSq() - Square(direction_dot_axis);
+	if (abs(a) < 1.0e-6f)
+		return FLT_MAX; // Segment runs parallel to cylinder axis, stop processing, we will either hit at fraction = 0 or we'll hit a vertex
+	float b = axis_len_sq * start.Dot(inRayDirection) - direction_dot_axis * start_dot_axis; // should be multiplied by 2, instead we'll divide a and c by 2 when we solve the quadratic equation
+	float c = axis_len_sq * (start.LengthSq() - Square(inRadius)) - Square(start_dot_axis);
+	float det = Square(b) - a * c; // normally 4 * a * c but since both a and c need to be divided by 2 we lose the 4
+	if (det < 0.0f) 
+		return FLT_MAX; // No solution to quadratic equation
+	
+	// Solve fraction t where the ray hits the cylinder
+	float t = -(b + sqrt(det)) / a; // normally divided by 2 * a but since a should be divided by 2 we lose the 2
+	if (t < 0.0f || t > 1.0f) 
+		return FLT_MAX; // Intersection lies outside segment
+	if (start_dot_axis + t * direction_dot_axis < 0.0f || start_dot_axis + t * direction_dot_axis > axis_len_sq) 
+		return FLT_MAX; // Intersection outside the end point of the cylinder, stop processing, we will possibly hit a vertex
+	return t;
+}
+
+void CastSphereVsTriangles::Cast(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, uint8 inActiveEdges, const SubShapeID &inSubShapeID2)
+{
+	JPH_PROFILE_FUNCTION();
+
+	// Scale triangle and make it relative to the start of the cast
+	Vec3 v0 = mScale * inV0 - mStart;
+	Vec3 v1 = mScale * inV1 - mStart;
+	Vec3 v2 = mScale * inV2 - mStart;
+
+	// Calculate triangle normal
+	Vec3 triangle_normal = mScaleSign * (v1 - v0).Cross(v2 - v0).Normalized();
+
+	// Backface check
+	float normal_dot_direction = triangle_normal.Dot(mDirection);
+	bool back_facing = normal_dot_direction > 0.0f;
+	if (mShapeCastSettings.mBackFaceModeTriangles == EBackFaceMode::IgnoreBackFaces && back_facing)
+		return;
+
+	// Test the shape filter if this shape should collide
+	if (!mShapeFilter.ShouldCollide(mSubShapeIDCreator1.GetID(), inSubShapeID2))
+		return;
+
+	// Test if distance between the sphere and plane of triangle is smaller than or equal to the radius
+	if (abs(v0.Dot(triangle_normal)) <= mRadius)
+	{
+		// Check if the sphere intersects at the start of the cast
+		uint32 closest_feature;
+		Vec3 q = ClosestPoint::GetClosestPointOnTriangle(v0, v1, v2, closest_feature);
+		float q_len_sq = q.LengthSq();
+		if (q_len_sq <= Square(mRadius))
+		{
+			// Yes it does, generate contacts now
+			float q_len = sqrt(q_len_sq);
+			Vec3 contact_normal = q_len > 0.0f? q / q_len : Vec3::sAxisY();
+			Vec3 contact_point_a = q + contact_normal * (mRadius - q_len);
+			Vec3 contact_point_b = q;
+			AddHitWithActiveEdgeDetection(v0, v1, v2, back_facing, triangle_normal, inActiveEdges, inSubShapeID2, 0.0f, contact_point_a, contact_point_b, contact_normal);
+			return;
+		}
+	}
+	else
+	{
+		// Check if cast is not parallel to the plane of the triangle
+		float abs_normal_dot_direction = abs(normal_dot_direction);
+		if (abs_normal_dot_direction > 1.0e-6f)
+		{
+			// Calculate the point on the sphere that will hit the triangle's plane first and calculate a fraction where it will do so
+			Vec3 d = Sign(normal_dot_direction) * mRadius * triangle_normal;
+			float plane_intersection = (v0 - d).Dot(triangle_normal) / normal_dot_direction;
+
+			// Check if sphere will hit in the interval that we're interested in
+			if (plane_intersection * abs_normal_dot_direction < -mRadius	// Sphere hits the plane before the sweep, cannot intersect
+				|| plane_intersection > 1.0f)								// Sphere hits the plane after the sweep, cannot intersect
+				return;
+
+			// We can only report an interior hit if we're hitting the plane during our sweep and not before
+			if (plane_intersection >= 0.0f)
+			{
+				// Calculate the point of contact on the plane
+				Vec3 p = d + plane_intersection * mDirection;
+
+				// Check if this is an interior point
+				float u, v, w;
+				ClosestPoint::GetBaryCentricCoordinates(v0 - p, v1 - p, v2 - p, u, v, w);
+				if (u >= 0.0f && v >= 0.0f && w >= 0.0f)
+				{
+					// Interior point, we found the collision point. We don't need to check active edges.
+					AddHit(back_facing, inSubShapeID2, plane_intersection, p, p, back_facing? triangle_normal : -triangle_normal);
+					return;
+				}
+			}
+		}
+	}
+
+	// No interior hit was reported, test the 3 edges as cylinders with the radius of the sphere
+	float fraction = RayCylinder(mDirection, v0, v1, mRadius);
+	fraction = min(fraction, RayCylinder(mDirection, v1, v2, mRadius));
+	fraction = min(fraction, RayCylinder(mDirection, v2, v0, mRadius));
+
+	// Test the 3 vertices as spheres with the radius of the sphere
+	fraction = min(fraction, RaySphere(Vec3::sZero(), mDirection, v0, mRadius));
+	fraction = min(fraction, RaySphere(Vec3::sZero(), mDirection, v1, mRadius));
+	fraction = min(fraction, RaySphere(Vec3::sZero(), mDirection, v2, mRadius));
+
+	// Check if we have a collision (RayCylinder returns FLT_MAX when an edge is not hit)
+	JPH_ASSERT(fraction >= 0.0f);
+	if (fraction <= 1.0f)
+	{
+		// Calculate the center of the sphere at the point of contact
+		Vec3 p = fraction * mDirection;
+
+		// Get contact point and normal
+		uint32 closest_feature;
+		Vec3 q = ClosestPoint::GetClosestPointOnTriangle(v0 - p, v1 - p, v2 - p, closest_feature);
+		Vec3 contact_normal = q.Normalized();
+		Vec3 contact_point_ab = p + q;
+		AddHitWithActiveEdgeDetection(v0, v1, v2, back_facing, triangle_normal, inActiveEdges, inSubShapeID2, fraction, contact_point_ab, contact_point_ab, contact_normal);
+	}
+}
+
+} // JPH

+ 50 - 0
Jolt/Physics/Collision/CastSphereVsTriangles.h

@@ -0,0 +1,50 @@
+// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <Physics/Collision/ShapeCast.h>
+
+namespace JPH {
+
+/// Collision detection helper that casts a sphere vs one or more triangles
+class CastSphereVsTriangles
+{
+public:
+	/// Constructor
+	/// @param inShapeCast The sphere to cast against the triangles and its start and direction
+	/// @param inShapeCastSettings Settings for performing the cast
+	/// @param inScale Local space scale for the shape to cast against.
+	/// @param inShapeFilter Determines if sub shapes of the shape can collide
+	/// @param inCenterOfMassTransform2 Is the center of mass transform of shape 2 (excluding scale), this is used to provide a transform to the shape cast result so that local quantities can be transformed into world space.
+	/// @param inSubShapeIDCreator1 Class that tracks the current sub shape ID for the casting shape
+	/// @param ioCollector The collector that receives the results.
+									CastSphereVsTriangles(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Vec3 &inScale, const ShapeFilter &inShapeFilter, const Mat44 &inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, CastShapeCollector &ioCollector);
+
+	/// Cast sphere with a single triangle
+	/// @param inV0 , inV1 , inV2: CCW triangle vertices
+	/// @param inActiveEdges bit 0 = edge v0..v1 is active, bit 1 = edge v1..v2 is active, bit 2 = edge v2..v0 is active
+	/// An active edge is an edge that is not connected to another triangle in such a way that it is impossible to collide with the edge
+	/// @param inSubShapeID2 The sub shape ID for the triangle
+	void							Cast(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, uint8 inActiveEdges, const SubShapeID &inSubShapeID2);
+
+protected:
+	Vec3							mStart;								///< Starting location of the sphere
+	Vec3							mDirection;							///< Direction and length of movement of sphere
+	float							mRadius;							///< Scaled radius of sphere
+	const ShapeCastSettings &		mShapeCastSettings;					///< Settings for performing the cast (held by reference)
+	const ShapeFilter &				mShapeFilter;						///< Determines if sub shapes of the shape can collide (held by reference)
+	const Mat44 &					mCenterOfMassTransform2;			///< Center of mass transform of shape 2 excluding scale, used to convert hits to world space (held by reference)
+	Vec3							mScale;								///< Local space scale for the shape to cast against
+	SubShapeIDCreator				mSubShapeIDCreator1;				///< Tracks the current sub shape ID for the casting shape
+	CastShapeCollector &			mCollector;							///< The collector that receives the results
+
+private:
+	void							AddHit(bool inBackFacing, const SubShapeID &inSubShapeID2, float inFraction, Vec3Arg inContactPointA, Vec3Arg inContactPointB, Vec3Arg inContactNormal);
+	void							AddHitWithActiveEdgeDetection(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, bool inBackFacing, Vec3Arg inTriangleNormal, uint8 inActiveEdges, const SubShapeID &inSubShapeID2, float inFraction, Vec3Arg inContactPointA, Vec3Arg inContactPointB, Vec3Arg inContactNormal);
+	float							RayCylinder(Vec3Arg inRayDirection, Vec3Arg inCylinderA, Vec3Arg inCylinderB, float inRadius) const;
+
+	float							mScaleSign;							///< Sign of the scale, -1 if object is inside out, 1 if not
+};
+
+} // JPH

+ 1 - 1
Jolt/Physics/Collision/CollideConvexVsTriangles.cpp

@@ -120,7 +120,7 @@ void CollideConvexVsTriangles::Collide(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2,
 		point1 -= penetration_axis * (mCollideShapeSettings.mMaxSeparationDistance / penetration_axis_len);
 
 	// Check if we have enabled active edge detection
-	if (mCollideShapeSettings.mActiveEdgeMode == EActiveEdgeMode::CollideOnlyWithActive)
+	if (mCollideShapeSettings.mActiveEdgeMode == EActiveEdgeMode::CollideOnlyWithActive && inActiveEdges != 0b111)
 	{
 		// Convert the active edge velocity hint to local space
 		Vec3 active_edge_movement_direction = mTransform1.Multiply3x3Transposed(mCollideShapeSettings.mActiveEdgeMovementDirection);

+ 71 - 37
Jolt/Physics/Collision/Shape/HeightFieldShape.cpp

@@ -6,16 +6,19 @@
 #include <Physics/Collision/Shape/HeightFieldShape.h>
 #include <Physics/Collision/Shape/ConvexShape.h>
 #include <Physics/Collision/Shape/ScaleHelpers.h>
+#include <Physics/Collision/Shape/SphereShape.h>
 #include <Physics/Collision/RayCast.h>
 #include <Physics/Collision/ShapeCast.h>
 #include <Physics/Collision/CastResult.h>
 #include <Physics/Collision/CollidePointResult.h>
 #include <Physics/Collision/ShapeFilter.h>
 #include <Physics/Collision/CastConvexVsTriangles.h>
+#include <Physics/Collision/CastSphereVsTriangles.h>
 #include <Physics/Collision/CollideConvexVsTriangles.h>
 #include <Physics/Collision/TransformedShape.h>
 #include <Physics/Collision/ActiveEdges.h>
 #include <Physics/Collision/CollisionDispatch.h>
+#include <Physics/Collision/SortReverseAndStore.h>
 #include <Core/Profiler.h>
 #include <Core/StringTools.h>
 #include <Core/StreamIn.h>
@@ -1298,20 +1301,9 @@ bool HeightFieldShape::CastRay(const RayCast &inRay, const SubShapeIDCreator &in
 		{
 			// Test bounds of 4 children
 			Vec4 distance = RayAABox4(mRayOrigin, mRayInvDirection, inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ);
-	
-			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-			Vec4::sSort4Reverse(distance, ioProperties);
-
-			// Count how many results are closer
-			UVec4 closer = Vec4::sLess(distance, Vec4::sReplicate(mHit.mFraction));
-			int num_results = closer.CountTrues();
 
-			// Shift the results so that only the closer ones remain
-			distance = distance.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-			ioProperties = ioProperties.ShiftComponents4Minus(num_results);
-
-			distance.StoreFloat4((Float4 *)&mDistanceStack[inStackTop]);
-			return num_results;
+			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
+			return SortReverseAndStore(distance, mHit.mFraction, ioProperties, &mDistanceStack[inStackTop]);
 		}
 
 		JPH_INLINE void			VisitTriangle(uint inX, uint inY, uint inTriangle, Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2) 
@@ -1383,18 +1375,7 @@ void HeightFieldShape::CastRay(const RayCast &inRay, const RayCastSettings &inRa
 			Vec4 distance = RayAABox4(mRayOrigin, mRayInvDirection, inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ);
 	
 			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-			Vec4::sSort4Reverse(distance, ioProperties);
-
-			// Count how many results are closer
-			UVec4 closer = Vec4::sLess(distance, Vec4::sReplicate(mCollector.GetEarlyOutFraction()));
-			int num_results = closer.CountTrues();
-
-			// Shift the results so that only the closer ones remain
-			distance = distance.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-			ioProperties = ioProperties.ShiftComponents4Minus(num_results);
-
-			distance.StoreFloat4((Float4 *)&mDistanceStack[inStackTop]);
-			return num_results;
+			return SortReverseAndStore(distance, mCollector.GetEarlyOutFraction(), ioProperties, &mDistanceStack[inStackTop]);
 		}
 
 		JPH_INLINE void			VisitTriangle(uint inX, uint inY, uint inTriangle, Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2) const
@@ -1473,18 +1454,7 @@ void HeightFieldShape::sCastConvexVsHeightField(const ShapeCast &inShapeCast, co
 			Vec4 distance = RayAABox4(mBoxCenter, mInvDirection, bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z);
 	
 			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-			Vec4::sSort4Reverse(distance, ioProperties);
-
-			// Count how many results are closer
-			UVec4 closer = Vec4::sLess(distance, Vec4::sReplicate(mCollector.GetEarlyOutFraction()));
-			int num_results = closer.CountTrues();
-
-			// Shift the results so that only the closer ones remain
-			distance = distance.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-			ioProperties = ioProperties.ShiftComponents4Minus(num_results);
-
-			distance.StoreFloat4((Float4 *)&mDistanceStack[inStackTop]);
-			return num_results;
+			return SortReverseAndStore(distance, mCollector.GetEarlyOutFraction(), ioProperties, &mDistanceStack[inStackTop]);
 		}
 
 		JPH_INLINE void				VisitTriangle(uint inX, uint inY, uint inTriangle, Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2)
@@ -1518,6 +1488,67 @@ void HeightFieldShape::sCastConvexVsHeightField(const ShapeCast &inShapeCast, co
 	shape->WalkHeightField(visitor);
 }
 
+void HeightFieldShape::sCastSphereVsHeightField(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector)
+{
+	JPH_PROFILE_FUNCTION();
+
+	struct Visitor : public CastSphereVsTriangles
+	{
+		using CastSphereVsTriangles::CastSphereVsTriangles;
+
+		JPH_INLINE bool				ShouldAbort() const
+		{
+			return mCollector.ShouldEarlyOut();
+		}
+
+		JPH_INLINE bool				ShouldVisitRangeBlock(int inStackTop) const
+		{
+			return mDistanceStack[inStackTop] < mCollector.GetEarlyOutFraction();
+		}
+
+		JPH_INLINE int				VisitRangeBlock(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+		{
+			// Scale the bounding boxes of this node
+			Vec4 bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z;
+			AABox4Scale(mScale, inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ, bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z);
+
+			// Enlarge them by the radius of the sphere so that a ray from the sphere start can be tested against them below
+			AABox4EnlargeWithExtent(Vec3::sReplicate(mRadius), bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z);
+
+			// Test bounds of 4 children
+			Vec4 distance = RayAABox4(mStart, mInvDirection, bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z);
+	
+			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
+			return SortReverseAndStore(distance, mCollector.GetEarlyOutFraction(), ioProperties, &mDistanceStack[inStackTop]);
+		}
+
+		JPH_INLINE void				VisitTriangle(uint inX, uint inY, uint inTriangle, Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2)
+		{			
+			// Create sub shape id for this part
+			SubShapeID triangle_sub_shape_id = mShape2->EncodeSubShapeID(mSubShapeIDCreator2, inX, inY, inTriangle);
+
+			// Determine active edges
+			uint8 active_edges = mShape2->GetEdgeFlags(inX, inY, inTriangle);
+
+			// Cast the sphere against this triangle using the base class
+			Cast(inV0, inV1, inV2, active_edges, triangle_sub_shape_id);
+		}
+
+		const HeightFieldShape *	mShape2;
+		RayInvDirection				mInvDirection;
+		SubShapeIDCreator			mSubShapeIDCreator2;
+		float						mDistanceStack[cStackSize];
+	};
+
+	JPH_ASSERT(inShape->GetSubType() == EShapeSubType::HeightField);
+	const HeightFieldShape *shape = static_cast<const HeightFieldShape *>(inShape);
+
+	Visitor visitor(inShapeCast, inShapeCastSettings, inScale, inShapeFilter, inCenterOfMassTransform2, inSubShapeIDCreator1, ioCollector);
+	visitor.mShape2 = shape;
+	visitor.mInvDirection.Set(inShapeCast.mDirection);
+	visitor.mSubShapeIDCreator2 = inSubShapeIDCreator2;
+	shape->WalkHeightField(visitor);
+}
+
 struct HeightFieldShape::HSGetTrianglesContext
 {
 			HSGetTrianglesContext(const HeightFieldShape *inShape, const AABox &inBox, Vec3Arg inPositionCOM, QuatArg inRotation, Vec3Arg inScale) : 
@@ -1764,6 +1795,9 @@ void HeightFieldShape::sRegister()
 		CollisionDispatch::sRegisterCollideShape(s, EShapeSubType::HeightField, sCollideConvexVsHeightField);
 		CollisionDispatch::sRegisterCastShape(s, EShapeSubType::HeightField, sCastConvexVsHeightField);
 	}
+
+	// Specialized collision functions
+	CollisionDispatch::sRegisterCastShape(EShapeSubType::Sphere, EShapeSubType::HeightField, sCastSphereVsHeightField);
 }
 
 } // JPH

+ 1 - 0
Jolt/Physics/Collision/Shape/HeightFieldShape.h

@@ -218,6 +218,7 @@ private:
 	// Helper functions called by CollisionDispatch
 	static void						sCollideConvexVsHeightField(const Shape *inShape1, const Shape *inShape2, Vec3Arg inScale1, Vec3Arg inScale2, Mat44Arg inCenterOfMassTransform1, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, const CollideShapeSettings &inCollideShapeSettings, CollideShapeCollector &ioCollector);
 	static void						sCastConvexVsHeightField(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector);
+	static void						sCastSphereVsHeightField(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector);
 
 	/// Visit the entire height field using a visitor pattern
 	template <class Visitor>

+ 187 - 196
Jolt/Physics/Collision/Shape/MeshShape.cpp

@@ -14,9 +14,11 @@
 #include <Physics/Collision/CollidePointResult.h>
 #include <Physics/Collision/CollideConvexVsTriangles.h>
 #include <Physics/Collision/CastConvexVsTriangles.h>
+#include <Physics/Collision/CastSphereVsTriangles.h>
 #include <Physics/Collision/TransformedShape.h>
 #include <Physics/Collision/ActiveEdges.h>
 #include <Physics/Collision/CollisionDispatch.h>
+#include <Physics/Collision/SortReverseAndStore.h>
 #include <Core/StringTools.h>
 #include <Core/StreamIn.h>
 #include <Core/StreamOut.h>
@@ -54,13 +56,13 @@ using TriangleCodec = TriangleCodecIndexed8BitPackSOA4Flags;
 using NodeCodec = NodeCodecQuadTreeHalfFloat<1>;
 
 // Get header for tree
-static inline const NodeCodec::Header *sGetNodeHeader(const ByteBuffer &inTree)
+static JPH_INLINE const NodeCodec::Header *sGetNodeHeader(const ByteBuffer &inTree)
 {
 	return inTree.Get<NodeCodec::Header>(0);
 }
 
 // Get header for triangles
-static inline const TriangleCodec::TriangleHeader *sGetTriangleHeader(const ByteBuffer &inTree) 
+static JPH_INLINE const TriangleCodec::TriangleHeader *sGetTriangleHeader(const ByteBuffer &inTree) 
 {
 	return inTree.Get<TriangleCodec::TriangleHeader>(NodeCodec::HeaderSize);
 }
@@ -361,7 +363,7 @@ uint MeshShape::GetSubShapeIDBitsRecursive() const
 }
 
 template <class Visitor>
-void MeshShape::WalkTree(Visitor &ioVisitor) const
+JPH_INLINE void MeshShape::WalkTree(Visitor &ioVisitor) const
 {
 	const NodeCodec::Header *header = sGetNodeHeader(mTree);
 	NodeCodec::DecodingContext node_ctx(header);
@@ -371,6 +373,70 @@ void MeshShape::WalkTree(Visitor &ioVisitor) const
 	node_ctx.WalkTree(buffer_start, triangle_ctx, ioVisitor);
 }
 
+template <class Visitor>
+JPH_INLINE void MeshShape::WalkTreePerTriangle(const SubShapeIDCreator &inSubShapeIDCreator2, Visitor &ioVisitor) const
+{ // Walks the tree through WalkTree but hands triangles to ioVisitor one at a time via Visitor::VisitTriangle, with sub shape IDs built on inSubShapeIDCreator2
+	struct ChainedVisitor // Adapter: forwards the node callbacks to the wrapped visitor and turns each triangle block into per-triangle calls
+	{
+		JPH_INLINE			ChainedVisitor(Visitor &ioVisitor, const SubShapeIDCreator &inSubShapeIDCreator2, uint inTriangleBlockIDBits) :
+			mVisitor(ioVisitor),
+			mSubShapeIDCreator2(inSubShapeIDCreator2),
+			mTriangleBlockIDBits(inTriangleBlockIDBits)
+		{
+		}
+
+		JPH_INLINE bool		ShouldAbort() const
+		{
+			return mVisitor.ShouldAbort(); // Forwarded unchanged to the wrapped visitor
+		}
+
+		JPH_INLINE bool		ShouldVisitNode(int inStackTop) const
+		{
+			return mVisitor.ShouldVisitNode(inStackTop); // Forwarded unchanged to the wrapped visitor
+		}
+
+		JPH_INLINE int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+		{
+			return mVisitor.VisitNodes(inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ, ioProperties, inStackTop); // Forwarded unchanged to the wrapped visitor
+		}
+
+		JPH_INLINE void		VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
+		{
+			// Create ID for triangle block by pushing the block ID onto the sub shape ID creator that was passed in
+			SubShapeIDCreator block_sub_shape_id = mSubShapeIDCreator2.PushID(inTriangleBlockID, mTriangleBlockIDBits);
+
+			// Decode vertices (3 per triangle) and per-triangle flags into local buffers sized for MaxTrianglesPerLeaf triangles
+			JPH_ASSERT(inNumTriangles <= MaxTrianglesPerLeaf);
+			Vec3 vertices[MaxTrianglesPerLeaf * 3];
+			uint8 flags[MaxTrianglesPerLeaf];
+			ioContext.Unpack(inRootBoundsMin, inRootBoundsMax, inTriangles, inNumTriangles, vertices, flags);
+
+			int triangle_idx = 0;
+			for (const Vec3 *v = vertices, *v_end = vertices + inNumTriangles * 3; v < v_end; v += 3, triangle_idx++)
+			{
+				// Determine active edges, extracted from this triangle's flags byte using the active edge shift and mask
+				uint8 active_edges = (flags[triangle_idx] >> FLAGS_ACTIVE_EGDE_SHIFT) & FLAGS_ACTIVE_EDGE_MASK;
+
+				// Create ID for triangle by pushing its index within the block onto the block ID
+				SubShapeIDCreator triangle_sub_shape_id = block_sub_shape_id.PushID(triangle_idx, NumTriangleBits);
+
+				mVisitor.VisitTriangle(v[0], v[1], v[2], active_edges, triangle_sub_shape_id.GetID());
+
+				// Check if we should early out now, in which case the remaining triangles of this block are not visited
+				if (mVisitor.ShouldAbort())
+					break;
+			}
+		}
+
+		Visitor &			mVisitor; // Wrapped per-triangle visitor, held by reference
+		SubShapeIDCreator	mSubShapeIDCreator2; // Copy of the creator that triangle block and triangle IDs are pushed onto
+		uint				mTriangleBlockIDBits; // Bit count passed to PushID for the triangle block ID
+	};
+
+	ChainedVisitor visitor(ioVisitor, inSubShapeIDCreator2, NodeCodec::DecodingContext::sTriangleBlockIDBits(mTree));
+	WalkTree(visitor);
+}
+
 #ifdef JPH_DEBUG_RENDERER
 void MeshShape::Draw(DebugRenderer *inRenderer, Mat44Arg inCenterOfMassTransform, Vec3Arg inScale, ColorArg inColor, bool inUseMaterialColors, bool inDrawWireframe) const
 {
@@ -386,24 +452,24 @@ void MeshShape::Draw(DebugRenderer *inRenderer, Mat44Arg inCenterOfMassTransform
 	{
 		struct Visitor
 		{
-			bool	ShouldAbort() const
+			JPH_INLINE bool		ShouldAbort() const
 			{
 				return false;
 			}
 
-			bool	ShouldVisitNode(int inStackTop) const
+			JPH_INLINE bool		ShouldVisitNode(int inStackTop) const
 			{
 				return true;
 			}
 
-			int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+			JPH_INLINE int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
 			{
 				UVec4 valid = UVec4::sOr(UVec4::sOr(Vec4::sLess(inBoundsMinX, inBoundsMaxX), Vec4::sLess(inBoundsMinY, inBoundsMaxY)), Vec4::sLess(inBoundsMinZ, inBoundsMaxZ));
 				UVec4::sSort4True(valid, ioProperties);
 				return valid.CountTrues();
 			}
 
-			void	VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
+			JPH_INLINE void		VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, [[maybe_unused]] uint32 inTriangleBlockID) 
 			{
 				JPH_ASSERT(inNumTriangles <= MaxTrianglesPerLeaf);
 				Vec3 vertices[MaxTrianglesPerLeaf * 3];
@@ -454,39 +520,36 @@ void MeshShape::Draw(DebugRenderer *inRenderer, Mat44Arg inCenterOfMassTransform
 	{
 		struct Visitor
 		{
-					Visitor(DebugRenderer *inRenderer, Mat44Arg inTransform) :
+			JPH_INLINE 			Visitor(DebugRenderer *inRenderer, Mat44Arg inTransform) :
 				mRenderer(inRenderer),
 				mTransform(inTransform)
 			{
 			}
 
-			bool	ShouldAbort() const
+			JPH_INLINE bool		ShouldAbort() const
 			{
 				return false;
 			}
 
-			bool	ShouldVisitNode(int inStackTop) const
+			JPH_INLINE bool		ShouldVisitNode(int inStackTop) const
 			{
 				return true;
 			}
 
-			int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+			JPH_INLINE int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
 			{
 				UVec4 valid = UVec4::sOr(UVec4::sOr(Vec4::sLess(inBoundsMinX, inBoundsMaxX), Vec4::sLess(inBoundsMinY, inBoundsMaxY)), Vec4::sLess(inBoundsMinZ, inBoundsMaxZ));
 				UVec4::sSort4True(valid, ioProperties);
 				return valid.CountTrues();
 			}
 
-			void	VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
+			JPH_INLINE void		VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
 			{
-				// Get vertices
+				// Decode vertices and flags
 				JPH_ASSERT(inNumTriangles <= MaxTrianglesPerLeaf);
 				Vec3 vertices[MaxTrianglesPerLeaf * 3];
-				ioContext.Unpack(inRootBoundsMin, inRootBoundsMax, inTriangles, inNumTriangles, vertices);
-
-				// Get flags
 				uint8 flags[MaxTrianglesPerLeaf];
-				TriangleCodec::DecodingContext::sGetFlags(inTriangles, inNumTriangles, flags);
+				ioContext.Unpack(inRootBoundsMin, inRootBoundsMax, inTriangles, inNumTriangles, vertices, flags);
 
 				// Loop through triangles
 				const uint8 *f = flags;
@@ -523,42 +586,31 @@ bool MeshShape::CastRay(const RayCast &inRay, const SubShapeIDCreator &inSubShap
 
 	struct Visitor
 	{
-		explicit	Visitor(RayCastResult &ioHit) : 
+		JPH_INLINE explicit	Visitor(RayCastResult &ioHit) : 
 			mHit(ioHit)
 		{
 		}
 
-		bool		ShouldAbort() const
+		JPH_INLINE bool		ShouldAbort() const
 		{
 			return mHit.mFraction <= 0.0f;
 		}
 
-		bool		ShouldVisitNode(int inStackTop) const
+		JPH_INLINE bool		ShouldVisitNode(int inStackTop) const
 		{
 			return mDistanceStack[inStackTop] < mHit.mFraction;
 		}
 
-		int			VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+		JPH_INLINE int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
 		{
 			// Test bounds of 4 children
 			Vec4 distance = RayAABox4(mRayOrigin, mRayInvDirection, inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ);
 	
 			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-			Vec4::sSort4Reverse(distance, ioProperties);
-
-			// Count how many results are closer
-			UVec4 closer = Vec4::sLess(distance, Vec4::sReplicate(mHit.mFraction));
-			int num_results = closer.CountTrues();
-
-			// Shift the results so that only the closer ones remain
-			distance = distance.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-			ioProperties = ioProperties.ShiftComponents4Minus(num_results);
-
-			distance.StoreFloat4((Float4 *)&mDistanceStack[inStackTop]);
-			return num_results;
+			return SortReverseAndStore(distance, mHit.mFraction, ioProperties, &mDistanceStack[inStackTop]);
 		}
 
-		void		VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
+		JPH_INLINE void		VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
 		{
 			// Test against triangles
 			uint32 triangle_idx;
@@ -582,13 +634,11 @@ bool MeshShape::CastRay(const RayCast &inRay, const SubShapeIDCreator &inSubShap
 	};
 
 	Visitor visitor(ioHit);
-
 	visitor.mRayOrigin = inRay.mOrigin;
 	visitor.mRayDirection = inRay.mDirection;
 	visitor.mRayInvDirection.Set(inRay.mDirection);
 	visitor.mTriangleBlockIDBits = NodeCodec::DecodingContext::sTriangleBlockIDBits(mTree);
 	visitor.mSubShapeIDCreator = inSubShapeIDCreator;
-
 	WalkTree(visitor);
 
 	return visitor.mReturnValue;
@@ -600,89 +650,54 @@ void MeshShape::CastRay(const RayCast &inRay, const RayCastSettings &inRayCastSe
 
 	struct Visitor
 	{
-		explicit	Visitor(CastRayCollector &ioCollector) : 
+		JPH_INLINE explicit	Visitor(CastRayCollector &ioCollector) : 
 			mCollector(ioCollector)
 		{
 		}
 
-		bool		ShouldAbort() const
+		JPH_INLINE bool		ShouldAbort() const
 		{
 			return mCollector.ShouldEarlyOut();
 		}
 
-		bool		ShouldVisitNode(int inStackTop) const
+		JPH_INLINE bool		ShouldVisitNode(int inStackTop) const
 		{
 			return mDistanceStack[inStackTop] < mCollector.GetEarlyOutFraction();
 		}
 
-		int			VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+		JPH_INLINE int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
 		{
 			// Test bounds of 4 children
 			Vec4 distance = RayAABox4(mRayOrigin, mRayInvDirection, inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ);
 	
 			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-			Vec4::sSort4Reverse(distance, ioProperties);
-
-			// Count how many results are closer
-			UVec4 closer = Vec4::sLess(distance, Vec4::sReplicate(mCollector.GetEarlyOutFraction()));
-			int num_results = closer.CountTrues();
-
-			// Shift the results so that only the closer ones remain
-			distance = distance.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-			ioProperties = ioProperties.ShiftComponents4Minus(num_results);
-
-			distance.StoreFloat4((Float4 *)&mDistanceStack[inStackTop]);
-			return num_results;
+			return SortReverseAndStore(distance, mCollector.GetEarlyOutFraction(), ioProperties, &mDistanceStack[inStackTop]);
 		}
 
-		void		VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
+		JPH_INLINE void		VisitTriangle(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, [[maybe_unused]] uint8 inActiveEdges, SubShapeID inSubShapeID2) 
 		{
-			// Create ID for triangle block
-			SubShapeIDCreator block_sub_shape_id = mSubShapeIDCreator.PushID(inTriangleBlockID, mTriangleBlockIDBits);
-
-			// Decode vertices
-			JPH_ASSERT(inNumTriangles <= MaxTrianglesPerLeaf);
-			Vec3 vertices[MaxTrianglesPerLeaf * 3];
-			ioContext.Unpack(inRootBoundsMin, inRootBoundsMax, inTriangles, inNumTriangles, vertices);
-
-			// Decode triangle flags
-			uint8 flags[MaxTrianglesPerLeaf];
-			TriangleCodec::DecodingContext::sGetFlags(inTriangles, inNumTriangles, flags);
+			// Back facing check
+			if (mBackFaceMode == EBackFaceMode::IgnoreBackFaces && (inV2 - inV0).Cross(inV1 - inV0).Dot(mRayDirection) < 0)
+				return;
 
-			// Loop over all triangles
-			for (int triangle_idx = 0; triangle_idx < inNumTriangles; ++triangle_idx)
+			// Check the triangle
+			float fraction = RayTriangle(mRayOrigin, mRayDirection, inV0, inV1, inV2);
+			if (fraction < mCollector.GetEarlyOutFraction())
 			{
-				// Determine vertices
-				const Vec3 *vertex = vertices + triangle_idx * 3;
-				Vec3 v0 = vertex[0];
-				Vec3 v1 = vertex[1];
-				Vec3 v2 = vertex[2];
-
-				// Back facing check
-				if (mBackFaceMode == EBackFaceMode::IgnoreBackFaces && (v2 - v0).Cross(v1 - v0).Dot(mRayDirection) < 0)
-					continue;
-
-				// Check the triangle
-				float fraction = RayTriangle(mRayOrigin, mRayDirection, v0, v1, v2);
-				if (fraction < mCollector.GetEarlyOutFraction())
-				{
-					RayCastResult hit;
-					hit.mBodyID = TransformedShape::sGetBodyID(mCollector.GetContext());
-					hit.mFraction = fraction;
-					hit.mSubShapeID2 = block_sub_shape_id.PushID(triangle_idx, NumTriangleBits).GetID();
-					mCollector.AddHit(hit);
-				}
+				RayCastResult hit;
+				hit.mBodyID = TransformedShape::sGetBodyID(mCollector.GetContext());
+				hit.mFraction = fraction;
+				hit.mSubShapeID2 = inSubShapeID2;
+				mCollector.AddHit(hit);
 			}
 		}
 
-		CastRayCollector &		mCollector;
-		Vec3					mRayOrigin;
-		Vec3					mRayDirection;
-		RayInvDirection			mRayInvDirection;
-		EBackFaceMode			mBackFaceMode;
-		uint					mTriangleBlockIDBits;
-		SubShapeIDCreator		mSubShapeIDCreator;
-		float					mDistanceStack[NodeCodec::StackSize];
+		CastRayCollector &	mCollector;
+		Vec3				mRayOrigin;
+		Vec3				mRayDirection;
+		RayInvDirection		mRayInvDirection;
+		EBackFaceMode		mBackFaceMode;
+		float				mDistanceStack[NodeCodec::StackSize];
 	};
 
 	Visitor visitor(ioCollector);
@@ -690,10 +705,7 @@ void MeshShape::CastRay(const RayCast &inRay, const RayCastSettings &inRayCastSe
 	visitor.mRayOrigin = inRay.mOrigin;
 	visitor.mRayDirection = inRay.mDirection;
 	visitor.mRayInvDirection.Set(inRay.mDirection);
-	visitor.mTriangleBlockIDBits = NodeCodec::DecodingContext::sTriangleBlockIDBits(mTree);
-	visitor.mSubShapeIDCreator = inSubShapeIDCreator;
-
-	WalkTree(visitor);
+	WalkTreePerTriangle(inSubShapeIDCreator, visitor);
 }
 
 void MeshShape::CollidePoint(Vec3Arg inPoint, const SubShapeIDCreator &inSubShapeIDCreator, CollidePointCollector &ioCollector) const
@@ -740,17 +752,17 @@ void MeshShape::sCastConvexVsMesh(const ShapeCast &inShapeCast, const ShapeCastS
 	{
 		using CastConvexVsTriangles::CastConvexVsTriangles;
 
-		bool		ShouldAbort() const
+		JPH_INLINE bool		ShouldAbort() const
 		{
 			return mCollector.ShouldEarlyOut();
 		}
 
-		bool		ShouldVisitNode(int inStackTop) const
+		JPH_INLINE bool		ShouldVisitNode(int inStackTop) const
 		{
 			return mDistanceStack[inStackTop] < mCollector.GetEarlyOutFraction();
 		}
 
-		int			VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+		JPH_INLINE int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
 		{
 			// Scale the bounding boxes of this node
 			Vec4 bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z;
@@ -763,57 +775,71 @@ void MeshShape::sCastConvexVsMesh(const ShapeCast &inShapeCast, const ShapeCastS
 			Vec4 distance = RayAABox4(mBoxCenter, mInvDirection, bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z);
 	
 			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-			Vec4::sSort4Reverse(distance, ioProperties);
+			return SortReverseAndStore(distance, mCollector.GetEarlyOutFraction(), ioProperties, &mDistanceStack[inStackTop]);
+		}
 
-			// Count how many results are closer
-			UVec4 closer = Vec4::sLess(distance, Vec4::sReplicate(mCollector.GetEarlyOutFraction()));
-			int num_results = closer.CountTrues();
+		JPH_INLINE void		VisitTriangle(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, uint8 inActiveEdges, SubShapeID inSubShapeID2) 
+		{ // Per-triangle callback: passes the triangle, its active edges and its sub shape ID straight on to Cast
+			Cast(inV0, inV1, inV2, inActiveEdges, inSubShapeID2); // NOTE(review): Cast is not defined in this struct, presumably it comes from the CastConvexVsTriangles base - confirm
+		}
 
-			// Shift the results so that only the closer ones remain
-			distance = distance.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-			ioProperties = ioProperties.ShiftComponents4Minus(num_results);
+		RayInvDirection		mInvDirection;
+		Vec3				mBoxCenter;
+		Vec3				mBoxExtent;
+		float				mDistanceStack[NodeCodec::StackSize];
+	};
 
-			distance.StoreFloat4((Float4 *)&mDistanceStack[inStackTop]);
-			return num_results;
-		}
+	JPH_ASSERT(inShape->GetSubType() == EShapeSubType::Mesh);
+	const MeshShape *shape = static_cast<const MeshShape *>(inShape);
+
+	Visitor visitor(inShapeCast, inShapeCastSettings, inScale, inShapeFilter, inCenterOfMassTransform2, inSubShapeIDCreator1, ioCollector);
+	visitor.mInvDirection.Set(inShapeCast.mDirection);
+	visitor.mBoxCenter = inShapeCast.mShapeWorldBounds.GetCenter();
+	visitor.mBoxExtent = inShapeCast.mShapeWorldBounds.GetExtent();
+	shape->WalkTreePerTriangle(inSubShapeIDCreator2, visitor);
+}
+
+void MeshShape::sCastSphereVsMesh(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector)
+{
+	JPH_PROFILE_FUNCTION();
+
+	struct Visitor : public CastSphereVsTriangles
+	{
+		using CastSphereVsTriangles::CastSphereVsTriangles;
 
-		void		VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
+		JPH_INLINE bool		ShouldAbort() const
 		{
-			// Create ID for triangle block
-			SubShapeIDCreator block_sub_shape_id = mSubShapeIDCreator2.PushID(inTriangleBlockID, mTriangleBlockIDBits);
+			return mCollector.ShouldEarlyOut();
+		}
 
-			// Decode vertices
-			JPH_ASSERT(inNumTriangles <= MaxTrianglesPerLeaf);
-			Vec3 vertices[MaxTrianglesPerLeaf * 3];
-			ioContext.Unpack(inRootBoundsMin, inRootBoundsMax, inTriangles, inNumTriangles, vertices);
+		JPH_INLINE bool		ShouldVisitNode(int inStackTop) const
+		{
+			return mDistanceStack[inStackTop] < mCollector.GetEarlyOutFraction();
+		}
 
-			// Decode triangle flags
-			uint8 flags[MaxTrianglesPerLeaf];
-			TriangleCodec::DecodingContext::sGetFlags(inTriangles, inNumTriangles, flags);
+		JPH_INLINE int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+		{
+			// Scale the bounding boxes of this node
+			Vec4 bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z;
+			AABox4Scale(mScale, inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ, bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z);
 
-			int triangle_idx = 0;
-			for (Vec3 *v = vertices, *v_end = vertices + inNumTriangles * 3; v < v_end; v += 3, triangle_idx++)
-			{
-				// Determine active edges
-				uint8 active_edges = (flags[triangle_idx] >> FLAGS_ACTIVE_EGDE_SHIFT) & FLAGS_ACTIVE_EDGE_MASK;
+			// Enlarge them by the radius of the sphere
+			AABox4EnlargeWithExtent(Vec3::sReplicate(mRadius), bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z);
 
-				// Create ID for triangle
-				SubShapeIDCreator triangle_sub_shape_id = block_sub_shape_id.PushID(triangle_idx, NumTriangleBits);
+			// Test bounds of 4 children
+			Vec4 distance = RayAABox4(mStart, mInvDirection, bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z);
+	
+			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
+			return SortReverseAndStore(distance, mCollector.GetEarlyOutFraction(), ioProperties, &mDistanceStack[inStackTop]);
+		}
 
-				Cast(v[0], v[1], v[2], active_edges, triangle_sub_shape_id.GetID());
-				
-				// Check if we should exit because we found our hit
-				if (mCollector.ShouldEarlyOut())
-					break;
-			}
+		JPH_INLINE void		VisitTriangle(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, uint8 inActiveEdges, SubShapeID inSubShapeID2) 
+		{ // Per-triangle callback: passes the triangle, its active edges and its sub shape ID straight on to Cast
+			Cast(inV0, inV1, inV2, inActiveEdges, inSubShapeID2); // NOTE(review): Cast is not defined in this struct, presumably it comes from the CastSphereVsTriangles base - confirm
+		}
 
-		RayInvDirection				mInvDirection;
-		Vec3						mBoxCenter;
-		Vec3						mBoxExtent;
-		SubShapeIDCreator			mSubShapeIDCreator2;
-		uint						mTriangleBlockIDBits;
-		float						mDistanceStack[NodeCodec::StackSize];
+		RayInvDirection		mInvDirection;
+		float				mDistanceStack[NodeCodec::StackSize];
 	};
 
 	JPH_ASSERT(inShape->GetSubType() == EShapeSubType::Mesh);
@@ -821,16 +847,12 @@ void MeshShape::sCastConvexVsMesh(const ShapeCast &inShapeCast, const ShapeCastS
 
 	Visitor visitor(inShapeCast, inShapeCastSettings, inScale, inShapeFilter, inCenterOfMassTransform2, inSubShapeIDCreator1, ioCollector);
 	visitor.mInvDirection.Set(inShapeCast.mDirection);
-	visitor.mBoxCenter = inShapeCast.mShapeWorldBounds.GetCenter();
-	visitor.mBoxExtent = inShapeCast.mShapeWorldBounds.GetExtent();
-	visitor.mSubShapeIDCreator2 = inSubShapeIDCreator2;
-	visitor.mTriangleBlockIDBits = NodeCodec::DecodingContext::sTriangleBlockIDBits(shape->mTree);
-	shape->WalkTree(visitor);
+	shape->WalkTreePerTriangle(inSubShapeIDCreator2, visitor);
 }
 
 struct MeshShape::MSGetTrianglesContext
 {
-			MSGetTrianglesContext(const MeshShape *inShape, const AABox &inBox, Vec3Arg inPositionCOM, QuatArg inRotation, Vec3Arg inScale) : 
+	JPH_INLINE 		MSGetTrianglesContext(const MeshShape *inShape, const AABox &inBox, Vec3Arg inPositionCOM, QuatArg inRotation, Vec3Arg inScale) : 
 		mDecodeCtx(sGetNodeHeader(inShape->mTree)),
 		mShape(inShape),
 		mLocalBox(Mat44::sInverseRotationTranslation(inRotation, inPositionCOM), inBox),
@@ -840,17 +862,17 @@ struct MeshShape::MSGetTrianglesContext
 	{
 	}
 
-	bool	ShouldAbort() const
+	JPH_INLINE bool	ShouldAbort() const
 	{
 		return mShouldAbort;
 	}
 
-	bool	ShouldVisitNode(int inStackTop) const
+	JPH_INLINE bool	ShouldVisitNode([[maybe_unused]] int inStackTop) const
 	{
 		return true;
 	}
 
-	int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+	JPH_INLINE int	VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, [[maybe_unused]] int inStackTop) const
 	{
 		// Scale the bounding boxes of this node
 		Vec4 bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z;
@@ -866,7 +888,7 @@ struct MeshShape::MSGetTrianglesContext
 		return collides.CountTrues();
 	}
 
-	void	VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
+	JPH_INLINE void	VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, [[maybe_unused]] uint32 inTriangleBlockID) 
 	{
 		// When the buffer is full and we cannot process the triangles, abort the tree walk. The next time GetTrianglesNext is called we will continue here.
 		if (mNumTrianglesFound + inNumTriangles > mMaxTrianglesRequested)
@@ -982,17 +1004,17 @@ void MeshShape::sCollideConvexVsMesh(const Shape *inShape1, const Shape *inShape
 	{
 		using CollideConvexVsTriangles::CollideConvexVsTriangles;
 
-		bool	ShouldAbort() const
+		JPH_INLINE bool	ShouldAbort() const
 		{
 			return mCollector.ShouldEarlyOut();
 		}
 
-		bool	ShouldVisitNode(int inStackTop) const
+		JPH_INLINE bool	ShouldVisitNode([[maybe_unused]] int inStackTop) const
 		{
 			return true;
 		}
 
-		int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+		JPH_INLINE int	VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, [[maybe_unused]] int inStackTop) 
 		{
 			// Scale the bounding boxes of this node
 			Vec4 bounds_min_x, bounds_min_y, bounds_min_z, bounds_max_x, bounds_max_y, bounds_max_z;
@@ -1008,49 +1030,14 @@ void MeshShape::sCollideConvexVsMesh(const Shape *inShape1, const Shape *inShape
 			return collides.CountTrues();
 		}
 
-		void	VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
+		JPH_INLINE void	VisitTriangle(Vec3Arg inV0, Vec3Arg inV1, Vec3Arg inV2, uint8 inActiveEdges, SubShapeID inSubShapeID2) 
 		{
-			// Create ID for triangle block
-			SubShapeIDCreator block_sub_shape_id = mSubShapeIDCreator2.PushID(inTriangleBlockID, mTriangleBlockIDBits);
-
-			// Decode vertices
-			JPH_ASSERT(inNumTriangles <= MaxTrianglesPerLeaf);
-			Vec3 vertices[MaxTrianglesPerLeaf * 3];
-			ioContext.Unpack(inRootBoundsMin, inRootBoundsMax, inTriangles, inNumTriangles, vertices);
-
-			// Decode triangle flags
-			uint8 flags[MaxTrianglesPerLeaf];
-			TriangleCodec::DecodingContext::sGetFlags(inTriangles, inNumTriangles, flags);
-
-			// Loop over all triangles
-			for (int triangle_idx = 0; triangle_idx < inNumTriangles; ++triangle_idx)
-			{
-				// Create ID for triangle
-				SubShapeID triangle_sub_shape_id = block_sub_shape_id.PushID(triangle_idx, NumTriangleBits).GetID();
-
-				// Determine active edges
-				uint8 active_edges = (flags[triangle_idx] >> FLAGS_ACTIVE_EGDE_SHIFT) & FLAGS_ACTIVE_EDGE_MASK;
-
-				// Determine vertices
-				const Vec3 *vertex = vertices + triangle_idx * 3;
-
-				Collide(vertex[0], vertex[1], vertex[2], active_edges, triangle_sub_shape_id);
-
-				// Check if we should exit because we found our hit
-				if (mCollector.ShouldEarlyOut())
-					break;
-			}
+			Collide(inV0, inV1, inV2, inActiveEdges, inSubShapeID2);
 		}
-
-		uint							mTriangleBlockIDBits;
-		SubShapeIDCreator				mSubShapeIDCreator2;
 	};
 
 	Visitor visitor(shape1, inScale1, inScale2, inCenterOfMassTransform1, inCenterOfMassTransform2, inSubShapeIDCreator1.GetID(), inCollideShapeSettings, ioCollector);
-	visitor.mTriangleBlockIDBits = NodeCodec::DecodingContext::sTriangleBlockIDBits(shape2->mTree);
-	visitor.mSubShapeIDCreator2 = inSubShapeIDCreator2;
-
-	shape2->WalkTree(visitor);
+	shape2->WalkTreePerTriangle(inSubShapeIDCreator2, visitor);
 }
 
 void MeshShape::SaveBinaryState(StreamOut &inStream) const
@@ -1082,17 +1069,17 @@ Shape::Stats MeshShape::GetStats() const
 	// Walk the tree to count the triangles
 	struct Visitor
 	{
-		bool		ShouldAbort() const
+		JPH_INLINE bool		ShouldAbort() const
 		{
 			return false;
 		}
 
-		bool		ShouldVisitNode(int inStackTop) const
+		JPH_INLINE bool		ShouldVisitNode([[maybe_unused]] int inStackTop) const
 		{
 			return true;
 		}
 
-		int			VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, int inStackTop) 
+		JPH_INLINE int		VisitNodes(Vec4Arg inBoundsMinX, Vec4Arg inBoundsMinY, Vec4Arg inBoundsMinZ, Vec4Arg inBoundsMaxX, Vec4Arg inBoundsMaxY, Vec4Arg inBoundsMaxZ, UVec4 &ioProperties, [[maybe_unused]] int inStackTop) const
 		{
 			// Visit all valid children
 			UVec4 valid = UVec4::sOr(UVec4::sOr(Vec4::sLess(inBoundsMinX, inBoundsMaxX), Vec4::sLess(inBoundsMinY, inBoundsMaxY)), Vec4::sLess(inBoundsMinZ, inBoundsMaxZ));
@@ -1100,13 +1087,14 @@ Shape::Stats MeshShape::GetStats() const
 			return valid.CountTrues();
 		}
 
-		void		VisitTriangles(const TriangleCodec::DecodingContext &ioContext, Vec3Arg inRootBoundsMin, Vec3Arg inRootBoundsMax, const void *inTriangles, int inNumTriangles, uint32 inTriangleBlockID) 
+		JPH_INLINE void		VisitTriangles([[maybe_unused]] const TriangleCodec::DecodingContext &ioContext, [[maybe_unused]] Vec3Arg inRootBoundsMin, [[maybe_unused]] Vec3Arg inRootBoundsMax, [[maybe_unused]] const void *inTriangles, int inNumTriangles, [[maybe_unused]] uint32 inTriangleBlockID) 
 		{
 			mNumTriangles += inNumTriangles;
 		}
 
-		uint		mNumTriangles = 0;
+		uint				mNumTriangles = 0;
 	};
+
 	Visitor visitor;
 	WalkTree(visitor);
 	
@@ -1124,6 +1112,9 @@ void MeshShape::sRegister()
 		CollisionDispatch::sRegisterCollideShape(s, EShapeSubType::Mesh, sCollideConvexVsMesh);
 		CollisionDispatch::sRegisterCastShape(s, EShapeSubType::Mesh, sCastConvexVsMesh);
 	}
+
+	// Specialized collision functions
+	CollisionDispatch::sRegisterCastShape(EShapeSubType::Sphere, EShapeSubType::Mesh, sCastSphereVsMesh);
 }
 
 } // JPH

+ 5 - 0
Jolt/Physics/Collision/Shape/MeshShape.h

@@ -131,12 +131,17 @@ private:
 	template <class Visitor>
 	void							WalkTree(Visitor &ioVisitor) const;
 
+	/// Same as above but with a callback per triangle instead of per block of triangles
+	template <class Visitor>
+	void							WalkTreePerTriangle(const SubShapeIDCreator &inSubShapeIDCreator2, Visitor &ioVisitor) const;
+
 	/// Decode a sub shape ID
 	inline void						DecodeSubShapeID(const SubShapeID &inSubShapeID, const void *&outTriangleBlock, uint32 &outTriangleIndex) const;
 
 	// Helper functions called by CollisionDispatch
 	static void						sCollideConvexVsMesh(const Shape *inShape1, const Shape *inShape2, Vec3Arg inScale1, Vec3Arg inScale2, Mat44Arg inCenterOfMassTransform1, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, const CollideShapeSettings &inCollideShapeSettings, CollideShapeCollector &ioCollector);
 	static void						sCastConvexVsMesh(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector);
+	static void						sCastSphereVsMesh(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector);
 
 	/// Materials assigned to the triangles. Each triangle specifies which material it uses through its mMaterialIndex
 	PhysicsMaterialList				mMaterials;

+ 4 - 36
Jolt/Physics/Collision/Shape/StaticCompoundShape.cpp

@@ -6,6 +6,7 @@
 #include <Physics/Collision/Shape/StaticCompoundShape.h>
 #include <Physics/Collision/Shape/RotatedTranslatedShape.h>
 #include <Physics/Collision/Shape/CompoundShapeVisitors.h>
+#include <Physics/Collision/SortReverseAndStore.h>
 #include <Core/Profiler.h>
 #include <Core/StreamIn.h>
 #include <Core/StreamOut.h>
@@ -424,18 +425,7 @@ bool StaticCompoundShape::CastRay(const RayCast &inRay, const SubShapeIDCreator
 			Vec4 distance = TestBounds(inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ);
 	
 			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-			Vec4::sSort4Reverse(distance, ioProperties);
-
-			// Count how many results are closer
-			UVec4 closer = Vec4::sLess(distance, Vec4::sReplicate(mHit.mFraction));
-			int num_results = closer.CountTrues();
-
-			// Shift the results so that only the closer ones remain
-			distance = distance.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-			ioProperties = ioProperties.ShiftComponents4Minus(num_results);
-
-			distance.StoreFloat4((Float4 *)&mDistanceStack[inStackTop]);
-			return num_results;
+			return SortReverseAndStore(distance, mHit.mFraction, ioProperties, &mDistanceStack[inStackTop]);
 		}
 
 		float				mDistanceStack[cStackSize];
@@ -465,18 +455,7 @@ void StaticCompoundShape::CastRay(const RayCast &inRay, const RayCastSettings &i
 			Vec4 distance = TestBounds(inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ);
 	
 			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-			Vec4::sSort4Reverse(distance, ioProperties);
-
-			// Count how many results are closer
-			UVec4 closer = Vec4::sLess(distance, Vec4::sReplicate(mCollector.GetEarlyOutFraction()));
-			int num_results = closer.CountTrues();
-
-			// Shift the results so that only the closer ones remain
-			distance = distance.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-			ioProperties = ioProperties.ShiftComponents4Minus(num_results);
-
-			distance.StoreFloat4((Float4 *)&mDistanceStack[inStackTop]);
-			return num_results;
+			return SortReverseAndStore(distance, mCollector.GetEarlyOutFraction(), ioProperties, &mDistanceStack[inStackTop]);
 		}
 
 		float				mDistanceStack[cStackSize];
@@ -534,18 +513,7 @@ void StaticCompoundShape::sCastShapeVsCompound(const ShapeCast &inShapeCast, con
 			Vec4 distance = TestBounds(inBoundsMinX, inBoundsMinY, inBoundsMinZ, inBoundsMaxX, inBoundsMaxY, inBoundsMaxZ);
 	
 			// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
-			Vec4::sSort4Reverse(distance, ioProperties);
-
-			// Count how many results are closer
-			UVec4 closer = Vec4::sLess(distance, Vec4::sReplicate(mCollector.GetEarlyOutFraction()));
-			int num_results = closer.CountTrues();
-
-			// Shift the results so that only the closer ones remain
-			distance = distance.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
-			ioProperties = ioProperties.ShiftComponents4Minus(num_results);
-
-			distance.StoreFloat4((Float4 *)&mDistanceStack[inStackTop]);
-			return num_results;
+			return SortReverseAndStore(distance, mCollector.GetEarlyOutFraction(), ioProperties, &mDistanceStack[inStackTop]);
 		}
 
 		float				mDistanceStack[cStackSize];

+ 13 - 0
Jolt/Physics/Collision/Shape/TriangleShape.cpp

@@ -12,6 +12,7 @@
 #include <Physics/Collision/CollidePointResult.h>
 #include <Physics/Collision/TransformedShape.h>
 #include <Physics/Collision/CastConvexVsTriangles.h>
+#include <Physics/Collision/CastSphereVsTriangles.h>
 #include <Physics/Collision/CollisionDispatch.h>
 #include <Geometry/ConvexSupport.h>
 #include <Geometry/RayTriangle.h>
@@ -243,6 +244,15 @@ void TriangleShape::sCastConvexVsTriangle(const ShapeCast &inShapeCast, const Sh
 	caster.Cast(shape->mV1, shape->mV2, shape->mV3, 0b111, inSubShapeIDCreator2.GetID());
 }
 
+void TriangleShape::sCastSphereVsTriangle(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector)
+{
+	JPH_ASSERT(inShape->GetSubType() == EShapeSubType::Triangle);
+	const TriangleShape *shape = static_cast<const TriangleShape *>(inShape);
+
+	CastSphereVsTriangles caster(inShapeCast, inShapeCastSettings, inScale, inShapeFilter, inCenterOfMassTransform2, inSubShapeIDCreator1, ioCollector);
+	caster.Cast(shape->mV1, shape->mV2, shape->mV3, 0b111, inSubShapeIDCreator2.GetID());
+}
+
 void TriangleShape::TransformShape(Mat44Arg inCenterOfMassTransform, TransformedShapeCollector &ioCollector) const
 {
 	Vec3 scale;
@@ -331,6 +341,9 @@ void TriangleShape::sRegister()
 
 	for (EShapeSubType s : sConvexSubShapeTypes)
 		CollisionDispatch::sRegisterCastShape(s, EShapeSubType::Triangle, sCastConvexVsTriangle);
+
+	// Specialized collision functions
+	CollisionDispatch::sRegisterCastShape(EShapeSubType::Sphere, EShapeSubType::Triangle, sCastSphereVsTriangle);
 }
 
 } // JPH

+ 1 - 0
Jolt/Physics/Collision/Shape/TriangleShape.h

@@ -111,6 +111,7 @@ protected:
 private:
 	// Helper functions called by CollisionDispatch
 	static void				sCastConvexVsTriangle(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector);
+	static void				sCastSphereVsTriangle(const ShapeCast &inShapeCast, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector);
 
 	// Context for GetTrianglesStart/Next
 	class					TSGetTrianglesContext;

+ 33 - 0
Jolt/Physics/Collision/SortReverseAndStore.h

@@ -0,0 +1,33 @@
+// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+namespace JPH {
+
+/// This function will sort values from high to low and only keep the ones that are less than inMaxValue
+/// @param inValues Values to be sorted
+/// @param inMaxValue Values need to be less than this to keep them
+/// @param ioIdentifiers 4 identifiers that will be sorted in the same way as the values
+/// @param outValues The values are stored here from high to low; 4 floats are always written, so the buffer must have room for 4 floats
+/// @return The number of values that were kept
+JPH_INLINE int SortReverseAndStore(Vec4Arg inValues, float inMaxValue, UVec4 &ioIdentifiers, float *outValues)
+{	
+	// Sort so that highest values are first (we want to first process closer hits and we process stack top to bottom)
+	Vec4::sSort4Reverse(inValues, ioIdentifiers);
+
+	// Count how many results are less than the max value
+	UVec4 closer = Vec4::sLess(inValues, Vec4::sReplicate(inMaxValue));
+	int num_results = closer.CountTrues();
+
+	// Shift the values so that only the ones that are less than max are kept
+	inValues = inValues.ReinterpretAsInt().ShiftComponents4Minus(num_results).ReinterpretAsFloat();
+	ioIdentifiers = ioIdentifiers.ShiftComponents4Minus(num_results);
+
+	// Store the values
+	inValues.StoreFloat4((Float4 *)outValues);
+
+	return num_results;
+}
+
+} // JPH

+ 1 - 1
Jolt/Physics/PhysicsSettings.h

@@ -18,7 +18,7 @@ constexpr float cDefaultConvexRadius = 0.05f;
 static constexpr float cCapsuleProjectionSlop = 0.02f;
 
 /// Maximum amount of jobs to allow
-constexpr int cMaxPhysicsJobs = 1024;
+constexpr int cMaxPhysicsJobs = 2048;
 
 /// Maximum amount of barriers to allow
 constexpr int cMaxPhysicsBarriers = 8;

+ 0 - 2
Samples/SamplesApp.cpp

@@ -1181,8 +1181,6 @@ bool SamplesApp::CastProbe(float inProbeLength, float &outFraction, Vec3 &outPos
 						Vec3 contact_position1 = hit.mContactPointOn1;
 						Vec3 contact_position2 = hit.mContactPointOn2;
 						Vec3 normal = hit.mPenetrationAxis.Normalized();
-						if (hit.mIsBackFaceHit)
-							normal = -normal;
 						mDebugRenderer->DrawArrow(contact_position2, contact_position2 - normal, color, 0.01f); // Flip to make it point towards the cast body
 
 						// Contact position 1

+ 1 - 1
Samples/SamplesApp.h

@@ -176,7 +176,7 @@ private:
 	ECollectFacesMode		mCollectFacesMode = ECollectFacesMode::NoFaces;				// If we should collect colliding faces
 	float					mMaxSeparationDistance = 0.0f;								// Max separation distance for collide shape test
 	bool					mTreatConvexAsSolid = true;									// For ray casts if the shape should be treated as solid or if the ray should only collide with the surface
-	bool					mReturnDeepestPoint = false;								// For shape casts, when true this will return the deepest point
+	bool					mReturnDeepestPoint = true;									// For shape casts, when true this will return the deepest point
 	bool					mUseShrunkenShapeAndConvexRadius = false;					// Shrink then expand the shape by the convex radius
 	int						mMaxHits = 10;												// The maximum number of hits to request for a collision probe.
 

+ 28 - 0
UnitTests/Physics/CastShapeTests.cpp

@@ -17,6 +17,24 @@
 
 TEST_SUITE("CastShapeTests")
 {
+	/// Helper function that tests a sphere against a triangle
+	static void sTestCastSphereVertexOrEdge(const Shape *inSphere, Vec3Arg inPosition, Vec3Arg inDirection, const Shape *inTriangle)
+	{
+		ShapeCast shape_cast(inSphere, Vec3::sReplicate(1.0f), Mat44::sTranslation(inPosition - inDirection), inDirection);
+		ShapeCastSettings cast_settings;
+		cast_settings.mBackFaceModeTriangles = EBackFaceMode::CollideWithBackFaces;
+		cast_settings.mBackFaceModeConvex = EBackFaceMode::CollideWithBackFaces;
+		AllHitCollisionCollector<CastShapeCollector> collector;
+		CollisionDispatch::sCastShapeVsShape(shape_cast, cast_settings, inTriangle, Vec3::sReplicate(1.0f), ShapeFilter(), Mat44::sIdentity(), SubShapeIDCreator(), SubShapeIDCreator(), collector);
+		CHECK(collector.mHits.size() == 1);
+		const ShapeCastResult &result = collector.mHits.back();
+		CHECK_APPROX_EQUAL(result.mFraction, 1.0f - 0.2f / inDirection.Length(), 1.0e-4f);
+		CHECK_APPROX_EQUAL(result.mPenetrationAxis.Normalized(), inDirection.Normalized(), 1.0e-3f);
+		CHECK_APPROX_EQUAL(result.mPenetrationDepth, 0.0f, 1.0e-3f);
+		CHECK_APPROX_EQUAL(result.mContactPointOn1, inPosition, 1.0e-3f);
+		CHECK_APPROX_EQUAL(result.mContactPointOn2, inPosition, 1.0e-3f);
+	}
+
 	/// Helper function that tests a sphere against a triangle centered on the origin with normal Z
 	static void sTestCastSphereTriangle(const Shape *inTriangle)
 	{
@@ -91,6 +109,16 @@ TEST_SUITE("CastShapeTests")
 			CHECK_APPROX_EQUAL(result.mContactPointOn2, Vec3::sZero(), 1.0e-3f);
 			CHECK(result.mIsBackFaceHit);
 		}
+
+		// Hit vertex 1, 2 and 3
+		sTestCastSphereVertexOrEdge(sphere, Vec3(50, 25, 0), Vec3(-10, -10, 0), inTriangle);
+		sTestCastSphereVertexOrEdge(sphere, Vec3(-50, 25, 0), Vec3(10, -10, 0), inTriangle);
+		sTestCastSphereVertexOrEdge(sphere, Vec3(0, -25, 0), Vec3(0, 10, 0), inTriangle);
+
+		// Hit edge 1, 2 and 3
+		sTestCastSphereVertexOrEdge(sphere, Vec3(0, 25, 0), Vec3(0, -10, 0), inTriangle); // Edge: Vec3(50, 25, 0), Vec3(-50, 25, 0)
+		sTestCastSphereVertexOrEdge(sphere, Vec3(-25, 0, 0), Vec3(10, 10, 0), inTriangle); // Edge: Vec3(-50, 25, 0), Vec3(0,-25, 0)
+		sTestCastSphereVertexOrEdge(sphere, Vec3(25, 0, 0), Vec3(-10, 10, 0), inTriangle); // Edge: Vec3(0,-25, 0), Vec3(50, 25, 0)
 	}
 
 	TEST_CASE("TestCastSphereTriangle")